summaryrefslogtreecommitdiff
path: root/tools/lz/mpcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lz/mpcomp.c')
-rw-r--r--tools/lz/mpcomp.c112
1 files changed, 112 insertions, 0 deletions
diff --git a/tools/lz/mpcomp.c b/tools/lz/mpcomp.c
new file mode 100644
index 000000000..5961be8eb
--- /dev/null
+++ b/tools/lz/mpcomp.c
@@ -0,0 +1,112 @@
+#include "proto.h"
+
+/*
+ Multi-pass compressor: performs an initial pass generating a single command for each byte position in the data and
+ refines the command stream further in subsequent passes.
+ Methods defined: 16
+ Flags values: the flags are a bitfield; each bit triggers some alternate behavior if set:
+ 1: always emit a literal command (0) for the first byte of the file
+ 2: when reducing a two-byte repetition (2) command in the overlap elimination pass, don't force it to contain a
+ whole number of repetitions (i.e., an even count)
+ 4: don't emit copy commands (4, 5, 6) with a count of 3
+ 8: don't emit single-byte repetition (1) commands
+*/
+
+struct command * try_compress_multi_pass (const unsigned char * data, const unsigned char * flipped, unsigned short * size, unsigned flags) {
+ struct command * result = calloc(*size, sizeof(struct command));
+ unsigned char * reversed = malloc(*size);
+ short * sources = malloc(*size * sizeof(short));
+ unsigned short pos, next, current = 0;
+ for (pos = 0; pos < *size; pos ++) {
+ reversed[pos] = data[*size - 1 - pos];
+ sources[pos] = -1;
+ }
+ for (pos = (flags & 1); pos < *size; pos += (result[pos].count >= MULTIPASS_SKIP_THRESHOLD) ? result[pos].count : 1) {
+ result[pos] = pick_command_for_pass(data, flipped, reversed, sources, *size, pos, flags);
+ if ((result[pos].command >= 4) || (result[pos].count < MULTIPASS_SKIP_THRESHOLD)) sources[current ++] = pos;
+ }
+ free(reversed);
+ free(sources);
+ for (pos = 0; pos < *size; pos ++) {
+ for (current = 1; current < result[pos].count; current ++) if (result[pos + current].count > result[pos].count) {
+ result[pos].count = current;
+ if ((result[pos].command == 2) && (current & 1) && !(flags & 2)) result[pos].count --;
+ }
+ if (result[pos].count <= command_size(result[pos])) result[pos] = (struct command) {.command = 0, .count = 0};
+ }
+ for (pos = 0; pos < *size; pos ++)
+ if (!result[pos].command) {
+ for (current = 1; (current < MAX_COMMAND_COUNT) && ((pos + current) < *size); current ++) if (result[pos + current].command) break;
+ result[pos] = (struct command) {.command = 0, .count = current, .value = pos};
+ } else if (result[pos].count > MAX_COMMAND_COUNT) {
+ result[pos + MAX_COMMAND_COUNT] = result[pos];
+ result[pos + MAX_COMMAND_COUNT].count -= MAX_COMMAND_COUNT;
+ if ((result[pos + MAX_COMMAND_COUNT].command >= 4) && (result[pos + MAX_COMMAND_COUNT].value >= 0))
+ result[pos + MAX_COMMAND_COUNT].value += (result[pos].command == 6) ? -MAX_COMMAND_COUNT : MAX_COMMAND_COUNT;
+ result[pos].count = MAX_COMMAND_COUNT;
+ }
+ for (next = pos = 0; pos < *size; pos ++)
+ if (pos == next)
+ next += result[pos].count;
+ else
+ result[pos].command = 7;
+ repack(&result, size);
+ return result;
+}
+
+struct command pick_command_for_pass (const unsigned char * data, const unsigned char * flipped, const unsigned char * reversed, const short * sources,
+ unsigned short length, unsigned short position, unsigned flags) {
+ struct command result = pick_repetition_for_pass(data, length, position, flags);
+ if (result.count >= MULTIPASS_SKIP_THRESHOLD) return result;
+ unsigned char p;
+ for (p = 0; p < 3; p ++) {
+ struct command temp = pick_copy_for_pass(data, p[(const unsigned char * []) {data, flipped, reversed}], sources, p + 4, length, position, flags);
+ if (temp.command == 7) continue;
+ if (temp.count > result.count) result = temp;
+ }
+ if ((result.command >= 4) && (result.value >= (position - LOOKBACK_LIMIT))) result.value -= position;
+ return result;
+}
+
+struct command pick_repetition_for_pass (const unsigned char * data, unsigned short length, unsigned short position, unsigned flags) {
+ unsigned short p;
+ if (data[position]) {
+ if ((position + 1) >= length) return (struct command) {.command = 1, .count = 1, .value = data[position]};
+ struct command result;
+ if (!(flags & 8) && (data[position] == data[position + 1]))
+ result = (struct command) {.command = 1, .value = data[position]};
+ else
+ result = (struct command) {.command = 2, .value = data[position] | (data[position + 1] << 8)};
+ for (p = 1; ((position + p) < length) && (p < LOOKAHEAD_LIMIT); p ++) if (data[position + p] != data[position + (p & 1)]) break;
+ result.count = p;
+ return result;
+ } else {
+ for (p = position + 1; (p < length) && (p < (position + LOOKAHEAD_LIMIT)); p ++) if (data[p]) break;
+ return (struct command) {.command = 3, .count = p - position};
+ }
+}
+
+struct command pick_copy_for_pass (const unsigned char * data, const unsigned char * reference, const short * sources, unsigned char command_type,
+ unsigned short length, unsigned short position, unsigned flags) {
+ struct command result = {.command = 7, .count = (flags & 4) ? 4 : 3};
+ if (length < 3) return result;
+ unsigned refpos, count;
+ const unsigned char * current;
+ unsigned char buffer[6] = {0};
+ memcpy(buffer, reference + length - 3, 3);
+ while (*sources >= 0) {
+ refpos = *(sources ++);
+ if (command_type == 6) refpos = length - 1 - refpos;
+ if (refpos >= (length - 3))
+ current = buffer + refpos - (length - 3);
+ else
+ current = reference + refpos;
+ if (memcmp(data + position, current, 4)) continue;
+ for (count = 4; (count < (length - position)) && (count < (length - refpos)); count ++) if (data[position + count] != current[count]) break;
+ if (count > (length - refpos)) count = length - refpos;
+ if (count > (length - position)) count = length - position;
+ if (result.count > count) continue;
+ result = (struct command) {.command = command_type, .count = count, .value = sources[-1]};
+ }
+ return result;
+}