diff options
Diffstat (limited to 'tools/lz/mpcomp.c')
-rw-r--r-- | tools/lz/mpcomp.c | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/tools/lz/mpcomp.c b/tools/lz/mpcomp.c new file mode 100644 index 000000000..5961be8eb --- /dev/null +++ b/tools/lz/mpcomp.c @@ -0,0 +1,112 @@ +#include "proto.h" + +/* + Multi-pass compressor: performs an initial pass generating a single command for each byte position in the data and + refines the command stream further in subsequent passes. + Methods defined: 16 + Flags values: the flags are a bitfield; each bit triggers some alternate behavior if set: + 1: always emit a literal command (0) for the first byte of the file + 2: when reducing a two-byte repetition (2) command in the overlap elimination pass, don't force it to contain a + whole number of repetitions (i.e., an even count) + 4: don't emit copy commands (4, 5, 6) with a count of 3 + 8: don't emit single-byte repetition (1) commands +*/ + +struct command * try_compress_multi_pass (const unsigned char * data, const unsigned char * flipped, unsigned short * size, unsigned flags) { + struct command * result = calloc(*size, sizeof(struct command)); + unsigned char * reversed = malloc(*size); + short * sources = malloc(*size * sizeof(short)); + unsigned short pos, next, current = 0; + for (pos = 0; pos < *size; pos ++) { + reversed[pos] = data[*size - 1 - pos]; + sources[pos] = -1; + } + for (pos = (flags & 1); pos < *size; pos += (result[pos].count >= MULTIPASS_SKIP_THRESHOLD) ? result[pos].count : 1) { + result[pos] = pick_command_for_pass(data, flipped, reversed, sources, *size, pos, flags); + if ((result[pos].command >= 4) || (result[pos].count < MULTIPASS_SKIP_THRESHOLD)) sources[current ++] = pos; + } + free(reversed); + free(sources); + for (pos = 0; pos < *size; pos ++) { + for (current = 1; current < result[pos].count; current ++) if (result[pos + current].count > result[pos].count) { + result[pos].count = current; + if ((result[pos].command == 2) && (current & 1) && !(flags & 2)) result[pos].count --; + } + if (result[pos].count <= command_size(result[pos])) result[pos] = (struct command) {.command = 0, .count = 0}; + } + for (pos = 0; pos < *size; pos ++) + if (!result[pos].command) { + for (current = 1; (current < MAX_COMMAND_COUNT) && ((pos + current) < *size); current ++) if (result[pos + current].command) break; + result[pos] = (struct command) {.command = 0, .count = current, .value = pos}; + } else if (result[pos].count > MAX_COMMAND_COUNT) { + result[pos + MAX_COMMAND_COUNT] = result[pos]; + result[pos + MAX_COMMAND_COUNT].count -= MAX_COMMAND_COUNT; + if ((result[pos + MAX_COMMAND_COUNT].command >= 4) && (result[pos + MAX_COMMAND_COUNT].value >= 0)) + result[pos + MAX_COMMAND_COUNT].value += (result[pos].command == 6) ? -MAX_COMMAND_COUNT : MAX_COMMAND_COUNT; + result[pos].count = MAX_COMMAND_COUNT; + } + for (next = pos = 0; pos < *size; pos ++) + if (pos == next) + next += result[pos].count; + else + result[pos].command = 7; + repack(&result, size); + return result; +} + +struct command pick_command_for_pass (const unsigned char * data, const unsigned char * flipped, const unsigned char * reversed, const short * sources, + unsigned short length, unsigned short position, unsigned flags) { + struct command result = pick_repetition_for_pass(data, length, position, flags); + if (result.count >= MULTIPASS_SKIP_THRESHOLD) return result; + unsigned char p; + for (p = 0; p < 3; p ++) { + struct command temp = pick_copy_for_pass(data, p[(const unsigned char * []) {data, flipped, reversed}], sources, p + 4, length, position, flags); + if (temp.command == 7) continue; + if (temp.count > result.count) result = temp; + } + if ((result.command >= 4) && (result.value >= (position - LOOKBACK_LIMIT))) result.value -= position; + return result; +} + +struct command pick_repetition_for_pass (const unsigned char * data, unsigned short length, unsigned short position, unsigned flags) { + unsigned short p; + if (data[position]) { + if ((position + 1) >= length) return (struct command) {.command = 1, .count = 1, .value = data[position]}; + struct command result; + if (!(flags & 8) && (data[position] == data[position + 1])) + result = (struct command) {.command = 1, .value = data[position]}; + else + result = (struct command) {.command = 2, .value = data[position] | (data[position + 1] << 8)}; + for (p = 1; ((position + p) < length) && (p < LOOKAHEAD_LIMIT); p ++) if (data[position + p] != data[position + (p & 1)]) break; + result.count = p; + return result; + } else { + for (p = position + 1; (p < length) && (p < (position + LOOKAHEAD_LIMIT)); p ++) if (data[p]) break; + return (struct command) {.command = 3, .count = p - position}; + } +} + +struct command pick_copy_for_pass (const unsigned char * data, const unsigned char * reference, const short * sources, unsigned char command_type, + unsigned short length, unsigned short position, unsigned flags) { + struct command result = {.command = 7, .count = (flags & 4) ? 4 : 3}; + if (length < 3) return result; + unsigned refpos, count; + const unsigned char * current; + unsigned char buffer[6] = {0}; + memcpy(buffer, reference + length - 3, 3); + while (*sources >= 0) { + refpos = *(sources ++); + if (command_type == 6) refpos = length - 1 - refpos; + if (refpos >= (length - 3)) + current = buffer + refpos - (length - 3); + else + current = reference + refpos; + if (memcmp(data + position, current, 4)) continue; + for (count = 4; (count < (length - position)) && (count < (length - refpos)); count ++) if (data[position + count] != current[count]) break; + if (count > (length - refpos)) count = length - refpos; + if (count > (length - position)) count = length - position; + if (result.count > count) continue; + result = (struct command) {.command = command_type, .count = count, .value = sources[-1]}; + } + return result; +} |