diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/Makefile | 9 | ||||
-rw-r--r-- | tools/lz/global.c | 35 | ||||
-rw-r--r-- | tools/lz/main.c | 54 | ||||
-rw-r--r-- | tools/lz/merging.c | 102 | ||||
-rw-r--r-- | tools/lz/mpcomp.c | 112 | ||||
-rw-r--r-- | tools/lz/nullcomp.c | 20 | ||||
-rw-r--r-- | tools/lz/options.c | 141 | ||||
-rw-r--r-- | tools/lz/output.c | 138 | ||||
-rw-r--r-- | tools/lz/packing.c | 56 | ||||
-rw-r--r-- | tools/lz/proto.h | 107 | ||||
-rw-r--r-- | tools/lz/repcomp.c | 63 | ||||
-rw-r--r-- | tools/lz/spcomp.c | 141 | ||||
-rw-r--r-- | tools/lz/uncomp.c | 92 | ||||
-rw-r--r-- | tools/lz/util.c | 54 | ||||
-rw-r--r-- | tools/lzcomp.c | 512 | ||||
-rw-r--r-- | tools/mapreader.py | 11 | ||||
-rw-r--r-- | tools/md5.c | 128 | ||||
-rw-r--r-- | tools/used_space.py | 3 |
18 files changed, 1129 insertions, 649 deletions
diff --git a/tools/Makefile b/tools/Makefile index 768f8912e..5dd3139fa 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -10,15 +10,18 @@ tools := \ palette \ pokemon_animation \ pokemon_animation_graphics \ - gfx \ - md5 + gfx all: $(tools) @: clean: rm -f $(tools) -gfx md5: common.h +gfx: common.h + +lzcomp: CFLAGS = -O3 -flto -std=c11 -Wall -Wextra -pedantic -Wno-strict-overflow -Wno-sign-compare +lzcomp: $(wildcard lz/*.c) $(wildcard lz/*.h) + $(CC) $(CFLAGS) -o $@ lz/*.c %: %.c $(CC) $(CFLAGS) -o $@ $< diff --git a/tools/lz/global.c b/tools/lz/global.c new file mode 100644 index 000000000..4d26acee8 --- /dev/null +++ b/tools/lz/global.c @@ -0,0 +1,35 @@ +#include "proto.h" + +const struct compressor compressors[] = { + // NOTE: the "flags" field for each compressor will be set to the chosen/current method number minus the base + // number for that particular compressor. That means that each compressor will use a zero-based flags value. + {.methods = 72, .name = "singlepass", .function = &try_compress_single_pass}, // 0-71 + {.methods = 2, .name = "null", .function = &store_uncompressed}, // 72-73 + {.methods = 6, .name = "repetitions", .function = &try_compress_repetitions}, // 74-79 + {.methods = 16, .name = "multipass", .function = &try_compress_multi_pass}, // 80-95 + {0} // end of the list +}; + +const unsigned char bit_flipping_table[] = { + // For each byte, the table contains that same byte with its bits flipped around (for instance, + // 0x58 (01011000 binary) becomes 0x1a (00011010 binary)). This is faster than flipping bits + // manually at runtime. + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff +}; + +char option_name_buffer[] = "-?"; // used to extract the name of a short option (separated from its argument) diff --git a/tools/lz/main.c b/tools/lz/main.c new file mode 100644 index 000000000..62e6dc413 --- /dev/null +++ b/tools/lz/main.c @@ -0,0 +1,54 @@ +#include "proto.h" + +int main (int argc, char ** argv) { + struct options options = get_options(argc, argv); + unsigned short size; + unsigned char * file_buffer = read_file_into_buffer(options.input, &size); + struct command * commands; + if (options.mode & 2) { + unsigned short original_size = size, remainder; + commands = get_commands_from_file(file_buffer, &size, &remainder); + if (!commands) error_exit(1, "invalid command stream"); + if (options.mode == 2) { + unsigned char * uncompressed = get_uncompressed_data(commands, file_buffer, &size); + if (!uncompressed) error_exit(1, "output data is too large"); + write_raw_data_to_file(options.output, uncompressed, size); + free(uncompressed); + } else + write_commands_and_padding_to_textfile(options.output, commands, size, file_buffer, original_size - remainder, remainder); + } else { + commands = compress(file_buffer, &size, options.method); + (options.mode ? write_commands_to_textfile : write_commands_to_file)(options.output, commands, size, file_buffer, options.alignment); + } + free(file_buffer); + free(commands); + return 0; +} + +struct command * compress (const unsigned char * data, unsigned short * size, unsigned method) { + unsigned char * bitflipped = malloc(*size); + unsigned current; + for (current = 0; current < *size; current ++) bitflipped[current] = bit_flipping_table[data[current]]; + const struct compressor * compressor = compressors; + struct command * result; + if (method < COMPRESSION_METHODS) { + while (method >= compressor -> methods) method -= (compressor ++) -> methods; + result = compressor -> function(data, bitflipped, size, method); + } else { + struct command * compressed_sequences[COMPRESSION_METHODS]; + unsigned short lengths[COMPRESSION_METHODS]; + unsigned flags = 0; + for (current = 0; current < COMPRESSION_METHODS; current ++) { + lengths[current] = *size; + if (flags == compressor -> methods) { + flags = 0; + compressor ++; + } + compressed_sequences[current] = compressor -> function(data, bitflipped, lengths + current, flags ++); + } + result = select_optimal_sequence(compressed_sequences, lengths, size); + for (current = 0; current < COMPRESSION_METHODS; current ++) free(compressed_sequences[current]); + } + free(bitflipped); + return result; +} diff --git a/tools/lz/merging.c b/tools/lz/merging.c new file mode 100644 index 000000000..79abb6d2e --- /dev/null +++ b/tools/lz/merging.c @@ -0,0 +1,102 @@ +#include "proto.h" + +struct command * select_optimal_sequence (struct command ** sequences, const unsigned short * lengths, unsigned short * final_length) { + struct command * compressor_sequences[NUM_COMPRESSORS * 2]; + unsigned short compressor_lengths[NUM_COMPRESSORS * 2]; + struct command * inverted_sequences[COMPRESSION_METHODS]; + unsigned short inverted_lengths[COMPRESSION_METHODS]; + unsigned p, current, method = 0; + for (current = 0; current < NUM_COMPRESSORS; current ++) { + compressor_sequences[current] = select_command_sequence(sequences + method, lengths + method, compressors[current].methods, compressor_lengths + current); + compressor_sequences[current + NUM_COMPRESSORS] = select_command_sequence(sequences + method, lengths + method, -(int) compressors[current].methods, + compressor_lengths + (current + NUM_COMPRESSORS)); + for (p = 0; p < compressors[current].methods; p ++) { + inverted_sequences[method + compressors[current].methods - 1 - p] = sequences[method + p]; + inverted_lengths[method + compressors[current].methods - 1 - p] = lengths[method + p]; + } + method += compressors[current].methods; + } + unsigned short final_lengths[8]; + struct command * final_sequences[8] = { + select_command_sequence(compressor_sequences, compressor_lengths, NUM_COMPRESSORS, final_lengths), + select_command_sequence(compressor_sequences, compressor_lengths, -NUM_COMPRESSORS, final_lengths + 1), + select_command_sequence(compressor_sequences + NUM_COMPRESSORS, compressor_lengths + NUM_COMPRESSORS, NUM_COMPRESSORS, final_lengths + 2), + select_command_sequence(compressor_sequences + NUM_COMPRESSORS, compressor_lengths + NUM_COMPRESSORS, -NUM_COMPRESSORS, final_lengths + 3), + select_command_sequence(sequences, lengths, COMPRESSION_METHODS, final_lengths + 4), + select_command_sequence(sequences, lengths, -COMPRESSION_METHODS, final_lengths + 5), + select_command_sequence(inverted_sequences, inverted_lengths, COMPRESSION_METHODS, final_lengths + 6), + select_command_sequence(inverted_sequences, inverted_lengths, -COMPRESSION_METHODS, final_lengths + 7) + }; + for (current = 0; current < (2 * NUM_COMPRESSORS); current ++) free(compressor_sequences[current]); + struct command * result = select_command_sequence(final_sequences, final_lengths, 8, final_length); + for (current = 0; current < 8; current ++) free(final_sequences[current]); + return result; +} + +struct command * select_command_sequence (struct command ** sequences, const unsigned short * lengths, int count, unsigned short * final_length) { + // negative count indicates iterating backwards + unsigned short min_sequence = 0, min_length = compressed_length(*sequences, *lengths); + unsigned short seq, len; + int backwards = 0; + if (count < 0) { + backwards = 1; + count = -count; + } + for (seq = 1; seq < count; seq ++) { + len = compressed_length(sequences[seq], lengths[seq]); + if (len < min_length) { + min_sequence = seq; + min_length = len; + } + } + *final_length = lengths[min_sequence]; + struct command * current = malloc(*final_length * sizeof(struct command)); + memcpy(current, sequences[min_sequence], *final_length * sizeof(struct command)); + struct command * new; + for (seq = 1; seq < count; seq ++) { + if (backwards) seq = count - seq; + new = merge_command_sequences(current, *final_length, sequences[(seq + min_sequence) % count], lengths[(seq + min_sequence) % count], final_length); + if (backwards) seq = count - seq; // restore the value for the loop + free(current); + current = new; + } + return current; +} + +struct command * merge_command_sequences (const struct command * current, unsigned short current_length, const struct command * new, unsigned short new_length, + unsigned short * result_length) { + struct command * result = malloc(sizeof(struct command) * (current_length + new_length)); + struct command * current_command = result; + const struct command * saved_current; + const struct command * saved_new; + unsigned short current_pos, new_pos; + while (current_length) { + if (current -> count == new -> count) { + *(current_command ++) = pick_best_command(2, *(current ++), *(new ++)); + current_length --; + continue; + } + saved_current = current; + saved_new = new; + current_pos = (current ++) -> count; + new_pos = (new ++) -> count; + current_length --; + while (current_pos != new_pos) + if (current_pos < new_pos) { + current_pos += (current ++) -> count; + current_length --; + } else + new_pos += (new ++) -> count; + current_pos = compressed_length(saved_current, current - saved_current); + new_pos = compressed_length(saved_new, new - saved_new); + if (new_pos < current_pos) { + memcpy(current_command, saved_new, sizeof(struct command) * (new - saved_new)); + current_command += new - saved_new; + } else { + memcpy(current_command, saved_current, sizeof(struct command) * (current - saved_current)); + current_command += current - saved_current; + } + } + *result_length = current_command - result; + return result; +} diff --git a/tools/lz/mpcomp.c b/tools/lz/mpcomp.c new file mode 100644 index 000000000..5961be8eb --- /dev/null +++ b/tools/lz/mpcomp.c @@ -0,0 +1,112 @@ +#include "proto.h" + +/* + Multi-pass compressor: performs an initial pass generating a single command for each byte position in the data and + refines the command stream further in subsequent passes. + Methods defined: 16 + Flags values: the flags are a bitfield; each bit triggers some alternate behavior if set: + 1: always emit a literal command (0) for the first byte of the file + 2: when reducing a two-byte repetition (2) command in the overlap elimination pass, don't force it to contain a + whole number of repetitions (i.e., an even count) + 4: don't emit copy commands (4, 5, 6) with a count of 3 + 8: don't emit single-byte repetition (1) commands +*/ + +struct command * try_compress_multi_pass (const unsigned char * data, const unsigned char * flipped, unsigned short * size, unsigned flags) { + struct command * result = calloc(*size, sizeof(struct command)); + unsigned char * reversed = malloc(*size); + short * sources = malloc(*size * sizeof(short)); + unsigned short pos, next, current = 0; + for (pos = 0; pos < *size; pos ++) { + reversed[pos] = data[*size - 1 - pos]; + sources[pos] = -1; + } + for (pos = (flags & 1); pos < *size; pos += (result[pos].count >= MULTIPASS_SKIP_THRESHOLD) ? result[pos].count : 1) { + result[pos] = pick_command_for_pass(data, flipped, reversed, sources, *size, pos, flags); + if ((result[pos].command >= 4) || (result[pos].count < MULTIPASS_SKIP_THRESHOLD)) sources[current ++] = pos; + } + free(reversed); + free(sources); + for (pos = 0; pos < *size; pos ++) { + for (current = 1; current < result[pos].count; current ++) if (result[pos + current].count > result[pos].count) { + result[pos].count = current; + if ((result[pos].command == 2) && (current & 1) && !(flags & 2)) result[pos].count --; + } + if (result[pos].count <= command_size(result[pos])) result[pos] = (struct command) {.command = 0, .count = 0}; + } + for (pos = 0; pos < *size; pos ++) + if (!result[pos].command) { + for (current = 1; (current < MAX_COMMAND_COUNT) && ((pos + current) < *size); current ++) if (result[pos + current].command) break; + result[pos] = (struct command) {.command = 0, .count = current, .value = pos}; + } else if (result[pos].count > MAX_COMMAND_COUNT) { + result[pos + MAX_COMMAND_COUNT] = result[pos]; + result[pos + MAX_COMMAND_COUNT].count -= MAX_COMMAND_COUNT; + if ((result[pos + MAX_COMMAND_COUNT].command >= 4) && (result[pos + MAX_COMMAND_COUNT].value >= 0)) + result[pos + MAX_COMMAND_COUNT].value += (result[pos].command == 6) ? -MAX_COMMAND_COUNT : MAX_COMMAND_COUNT; + result[pos].count = MAX_COMMAND_COUNT; + } + for (next = pos = 0; pos < *size; pos ++) + if (pos == next) + next += result[pos].count; + else + result[pos].command = 7; + repack(&result, size); + return result; +} + +struct command pick_command_for_pass (const unsigned char * data, const unsigned char * flipped, const unsigned char * reversed, const short * sources, + unsigned short length, unsigned short position, unsigned flags) { + struct command result = pick_repetition_for_pass(data, length, position, flags); + if (result.count >= MULTIPASS_SKIP_THRESHOLD) return result; + unsigned char p; + for (p = 0; p < 3; p ++) { + struct command temp = pick_copy_for_pass(data, p[(const unsigned char * []) {data, flipped, reversed}], sources, p + 4, length, position, flags); + if (temp.command == 7) continue; + if (temp.count > result.count) result = temp; + } + if ((result.command >= 4) && (result.value >= (position - LOOKBACK_LIMIT))) result.value -= position; + return result; +} + +struct command pick_repetition_for_pass (const unsigned char * data, unsigned short length, unsigned short position, unsigned flags) { + unsigned short p; + if (data[position]) { + if ((position + 1) >= length) return (struct command) {.command = 1, .count = 1, .value = data[position]}; + struct command result; + if (!(flags & 8) && (data[position] == data[position + 1])) + result = (struct command) {.command = 1, .value = data[position]}; + else + result = (struct command) {.command = 2, .value = data[position] | (data[position + 1] << 8)}; + for (p = 1; ((position + p) < length) && (p < LOOKAHEAD_LIMIT); p ++) if (data[position + p] != data[position + (p & 1)]) break; + result.count = p; + return result; + } else { + for (p = position + 1; (p < length) && (p < (position + LOOKAHEAD_LIMIT)); p ++) if (data[p]) break; + return (struct command) {.command = 3, .count = p - position}; + } +} + +struct command pick_copy_for_pass (const unsigned char * data, const unsigned char * reference, const short * sources, unsigned char command_type, + unsigned short length, unsigned short position, unsigned flags) { + struct command result = {.command = 7, .count = (flags & 4) ? 4 : 3}; + if (length < 3) return result; + unsigned refpos, count; + const unsigned char * current; + unsigned char buffer[6] = {0}; + memcpy(buffer, reference + length - 3, 3); + while (*sources >= 0) { + refpos = *(sources ++); + if (command_type == 6) refpos = length - 1 - refpos; + if (refpos >= (length - 3)) + current = buffer + refpos - (length - 3); + else + current = reference + refpos; + if (memcmp(data + position, current, 4)) continue; + for (count = 4; (count < (length - position)) && (count < (length - refpos)); count ++) if (data[position + count] != current[count]) break; + if (count > (length - refpos)) count = length - refpos; + if (count > (length - position)) count = length - position; + if (result.count > count) continue; + result = (struct command) {.command = command_type, .count = count, .value = sources[-1]}; + } + return result; +} diff --git a/tools/lz/nullcomp.c b/tools/lz/nullcomp.c new file mode 100644 index 000000000..d4535bd39 --- /dev/null +++ b/tools/lz/nullcomp.c @@ -0,0 +1,20 @@ +#include "proto.h" + +/* + Null compressor: stores data uncompressed, using literal (0) commands only. + Methods defined: 2 + Flags values: 0 = split a trailing 33-to-64-byte block at the end into two short blocks; 1 = don't +*/ + +struct command * store_uncompressed (__attribute__((unused)) const unsigned char * data, __attribute__((unused)) const unsigned char * bitflipped, unsigned short * size, unsigned flags) { + unsigned short position, block, remainder = *size; + struct command * result = NULL; + *size = 0; + for (position = 0; remainder; position += block, remainder -= block) { + block = (remainder > MAX_COMMAND_COUNT) ? MAX_COMMAND_COUNT : remainder; + if (!(flags & 1) && (block <= (2 * SHORT_COMMAND_COUNT)) && (block > SHORT_COMMAND_COUNT)) block = SHORT_COMMAND_COUNT; + result = realloc(result, sizeof(struct command) * (1 + *size)); + result[(*size) ++] = (struct command) {.command = 0, .count = block, .value = position}; + } + return result; +} diff --git a/tools/lz/options.c b/tools/lz/options.c new file mode 100644 index 000000000..8e5d05a7b --- /dev/null +++ b/tools/lz/options.c @@ -0,0 +1,141 @@ +#include "proto.h" + +struct options get_options (int argc, char ** argv) { + struct options result = {.input = NULL, .output = NULL, .mode = 0, .alignment = 0, .method = COMPRESSION_METHODS}; + const char * program_name = *argv; + int compressor = -1; + if (argc == 1) usage(program_name); + for (argv ++; *argv; argv ++) { + if (**argv != '-') break; + if (!1[*argv]) break; + if (!strcmp(*argv, "--")) { + argv ++; + break; + } else if (!(strcmp(*argv, "--text") && strcmp(*argv, "-t"))) + result.mode = 1; + else if (!(strcmp(*argv, "--binary") && strcmp(*argv, "-b"))) + result.mode = 0; + else if (!(strcmp(*argv, "--uncompress") && strcmp(*argv, "-u"))) + result.mode = 2; + else if (!(strcmp(*argv, "--dump") && strcmp(*argv, "-d"))) + result.mode = 3; + else if (!(strcmp(*argv, "--align") && strncmp(*argv, "-a", 2))) + result.alignment = parse_numeric_option_argument(&argv, 12); + else if (!(strcmp(*argv, "--method") && strncmp(*argv, "-m", 2))) + result.method = parse_numeric_option_argument(&argv, COMPRESSION_METHODS - 1); + else if (!(strcmp(*argv, "--compressor") && strncmp(*argv, "-c", 2))) + compressor = parse_compressor_option_argument(&argv); + else if (!(strcmp(*argv, "--optimize") && strcmp(*argv, "-o"))) { + result.method = COMPRESSION_METHODS; + compressor = -1; + } else if (!(strcmp(*argv, "--help") && strcmp(*argv, "-?"))) + usage(program_name); + else if (!(strcmp(*argv, "--list") && strcmp(*argv, "-l"))) + list_compressors(); + else + error_exit(3, "unknown option: %s", *argv); + } + if (compressor >= 0) { + if (result.method >= COMPRESSION_METHODS) result.method = 0; + if (result.method >= compressors[compressor].methods) + error_exit(3, "method for the %s compressor must be between 0 and %u", compressors[compressor].name, compressors[compressor].methods - 1); + while (compressor > 0) result.method += compressors[-- compressor].methods; + } + if (*argv) { + if (strcmp(*argv, "-")) result.input = *argv; + if (*(++ argv)) { + if (argv[1]) error_exit(3, "too many command-line arguments"); + if (strcmp(*argv, "-")) result.output = *argv; + } + } + return result; +} + +unsigned parse_numeric_option_argument (char *** alp, unsigned limit) { + const char * option; + const char * value = get_argument_for_option(alp, &option); + char * error; + unsigned long result = strtoul(value, &error, 10); + if (*error) error_exit(3, "invalid argument to option %s", option); + if (result > limit) error_exit(3, "argument to option %s must be between 0 and %u", option, limit); + return result; +} + +int parse_compressor_option_argument (char *** alp) { + const char * name = get_argument_for_option(alp, NULL); + if (!strcmp(name, "*")) return -1; + int result = -1; + unsigned length = strlen(name); + const struct compressor * compressor; + for (compressor = compressors; compressor -> name; compressor ++) { + if (strncmp(name, compressor -> name, length)) continue; + if (result >= 0) error_exit(3, "ambiguous compressor prefix: %s", name); + result = compressor - compressors; + } + if (result < 0) error_exit(3, "unknown compressor: %s", name); + return result; +} + +const char * get_argument_for_option (char *** alp, const char ** option_name) { + // alp: argument list pointer (i.e., address of the current value of argv after indexing) + // will point at the last consumed argument on exit (since the caller will probably increment it once more) + const char * option; + const char * result; + if (1[**alp] == '-') { + option = *((*alp) ++); + result = **alp; + } else { + option_name_buffer[1] = 1[**alp]; + option = option_name_buffer; + result = **alp + 2; + } + if (!(result && *result)) error_exit(3, "option %s requires an argument", option); + if (option_name) *option_name = option; + return result; +} + +noreturn usage (const char * program_name) { + fprintf(stderr, "Usage: %s [<options>] [<source file> [<output>]]\n\n", program_name); + fputs("Execution mode:\n", stderr); + fputs(" -b, --binary Output the command stream as binary data (default).\n", stderr); + fputs(" -t, --text Output the command stream as text.\n", stderr); + fputs(" -u, --uncompress Process a compressed file and output the original data.\n", stderr); + fputs(" -d, --dump Process a compressed file and dump the command stream as\n", stderr); + fputs(" text (as if compressed with the --text option).\n", stderr); + fputs(" -l, --list List compressors and their method numbers.\n", stderr); + fputs(" -?, --help Print this help text and exit.\n", stderr); + fputs("Compression options:\n", stderr); + fputs(" -o, --optimize Use the best combination of compression\n", stderr); + fputs(" methods available (default).\n", stderr); + fputs(" -m<number>, --method <number> Use only one specific compression method.\n", stderr); + fprintf(stderr, " Valid method numbers are between 0 and %u.\n", COMPRESSION_METHODS - 1); + fputs(" -c<name>, --compressor <name> Use the specified compressor: the method\n", stderr); + fputs(" number will be relative to that compressor.\n", stderr); + fputs(" Any prefix of the compressor name may be\n", stderr); + fputs(" specified. Use * to indicate any compressor.\n", stderr); + fputs(" -a<number>, --align <number> Pad the compressed output with zeros until\n", stderr); + fputs(" the size has the specified number of low bits\n", stderr); + fputs(" cleared (default: 0).\n", stderr); + fputs("The source and output filenames can be given as - (or omitted) to use standard\n", stderr); + fputs("input and output. Use -- to indicate that subsequent arguments are file names.\n", stderr); + exit(3); +} + +noreturn list_compressors (void) { + const struct compressor * compressor; + unsigned current, length = 10; + for (compressor = compressors; compressor -> name; compressor ++) if ((current = strlen(compressor -> name)) > length) length = current; + fprintf(stderr, "%-*s Offset Methods\n", length, "Compressor"); + for (current = 0; current < length; current ++) putc('-', stderr); + fputs(" ------ -------\n", stderr); + current = 0; + for (compressor = compressors; compressor -> name; compressor ++) { + fprintf(stderr, "%-*s %6u %7u\n", length, compressor -> name, current, compressor -> methods); + current += compressor -> methods; + } + putc('\n', stderr); + fputs("Note: the offset indicates the compressor's lowest method number when the\n", stderr); + fputs("--compressor option is not given. When that option is used, every compressor's\n", stderr); + fputs("methods are numbered from zero.\n", stderr); + exit(3); +} diff --git a/tools/lz/output.c b/tools/lz/output.c new file mode 100644 index 000000000..43e7ba928 --- /dev/null +++ b/tools/lz/output.c @@ -0,0 +1,138 @@ +#include "proto.h" + +void write_commands_to_textfile (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream, + unsigned char alignment) { + FILE * fp = file ? fopen(file, "w") : stdout; + if (!fp) error_exit(1, "could not open file %s for writing", file); + unsigned length = 0; + while (count --) { + write_command_to_textfile(fp, *commands, input_stream); + length += command_size(*(commands ++)); + } + if (fputs("\tlzend\n", fp) < 0) error_exit(1, "could not write terminator to compressed output"); + length = ~length & ((1 << alignment) - 1); + if (length --) { + int rv = fputs("\tdb 0", fp); + while ((rv >= 0) && length --) rv = fputs(", 0", fp); + if (rv >= 0) rv = -(putc('\n', fp) == EOF); + if (rv < 0) error_exit(1, "could not write padding to compressed output"); + } + if (file) fclose(fp); +} + +void write_commands_and_padding_to_textfile (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream, + unsigned padding_offset, unsigned padding_size) { + FILE * fp = file ? fopen(file, "w") : stdout; + if (!fp) error_exit(1, "could not open file %s for writing", file); + while (count --) write_command_to_textfile(fp, *(commands ++), input_stream); + if (fputs("\tlzend\n", fp) < 0) error_exit(1, "could not write terminator to compressed output"); + if (padding_size) { + input_stream += padding_offset; + int rv = fprintf(fp, "\tdb $%02hhx", *(input_stream ++)); + while ((rv >= 0) && (-- padding_size)) rv = fprintf(fp, ", $%02hhx", *(input_stream ++)); + if (rv >= 0) rv = -(putc('\n', fp) == EOF); + if (rv < 0) error_exit(1, "could not write padding to compressed output"); + } + if (file) fclose(fp); +} + +void write_command_to_textfile (FILE * fp, struct command command, const unsigned char * input_stream) { + if ((!command.count) || (command.count > MAX_COMMAND_COUNT)) error_exit(2, "invalid command in output stream"); + int rv, pos; + const char * kind; + switch (command.command) { + case 0: + if ((rv = fprintf(fp, "\tlzdata")) < 0) break; + for (pos = 0; pos < command.count; pos ++) if ((rv = fprintf(fp, "%s$%02hhx", pos ? ", " : " ", input_stream[command.value + pos])) < 0) break; + rv = putc('\n', fp); + break; + case 1: + if ((command.value < 0) || (command.value > 255)) error_exit(2, "invalid command in output stream"); + rv = fprintf(fp, "\tlzrepeat %u, $%02hhx\n", command.count, (unsigned char) command.value); + break; + case 2: + if (command.value < 0) error_exit(2, "invalid command in output stream"); + rv = fprintf(fp, "\tlzrepeat %u, $%02hhx, $%02hhx\n", command.count, (unsigned char) command.value, (unsigned char) (command.value >> 8)); + break; + case 3: + rv = fprintf(fp, "\tlzzero %u\n", command.count); + break; + case 4: + kind = "normal"; + goto copy; + case 5: + kind = "flipped"; + goto copy; + case 6: + kind = "reversed"; + copy: + if ((command.value < -LOOKBACK_LIMIT) || (command.value >= MAX_FILE_SIZE)) error_exit(2, "invalid command in output stream"); + if (command.value < 0) + rv = fprintf(fp, "\tlzcopy %s, %u, %d\n", kind, command.count, command.value); + else + rv = fprintf(fp, "\tlzcopy %s, %u, $%04hx\n", kind, command.count, (unsigned short) command.value); + break; + default: + error_exit(2, "invalid command in output stream"); + } + if (rv < 0) error_exit(1, "could not write command to compressed output"); +} + +void write_commands_to_file (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream, unsigned char alignment) { + FILE * fp = file ? fopen(file, "wb") : stdout; + if (!fp) error_exit(1, "could not open file %s for writing", file); + unsigned length = 0; + while (count --) { + write_command_to_file(fp, *commands, input_stream); + length += command_size(*(commands ++)); + } + if (putc(-1, fp) == EOF) error_exit(1, "could not write terminator to compressed output"); + length = ~length & ((1 << alignment) - 1); + while (length --) if (putc(0, fp) == EOF) error_exit(1, "could not write padding to compressed output"); + if (file) fclose(fp); +} + +void write_command_to_file (FILE * fp, struct command command, const unsigned char * input_stream) { + if ((!command.count) || (command.count > MAX_COMMAND_COUNT)) error_exit(2, "invalid command in output stream"); + unsigned char buf[4]; + unsigned char * pos = buf; + int n; + command.count --; + if (command.count < SHORT_COMMAND_COUNT) + *(pos ++) = (command.command << 5) + command.count; + else { + *(pos ++) = 224 + (command.command << 2) + (command.count >> 8); + *(pos ++) = command.count; + } + switch (command.command) { + case 1: case 2: + if ((command.value < 0) || (command.value >= (1 << (command.command << 3)))) error_exit(2, "invalid command in output stream"); + for (n = 0; n < command.command; n ++) *(pos ++) = command.value >> (n << 3); + case 0: case 3: + break; + default: + if ((command.value < -LOOKBACK_LIMIT) || (command.value >= MAX_FILE_SIZE)) error_exit(2, "invalid command in output stream"); + if (command.value < 0) + *(pos ++) = command.value ^ 127; + else { + *(pos ++) = command.value >> 8; + *(pos ++) = command.value; + } + } + if (fwrite(buf, 1, pos - buf, fp) != (pos - buf)) error_exit(1, "could not write command to compressed output"); + if (command.command) return; + command.count ++; + if (fwrite(input_stream + command.value, 1, command.count, fp) != command.count) error_exit(1, "could not write data to compressed output"); +} + +void write_raw_data_to_file (const char * file, const void * data, unsigned length) { + FILE * fp = file ? fopen(file, "w") : stdout; + if (!fp) error_exit(1, "could not open file %s for writing", file); + while (length) { + unsigned rv = fwrite(data, 1, length, fp); + if (!rv) error_exit(1, "could not write raw data to output"); + data = (const char *) data + rv; + length -= rv; + } + if (file) fclose(fp); +} diff --git a/tools/lz/packing.c b/tools/lz/packing.c new file mode 100644 index 000000000..0cb9fae90 --- /dev/null +++ b/tools/lz/packing.c @@ -0,0 +1,56 @@ +#include "proto.h" + +void optimize (struct command * commands, unsigned short count) { + while (count && (commands -> command == 7)) commands ++, count --; + if (count < 2) return; + struct command * end = commands + count; + struct command * next; + for (next = commands + 1; next < end; next ++) { + if (next -> command == 7) continue; + if ( + !(commands -> command) && + (command_size(*next) == next -> count) && + ((commands -> count + next -> count) <= MAX_COMMAND_COUNT) && + ((commands -> count > SHORT_COMMAND_COUNT) || ((commands -> count + next -> count) <= SHORT_COMMAND_COUNT)) + ) { + commands -> count += next -> count; + next -> command = 7; + continue; + } + if (next -> command == commands -> command) + switch (commands -> command) { + case 0: + if ((commands -> value + commands -> count) != next -> value) break; + commands -> count += next -> count; + next -> command = 7; + if (commands -> count <= MAX_COMMAND_COUNT) continue; + next -> command = 0; + next -> value = commands -> value + MAX_COMMAND_COUNT; + next -> count = commands -> count - MAX_COMMAND_COUNT; + commands -> count = MAX_COMMAND_COUNT; + break; + case 1: + if (commands -> value != next -> value) break; + // fallthrough + case 3: + if ((commands -> count + next -> count) <= MAX_COMMAND_COUNT) { + commands -> count += next -> count; + next -> command = 7; + continue; + } + next -> count = (commands -> count + next -> count) - MAX_COMMAND_COUNT; + commands -> count = MAX_COMMAND_COUNT; + } + commands = next; + } +} + +void repack (struct command ** commands, unsigned short * length) { + struct command * new_commands = malloc(sizeof(struct command) * *length); + struct command * current = new_commands; + unsigned short p; + for (p = 0; p < *length; p ++) if (p[*commands].command != 7) *(current ++) = p[*commands]; + free(*commands); + *commands = new_commands; + *length = current - new_commands; +} diff --git a/tools/lz/proto.h b/tools/lz/proto.h new file mode 100644 index 000000000..845774221 --- /dev/null +++ b/tools/lz/proto.h @@ -0,0 +1,107 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#define NUM_COMPRESSORS 4 +#define COMPRESSION_METHODS 96 /* sum of all values for the methods field in compressors */ +#define MAX_FILE_SIZE 32768 +#define SHORT_COMMAND_COUNT 32 +#define MAX_COMMAND_COUNT 1024 +#define LOOKBACK_LIMIT 128 /* highest negative valid count for a copy command */ +#define LOOKAHEAD_LIMIT 3072 /* maximum lookahead distance for the first pass of multi-pass compression */ +#define MULTIPASS_SKIP_THRESHOLD 64 + +#if __STDC_VERSION__ >= 201112L + // <noreturn.h> forces "noreturn void", which is silly and redundant; this is simpler + #define noreturn _Noreturn void +#else + #define noreturn void /* fallback */ +#endif + +struct command { + unsigned command: 3; // commands 0-6 as per compression spec; command 7 is used as a dummy placeholder + unsigned count: 12; // always equals the uncompressed data length + signed value: 17; // offset for commands 0 (into source) and 4-6 (into decompressed output); repeated bytes for commands 1-2 +}; + +struct compressor { + unsigned methods; + const char * name; + struct command * (* function) (const unsigned char *, const unsigned char *, unsigned short *, unsigned); +}; + +struct options { + const char * input; + const char * output; + unsigned method; // method to use, or >= COMPRESSION_METHODS to try them all + unsigned char mode; // 0: compress, 1: compress to text, 2: uncompress, 3: dump commands as text + unsigned char alignment; // 1 << value +}; + +// global.c +extern const struct compressor compressors[]; +extern const unsigned char bit_flipping_table[]; +extern char option_name_buffer[]; + +// main.c +int main(int, char **); +struct command * compress(const unsigned char *, unsigned short *, unsigned); + +// merging.c +struct command * select_optimal_sequence(struct command **, const unsigned short *, unsigned short *); +struct command * select_command_sequence(struct command **, const unsigned short *, int, unsigned short *); +struct command * merge_command_sequences(const struct command *, unsigned short, const struct command *, unsigned short, unsigned short *); + +// mpcomp.c +struct command * try_compress_multi_pass(const unsigned char *, const unsigned char *, unsigned short *, unsigned); +struct command pick_command_for_pass(const unsigned char *, const unsigned char *, const unsigned char *, const short *, unsigned short, + unsigned short, unsigned); +struct command pick_repetition_for_pass(const unsigned char *, unsigned short, unsigned short, unsigned); +struct command pick_copy_for_pass(const unsigned char *, const unsigned char *, const short *, unsigned char, unsigned short, unsigned short, unsigned); + +// nullcomp.c +struct command * store_uncompressed(const unsigned char *, const unsigned char *, unsigned short *, unsigned); + +// options.c +struct options get_options(int, char **); +unsigned parse_numeric_option_argument(char ***, unsigned); +int parse_compressor_option_argument(char ***); +const char * get_argument_for_option(char ***, const char **); +noreturn usage(const char *); +noreturn list_compressors(void); + +// output.c +void write_commands_to_textfile(const char *, const struct command *, unsigned, const unsigned char *, unsigned char); +void write_commands_and_padding_to_textfile(const char *, const struct command *, unsigned, const unsigned char *, unsigned, unsigned); +void write_command_to_textfile(FILE *, struct command, const unsigned char *); +void write_commands_to_file(const char *, const struct command *, unsigned, const unsigned char *, unsigned char); +void write_command_to_file(FILE *, struct command, const unsigned char *); +void write_raw_data_to_file(const char *, const void *, unsigned); + +// packing.c +void optimize(struct command *, unsigned short); +void repack(struct command **, unsigned short *); + +// repcomp.c +struct command * try_compress_repetitions(const unsigned char *, const unsigned char *, unsigned short *, unsigned); +struct command find_repetition_at_position(const unsigned char *, unsigned short, unsigned short); + +// spcomp.c +struct command * try_compress_single_pass(const unsigned char *, const unsigned char *, unsigned short *, unsigned); +struct command find_best_copy(const unsigned char *, unsigned short, unsigned short, const unsigned char *, unsigned); +unsigned short scan_forwards(const unsigned char *, unsigned short, const unsigned char *, unsigned short, short *); +unsigned short scan_backwards(const unsigned char *, unsigned short, unsigned short, short *); +struct command find_best_repetition(const unsigned char *, unsigned short, unsigned short); + +// uncomp.c +struct command * get_commands_from_file(const unsigned char *, unsigned short * restrict, unsigned short * restrict); +unsigned char * get_uncompressed_data(const struct command *, const unsigned char *, unsigned short *); + +// util.c +noreturn error_exit(int, const char *, ...); +unsigned char * read_file_into_buffer(const char *, unsigned short *); +struct command pick_best_command(unsigned, struct command, ...); +int is_better(struct command, struct command); +short command_size(struct command); +unsigned short compressed_length(const struct command *, unsigned short); diff --git a/tools/lz/repcomp.c b/tools/lz/repcomp.c new file mode 100644 index 000000000..f2bbad8a6 --- /dev/null +++ b/tools/lz/repcomp.c @@ -0,0 +1,63 @@ +#include "proto.h" + +/* + Repetitions compressor: compresses the data only using a subset of the available repetition commands. + Methods defined: 6 + Flags values: the value plus one is taken as a bitfield indicating which kinds of repetition commands are used + (lowest bit to highest: repeat single byte (1), repeat two bytes (2), repeat zeros (3)). +*/ + +struct command * try_compress_repetitions (const unsigned char * data, __attribute__((unused)) const unsigned char * bitflipped, unsigned short * size, unsigned flags) { + unsigned short pos = 0, skipped = 0; + struct command * result = malloc(*size * sizeof(struct command)); + struct command * current = result; + struct command candidate; + flags = (flags + 1) << 1; + while (pos < *size) { + candidate = find_repetition_at_position(data, pos, *size); + if ((candidate.command == 3) && !(flags & 8)) { + candidate.command = 1; + candidate.value = 0; + } + if ((candidate.command == 1) && !(flags & 2)) { + candidate.command = 2; + candidate.value |= candidate.value << 8; + } + if ((flags & (1 << candidate.command)) && (command_size(candidate) <= candidate.count)) { + if (skipped) *(current ++) = (struct command) {.command = 0, .count = skipped, .value = pos - skipped}; + skipped = 0; + *(current ++) = candidate; + pos += candidate.count; + } else { + pos ++; + if ((++ skipped) == MAX_COMMAND_COUNT) { + *(current ++) = (struct command) {.command = 0, .count = MAX_COMMAND_COUNT, .value = pos - MAX_COMMAND_COUNT}; + skipped = 0; + } + } + } + if (skipped) *(current ++) = (struct command) {.command = 0, .count = skipped, .value = pos - skipped}; + *size = current - result; + result = realloc(result, *size * sizeof(struct command)); + return result; +} + +struct command find_repetition_at_position (const unsigned char * data, unsigned short position, unsigned short length) { + if ((position + 1) >= length) return data[position] ? ((struct command) {.command = 7}) : ((struct command) {.command = 3, .count = 1}); + unsigned char value[2] = {data[position], data[position + 1]}; + unsigned repcount, limit = length - position; + if (limit > MAX_COMMAND_COUNT) limit = MAX_COMMAND_COUNT; + for (repcount = 2; (repcount < limit) && (data[position + repcount] == value[repcount & 1]); repcount ++); + struct command result; + result.count = repcount; + if (*value != value[1]) { + if (!*value && (repcount < 3)) return (struct command) {.command = 3, .count = 1}; + result.command = 2; + result.value = ((unsigned) (*value)) | (((unsigned) (value[1])) << 8); + } else if (*value) { + result.command = 1; + result.value = *value; + } else + result.command = 3; + return result; +} diff --git a/tools/lz/spcomp.c b/tools/lz/spcomp.c new file mode 100644 index 000000000..b61848365 --- /dev/null +++ b/tools/lz/spcomp.c @@ -0,0 +1,141 @@ +#include "proto.h" + +/* + Single-pass compressor: attempts to compress the data in a single pass, selecting the best command at each + position within some constraints. + Methods defined: 72 + Flags values: + Bit fields (will trigger alternate behavior if set): + 1: prefer repetition commands over copy commands of equal count + 2: don't emit a copy or repetition with a count equal to its size when the previous command is a literal (0) that + is not at maximum size (32 or 1024) + 4: don't emit long copy commands + Selector values (pick one from each group and add them to the bit fields): + - Scan delay: number of bytes that are forced into literal (0) commands after each non-literal command: + 0: 0 bytes + 8: 1 byte + 16: 2 bytes + - Copy command preference (when the command counts are tied), in order from most to least: + 0: normal (4), reversed (6), flipped (5) + 24: reversed (6), flipped (5), normal (4) + 48: flipped (5), reversed (6), normal (4) +*/ + +struct command * try_compress_single_pass (const unsigned char * data, const unsigned char * bitflipped, unsigned short * length, unsigned flags) { + struct command * commands = malloc(sizeof(struct command) * *length); + memset(commands, -1, sizeof(struct command) * *length); + struct command * current_command = commands; + unsigned short position = 0, previous_data = 0; + unsigned char scan_delay = 0, scan_delay_flag = (flags >> 3) % 3; + struct command copy, repetition; + while (position < *length) { + copy = find_best_copy(data, position, *length, bitflipped, flags); + repetition = find_best_repetition(data, position, *length); + if (flags & 1) + *current_command = pick_best_command(2, repetition, copy); + else + *current_command = pick_best_command(2, copy, repetition); + *current_command = pick_best_command(2, (struct command) {.command = 0, .count = 1, .value = position}, *current_command); + if ((flags & 2) && (command_size(*current_command) == current_command -> count)) + if (previous_data && (previous_data != SHORT_COMMAND_COUNT) && (previous_data != MAX_COMMAND_COUNT)) + *current_command = (struct command) {.command = 0, .count = 1, .value = position}; + if (scan_delay_flag) { + if (scan_delay >= scan_delay_flag) + scan_delay = 0; + else if (current_command -> command) { + scan_delay ++; + *current_command = (struct command) {.command = 0, .count = 1, .value = position}; + } + } + if (current_command -> command) + previous_data = 0; + else + previous_data += current_command -> count; + position += (current_command ++) -> count; + } + optimize(commands, current_command - commands); + repack(&commands, length); + return commands; +} + +struct command find_best_copy (const unsigned char * data, unsigned short position, unsigned short length, const unsigned char * bitflipped, unsigned flags) { + struct command simple = {.command = 7}; + struct command flipped = simple, backwards = simple; + short count, offset; + if ((count = scan_forwards(data + position, length - position, data, position, &offset))) + simple = (struct command) {.command = 4, .count = count, .value = offset}; + if ((count = scan_forwards(data + position, length - position, bitflipped, position, &offset))) + flipped = (struct command) {.command = 5, .count = count, .value = offset}; + if ((count = scan_backwards(data, length - position, position, &offset))) + backwards = (struct command) {.command = 6, .count = count, .value = offset}; + struct command command; + switch (flags / 24) { + case 0: command = pick_best_command(3, simple, backwards, flipped); break; + case 1: command = pick_best_command(3, backwards, flipped, simple); break; + case 2: command = pick_best_command(3, flipped, backwards, simple); + } + if ((flags & 4) && (command.count > SHORT_COMMAND_COUNT)) command.count = SHORT_COMMAND_COUNT; + return command; +} + +unsigned short scan_forwards (const unsigned char * target, unsigned short limit, const unsigned char * source, unsigned short real_position, short * offset) { + unsigned short best_match, best_length = 0; + unsigned short current_length; + unsigned short position; + for (position = 0; position < real_position; position ++) { + if (source[position] != *target) continue; + for (current_length = 0; (current_length < limit) && (source[position + current_length] == target[current_length]); current_length ++); + if (current_length > MAX_COMMAND_COUNT) current_length = MAX_COMMAND_COUNT; + if (current_length < best_length) continue; + best_match = position; + best_length = current_length; + } + if (!best_length) return 0; + if ((best_match + LOOKBACK_LIMIT) >= real_position) + *offset = best_match - real_position; + else + *offset = best_match; + return best_length; +} + +unsigned short scan_backwards (const unsigned char * data, unsigned short limit, unsigned short real_position, short * offset) { + if (real_position < limit) limit = real_position; + unsigned short best_match, best_length = 0; + unsigned short current_length; + unsigned short position; + for (position = 0; position < real_position; position ++) { + if (data[position] != data[real_position]) continue; + for (current_length = 0; (current_length <= position) && (current_length < limit) && + (data[position - current_length] == data[real_position + current_length]); current_length ++); + if (current_length > MAX_COMMAND_COUNT) current_length = MAX_COMMAND_COUNT; + if (current_length < best_length) continue; + best_match = position; + best_length = current_length; + } + if (!best_length) return 0; + if ((best_match + LOOKBACK_LIMIT) >= real_position) + *offset = best_match - real_position; + else + *offset = best_match; + return best_length; +} + +struct command find_best_repetition (const unsigned char * data, unsigned short position, unsigned short length) { + if ((position + 1) >= length) return data[position] ? ((struct command) {.command = 7}) : ((struct command) {.command = 3, .count = 1}); + unsigned char value[2] = {data[position], data[position + 1]}; + unsigned repcount, limit = length - position; + if (limit > MAX_COMMAND_COUNT) limit = MAX_COMMAND_COUNT; + for (repcount = 2; (repcount < limit) && (data[position + repcount] == value[repcount & 1]); repcount ++); + struct command result; + result.count = repcount; + if (*value != value[1]) { + if (!*value && (repcount < 3)) return (struct command) {.command = 3, .count = 1}; + result.command = 2; + result.value = ((unsigned) (*value)) | (((unsigned) (value[1])) << 8); + } else if (*value) { + result.command = 1; + result.value = *value; + } else + result.command = 3; + return result; +} diff --git a/tools/lz/uncomp.c b/tools/lz/uncomp.c new file mode 100644 index 000000000..3544cd93b --- /dev/null +++ b/tools/lz/uncomp.c @@ -0,0 +1,92 @@ +#include "proto.h" + +struct command * get_commands_from_file (const unsigned char * data, unsigned short * restrict size, unsigned short * restrict slack) { + struct command * result = malloc(*size * sizeof(struct command)); + unsigned short remaining = *size; + struct command * current = result; + const unsigned char * rp = data; + while (1) { + if (!(remaining --)) goto error; + current -> command = *rp >> 5; + current -> count = *(rp ++) & 31; + if (current -> command == 7) { + current -> command = current -> count >> 2; + current -> count = (current -> count & 3) << 8; + if (current -> command == 7) { + // long commands inside long commands are not allowed, but if the count is 0x300 here, it means that the original byte was 0xff + if (current -> count == 0x300) break; + goto error; + } + if (!(remaining --)) goto error; + current -> count |= *(rp ++); + } + current -> count ++; + switch (current -> command) { + case 0: + if (remaining <= current -> count) goto error; + current -> value = rp - data; + rp += current -> count; + remaining -= current -> count; + case 3: + break; + case 1: case 2: { + unsigned char p; + if (remaining <= current -> command) goto error; + current -> value = 0; + for (p = 0; p < current -> command; p ++) current -> value |= *(rp ++) << (p << 3); + remaining -= current -> command; + } break; + default: + if (!(remaining --)) goto error; + if ((current -> value = *(rp ++)) & 128) + current -> value = 127 - current -> value; + else { + if (!(remaining --)) goto error; + current -> value = (current -> value << 8) | *(rp ++); + } + } + current ++; + } + if (slack) *slack = *size - (rp - data); + *size = current - result; + return realloc(result, *size * sizeof(struct command)); + error: + free(result); + return NULL; +} + +unsigned char * get_uncompressed_data (const struct command * commands, const unsigned char * compressed, unsigned short * size) { + const struct command * limit = commands + *size; + unsigned char * result = malloc(MAX_FILE_SIZE + MAX_COMMAND_COUNT); + unsigned char * current = result; + unsigned short p; + for (; commands < limit; commands ++) { + switch (commands -> command) { + case 0: + memcpy(current, compressed + commands -> value, commands -> count); + current += commands -> count; + break; + case 1: case 2: + for (p = 0; p < commands -> count; p ++) *(current ++) = commands -> value >> ((p % commands -> command) << 3); + break; + case 3: + memset(current, 0, commands -> count); + current += commands -> count; + break; + default: { + const unsigned char * ref = ((commands -> value < 0) ? current : result) + commands -> value; + for (p = 0; p < commands -> count; p ++) { + current[p] = ref[(commands -> command == 6) ? -(int) p : p]; + if (commands -> command == 5) current[p] = bit_flipping_table[current[p]]; + } + current += commands -> count; + } + } + if ((current - result) > MAX_FILE_SIZE) { + free(result); + return NULL; + } + } + *size = current - result; + return result; +} diff --git a/tools/lz/util.c b/tools/lz/util.c new file mode 100644 index 000000000..d77ac7bcc --- /dev/null +++ b/tools/lz/util.c @@ -0,0 +1,54 @@ +#include "proto.h" + +noreturn error_exit (int error_code, const char * error, ...) { + va_list ap; + va_start(ap, error); + fputs("error: ", stderr); + vfprintf(stderr, error, ap); + va_end(ap); + fputc('\n', stderr); + exit(error_code); +} + +unsigned char * read_file_into_buffer (const char * file, unsigned short * size) { + FILE * fp = file ? fopen(file, "rb") : stdin; + if (!fp) error_exit(1, "could not open file %s for reading", file); + unsigned char * buf = malloc(MAX_FILE_SIZE + 1); + int rv = fread(buf, 1, MAX_FILE_SIZE + 1, fp); + if (file) fclose(fp); + if (rv < 0) error_exit(1, "could not read from file %s", file); + if (rv > MAX_FILE_SIZE) error_exit(1, "file %s is too big", file ? file : "<standard input>"); + *size = rv; + return buf; +} + +struct command pick_best_command (unsigned count, struct command command, ...) { + struct command result = command; + va_list ap; + va_start(ap, command); + while (-- count) { + command = va_arg(ap, struct command); + if (is_better(command, result)) result = command; + } + va_end(ap); + return result; +} + +int is_better (struct command new, struct command old) { + if (new.command == 7) return 0; + if (old.command == 7) return 1; + short new_savings = new.count - command_size(new), old_savings = old.count - command_size(old); + return new_savings > old_savings; +} + +short command_size (struct command command) { + short header_size = 1 + (command.count > SHORT_COMMAND_COUNT); + if (command.command & 4) return header_size + 1 + (command.value >= 0); + return header_size + command.command[(short []) {command.count, 1, 2, 0}]; +} + +unsigned short compressed_length (const struct command * commands, unsigned short count) { + unsigned short current, total = 0; + for (current = 0; current < count; current ++) if (commands[current].command != 7) total += command_size(commands[current]); + return total; +} diff --git a/tools/lzcomp.c b/tools/lzcomp.c deleted file mode 100644 index 562a5ec08..000000000 --- a/tools/lzcomp.c +++ /dev/null @@ -1,512 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> - -#define COMPRESSION_METHODS 72 - -struct command { - unsigned command: 3; - unsigned count: 12; - signed value: 17; -}; - -struct options { - const char * input; - const char * output; - unsigned char mode; -}; - -int main(int, char **); -struct options get_options(char **); -void usage(const char *); -void error_exit(int, const char *, ...); -void bit_flip(const unsigned char *, unsigned short, unsigned char *); -unsigned char * read_file_into_buffer(const char *, unsigned short *); -void write_commands_to_textfile(const char *, const struct command *, unsigned, const unsigned char *); -void write_command_to_textfile(FILE *, struct command, const unsigned char *); -void write_commands_to_file(const char *, const struct command *, unsigned, const unsigned char *); -void write_command_to_file(FILE *, struct command, const unsigned char *); -struct command * compress(const unsigned char *, unsigned short *); -struct command * store_uncompressed(unsigned short *); -struct command * try_compress(const unsigned char *, const unsigned char *, unsigned short *, unsigned); -struct command find_best_copy(const unsigned char *, unsigned short, unsigned short, const unsigned char *, unsigned); -unsigned short scan_forwards(const unsigned char *, unsigned short, const unsigned char *, unsigned short, short *); -unsigned short scan_backwards(const unsigned char *, unsigned short, unsigned short, short *); -struct command find_best_repetition(const unsigned char *, unsigned short, unsigned short); -struct command pick_best_command(unsigned, struct command, ...); -int is_better(struct command, struct command); -short command_size(struct command); -void optimize(struct command *, unsigned short); -void repack(struct command **, unsigned short *); -struct command * select_command_sequence(struct command **, const unsigned short *, unsigned, unsigned short *); -struct command * merge_command_sequences(const struct command *, unsigned short, const struct command *, unsigned short, unsigned short *); -unsigned short compressed_length(const struct command *, unsigned short); - -int main (int argc __attribute__((unused)), char ** argv) { - struct options options = get_options(argv); - unsigned short size; - unsigned char * file_buffer = read_file_into_buffer(options.input, &size); - struct command * compressed = compress(file_buffer, &size); - if (options.mode) - write_commands_to_textfile(options.output, compressed, size, file_buffer); - else - write_commands_to_file(options.output, compressed, size, file_buffer); - free(file_buffer); - free(compressed); - return 0; -} - -struct options get_options (char ** argv) { - struct options result = {.input = NULL, .output = NULL, .mode = 0}; - const char * program_name = *argv; - for (argv ++; *argv; argv ++) { - if (strncmp(*argv, "--", 2)) break; - if (!strcmp(*argv, "--")) { - argv ++; - break; - } else if (!strcmp(*argv, "--text")) - result.mode = 1; - else if (!strcmp(*argv, "--binary")) - result.mode = 0; - else - error_exit(3, "unknown option: %s", *argv); - } - if (!*argv) usage(program_name); - result.input = *argv; - result.output = argv[1]; - return result; -} - -void usage (const char * program_name) { - fprintf(stderr, "Usage: %s [<options>] <source file> [<compressed output>]\n\n", program_name); - fputs("Options:\n", stderr); - fputs(" --text Output the command stream as text.\n", stderr); - fputs(" --binary Output the command stream as binary data (default).\n", stderr); - fputs(" -- End of option list.\n", stderr); - exit(3); -} - -void error_exit (int error_code, const char * error, ...) { - va_list ap; - va_start(ap, error); - fputs("error: ", stderr); - vfprintf(stderr, error, ap); - fputc('\n', stderr); - exit(error_code); -} - -void bit_flip (const unsigned char * data, unsigned short length, unsigned char * result) { - unsigned char new_value, pos; - while (length --) { - new_value = 0; - for (pos = 0; pos < 8; pos ++) new_value |= ((*data >> pos) & 1) << (7 - pos); - *(result ++) = new_value; - data ++; - } -} - -unsigned char * read_file_into_buffer (const char * file, unsigned short * size) { - FILE * fp = fopen(file, "rb"); - if (!fp) error_exit(1, "could not open file %s for reading", file); - unsigned char * buf = malloc(32769); - int rv = fread(buf, 1, 32769, fp); - fclose(fp); - if (rv < 0) error_exit(1, "could not read from file %s", file); - if (rv > 32768) error_exit(1, "file %s is too big", file); - *size = rv; - return buf; -} - -void write_commands_to_textfile (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream) { - FILE * fp = file ? fopen(file, "w") : stdout; - if (!fp) error_exit(1, "could not open file %s for writing", file); - while (count --) write_command_to_textfile(fp, *(commands ++), input_stream); - if (fputs("\tlzend\n", fp) < 0) error_exit(1, "could not write terminator to compressed output"); - if (file) fclose(fp); -} - -void write_command_to_textfile (FILE * fp, struct command command, const unsigned char * input_stream) { - if ((!command.count) || (command.count > 1024)) error_exit(2, "invalid command in output stream"); - int rv = -1, pos; - const char * kind; - switch (command.command) { - case 0: - if ((rv = fprintf(fp, "\tlzdata")) < 0) break; - for (pos = 0; pos < command.count; pos ++) if ((rv = fprintf(fp, "%s$%02hhx", pos ? ", " : " ", input_stream[command.value + pos])) < 0) break; - rv = putc('\n', fp); - break; - case 1: - if ((command.value < 0) || (command.value > 255)) error_exit(2, "invalid command in output stream"); - rv = fprintf(fp, "\tlzrepeat %u, $%02hhx\n", command.count, (unsigned char) command.value); - break; - case 2: - if (command.value < 0) error_exit(2, "invalid command in output stream"); - rv = fprintf(fp, "\tlzrepeat %u, $%02hhx, $%02hhx\n", command.count, (unsigned char) command.value, (unsigned char) (command.value >> 8)); - break; - case 3: - rv = fprintf(fp, "\tlzzero %u\n", command.count); - break; - case 4: - kind = "normal"; - goto copy; - case 5: - kind = "flipped"; - goto copy; - case 6: - kind = "reversed"; - copy: - if ((command.value < -128) || (command.value > 32767)) error_exit(2, "invalid command in output stream"); - if (command.value < 0) - rv = fprintf(fp, "\tlzcopy %s, %u, %d\n", kind, command.count, command.value); - else - rv = fprintf(fp, "\tlzcopy %s, %u, $%04hx\n", kind, command.count, (unsigned short) command.value); - break; - default: - error_exit(2, "invalid command in output stream"); - } - if (rv < 0) error_exit(1, "could not write command to compressed output"); -} - -void write_commands_to_file (const char * file, const struct command * commands, unsigned count, const unsigned char * input_stream) { - FILE * fp = file ? fopen(file, "wb") : stdout; - if (!fp) error_exit(1, "could not open file %s for writing", file); - while (count --) write_command_to_file(fp, *(commands ++), input_stream); - unsigned char terminator = -1; - if (fwrite(&terminator, 1, 1, fp) != 1) error_exit(1, "could not write terminator to compressed output"); - if (file) fclose(fp); -} - -void write_command_to_file (FILE * fp, struct command command, const unsigned char * input_stream) { - if ((!command.count) || (command.count > 1024)) error_exit(2, "invalid command in output stream"); - unsigned char buf[4]; - unsigned char * pos = buf; - int n; - command.count --; - if (command.count < 32) - *(pos ++) = (command.command << 5) + command.count; - else { - *(pos ++) = 224 + (command.command << 2) + (command.count >> 8); - *(pos ++) = command.count; - } - switch (command.command) { - case 1: case 2: - if ((command.value < 0) || (command.value >= (1 << (command.command << 3)))) error_exit(2, "invalid command in output stream"); - for (n = 0; n < command.command; n ++) *(pos ++) = command.value >> (n << 3); - case 0: case 3: - break; - default: - if ((command.value < -128) || (command.value > 32767)) error_exit(2, "invalid command in output stream"); - if (command.value < 0) - *(pos ++) = command.value ^ 127; - else { - *(pos ++) = command.value >> 8; - *(pos ++) = command.value; - } - } - if (fwrite(buf, 1, pos - buf, fp) != (size_t)(pos - buf)) error_exit(1, "could not write command to compressed output"); - if (command.command) return; - command.count ++; - if (fwrite(input_stream + command.value, 1, command.count, fp) != command.count) error_exit(1, "could not write data to compressed output"); -} - -struct command * compress (const unsigned char * data, unsigned short * size) { - unsigned char * bitflipped = malloc(*size); - bit_flip(data, *size, bitflipped); - struct command * compressed_sequences[COMPRESSION_METHODS + 1]; - unsigned short lengths[COMPRESSION_METHODS + 1]; - unsigned current; - for (current = 0; current < COMPRESSION_METHODS; current ++) { - lengths[current] = *size; - compressed_sequences[current] = try_compress(data, bitflipped, lengths + current, current); - } - free(bitflipped); - lengths[COMPRESSION_METHODS] = *size; - compressed_sequences[COMPRESSION_METHODS] = store_uncompressed(lengths + COMPRESSION_METHODS); - struct command * result = select_command_sequence(compressed_sequences, lengths, COMPRESSION_METHODS + 1, size); - for (current = 0; current <= COMPRESSION_METHODS; current ++) free(compressed_sequences[current]); - return result; -} - -struct command * store_uncompressed (unsigned short * size) { - unsigned short position, block, remainder = *size; - struct command * result = NULL; - *size = 0; - for (position = 0; remainder; position += block, remainder -= block) { - block = (remainder > 1024) ? 1024 : remainder; - if ((block <= 64) && (block > 32)) block = 32; - result = realloc(result, sizeof(struct command) * (1 + *size)); - result[(*size) ++] = (struct command) {.command = 0, .count = block, .value = position}; - } - return result; -} - -struct command * try_compress (const unsigned char * data, const unsigned char * bitflipped, unsigned short * length, unsigned flags) { - struct command * commands = malloc(sizeof(struct command) * *length); - memset(commands, -1, sizeof(struct command) * *length); - struct command * current_command = commands; - unsigned short position = 0, previous_data = 0; - unsigned char lookahead = 0, lookahead_flag = (flags >> 3) % 3; - struct command copy, repetition; - while (position < *length) { - copy = find_best_copy(data, position, *length, bitflipped, flags); - repetition = find_best_repetition(data, position, *length); - if (flags & 1) - *current_command = pick_best_command(2, repetition, copy); - else - *current_command = pick_best_command(2, copy, repetition); - *current_command = pick_best_command(2, (struct command) {.command = 0, .count = 1, .value = position}, *current_command); - if (flags & 2) { - if (previous_data && (previous_data != 32) && (previous_data != 1024) && (command_size(*current_command) == current_command -> count)) - *current_command = (struct command) {.command = 0, .count = 1, .value = position}; - } - if (lookahead_flag) { - if (lookahead >= lookahead_flag) - lookahead = 0; - else if (current_command -> command) { - lookahead ++; - *current_command = (struct command) {.command = 0, .count = 1, .value = position}; - } - } - if (current_command -> command) - previous_data = 0; - else - previous_data += current_command -> count; - position += (current_command ++) -> count; - } - optimize(commands, current_command - commands); - repack(&commands, length); - return commands; -} - -struct command find_best_copy (const unsigned char * data, unsigned short position, unsigned short length, const unsigned char * bitflipped, unsigned flags) { - struct command simple = {.command = 7}; - struct command flipped = simple, backwards = simple; - short count, offset; - if ((count = scan_forwards(data + position, length - position, data, position, &offset))) - simple = (struct command) {.command = 4, .count = count, .value = offset}; - if ((count = scan_forwards(data + position, length - position, bitflipped, position, &offset))) - flipped = (struct command) {.command = 5, .count = count, .value = offset}; - if ((count = scan_backwards(data, length - position, position, &offset))) - backwards = (struct command) {.command = 6, .count = count, .value = offset}; - struct command command; - switch (flags / 24) { - case 0: command = pick_best_command(3, simple, backwards, flipped); break; - case 1: command = pick_best_command(3, backwards, flipped, simple); break; - case 2: command = pick_best_command(3, flipped, backwards, simple); - } - if ((flags & 4) && (command.count > 32)) command.count = 32; - return command; -} - -unsigned short scan_forwards (const unsigned char * target, unsigned short limit, const unsigned char * source, unsigned short real_position, short * offset) { - unsigned short best_match, best_length = 0; - unsigned short current_length; - unsigned short position; - for (position = 0; position < real_position; position ++) { - if (source[position] != *target) continue; - for (current_length = 0; (current_length < limit) && (source[position + current_length] == target[current_length]); current_length ++); - if (current_length > 1024) current_length = 1024; - if (current_length < best_length) continue; - best_match = position; - best_length = current_length; - } - if (!best_length) return 0; - if ((best_match + 128) >= real_position) - *offset = best_match - real_position; - else - *offset = best_match; - return best_length; -} - -unsigned short scan_backwards (const unsigned char * data, unsigned short limit, unsigned short real_position, short * offset) { - if (real_position < limit) limit = real_position; - unsigned short best_match, best_length = 0; - unsigned short current_length; - unsigned short position; - for (position = 0; position < real_position; position ++) { - if (data[position] != data[real_position]) continue; - for (current_length = 0; (current_length <= position) && (current_length < limit) && - (data[position - current_length] == data[real_position + current_length]); current_length ++); - if (current_length > 1024) current_length = 1024; - if (current_length < best_length) continue; - best_match = position; - best_length = current_length; - } - if (!best_length) return 0; - if ((best_match + 128) >= real_position) - *offset = best_match - real_position; - else - *offset = best_match; - return best_length; -} - -struct command find_best_repetition (const unsigned char * data, unsigned short position, unsigned short length) { - if ((position + 1) >= length) return data[position] ? ((struct command) {.command = 7}) : ((struct command) {.command = 3, .count = 1}); - unsigned char value[2] = {data[position], data[position + 1]}; - unsigned repcount, limit = length - position; - if (limit > 1024) limit = 1024; - for (repcount = 2; (repcount < limit) && (data[position + repcount] == value[repcount & 1]); repcount ++); - struct command result; - result.count = repcount; - if (*value != value[1]) { - if (!*value && (repcount < 3)) return (struct command) {.command = 3, .count = 1}; - result.command = 2; - result.value = ((unsigned) (*value)) | (((unsigned) (value[1])) << 8); - } else if (*value) { - result.command = 1; - result.value = *value; - } else - result.command = 3; - return result; -} - -struct command pick_best_command (unsigned count, struct command command, ...) { - struct command result = command; - va_list ap; - va_start(ap, command); - while (-- count) { - command = va_arg(ap, struct command); - if (is_better(command, result)) result = command; - } - va_end(ap); - return result; -} - -int is_better (struct command new, struct command old) { - if (new.command == 7) return 0; - if (old.command == 7) return 1; - short new_savings = new.count - command_size(new), old_savings = old.count - command_size(old); - return new_savings > old_savings; -} - -short command_size (struct command command) { - short header_size = 1 + (command.count > 32); - if (command.command & 4) return header_size + 1 + (command.value >= 0); - return header_size + command.command[(short []) {command.count, 1, 2, 0}]; -} - -void optimize (struct command * commands, unsigned short count) { - while (count && (commands -> command == 7)) commands ++, count --; - if (count < 2) return; - struct command * end = commands + count; - struct command * next = commands + 1; - while (next < end) { - if (next -> command == 7) goto skip; - if ( - !(commands -> command) && - (command_size(*next) == next -> count) && - ((commands -> count + next -> count) <= 1024) && - ((commands -> count > 32) || ((commands -> count + next -> count) <= 32)) - ) { - commands -> count += next -> count; - next -> command = 7; - goto skip; - } - if (next -> command != commands -> command) goto accept; - switch (commands -> command) { - case 0: - if ((commands -> value + commands -> count) != next -> value) break; - commands -> count += next -> count; - next -> command = 7; - if (commands -> count <= 1024) goto skip; - next -> command = 0; - next -> value = commands -> value + 1024; - next -> count = commands -> count - 1024; - commands -> count = 1024; - break; - case 1: - if (commands -> value != next -> value) break; - // fall through - case 3: - if ((commands -> count + next -> count) <= 1024) { - commands -> count += next -> count; - next -> command = 7; - goto skip; - } - next -> count = (commands -> count + next -> count) - 1024; - commands -> count = 1024; - break; - } - accept: - commands = next; - skip: - next ++; - } -} - -void repack (struct command ** commands, unsigned short * length) { - struct command * new_commands = malloc(sizeof(struct command) * *length); - struct command * current = new_commands; - unsigned short p; - for (p = 0; p < *length; p ++) if (p[*commands].command != 7) *(current ++) = p[*commands]; - free(*commands); - *commands = new_commands; - *length = current - new_commands; -} - -struct command * select_command_sequence (struct command ** sequences, const unsigned short * lengths, unsigned count, unsigned short * final_length) { - unsigned short min_sequence = 0, min_length = compressed_length(*sequences, *lengths); - unsigned short seq, len; - for (seq = 1; seq < count; seq ++) { - len = compressed_length(sequences[seq], lengths[seq]); - if (len < min_length) { - min_sequence = seq; - min_length = len; - } - } - *final_length = lengths[min_sequence]; - struct command * current = malloc(*final_length * sizeof(struct command)); - memcpy(current, sequences[min_sequence], *final_length * sizeof(struct command)); - struct command * new; - for (seq = 1; seq < count; seq ++) { - new = merge_command_sequences(current, *final_length, sequences[(seq + min_sequence) % count], lengths[(seq + min_sequence) % count], final_length); - free(current); - current = new; - } - return current; -} - -struct command * merge_command_sequences (const struct command * current, unsigned short current_length, const struct command * new, unsigned short new_length, - unsigned short * result_length) { - struct command * result = malloc(sizeof(struct command) * (current_length + new_length)); - struct command * current_command = result; - const struct command * saved_current; - const struct command * saved_new; - unsigned short current_pos, new_pos; - while (current_length) { - if (current -> count == new -> count) { - *(current_command ++) = pick_best_command(2, *(current ++), *(new ++)); - current_length --; - continue; - } - saved_current = current; - saved_new = new; - current_pos = (current ++) -> count; - new_pos = (new ++) -> count; - current_length --; - while (current_pos != new_pos) - if (current_pos < new_pos) { - current_pos += (current ++) -> count; - current_length --; - } else - new_pos += (new ++) -> count; - current_pos = compressed_length(saved_current, current - saved_current); - new_pos = compressed_length(saved_new, new - saved_new); - if (new_pos < current_pos) { - memcpy(current_command, saved_new, sizeof(struct command) * (new - saved_new)); - current_command += new - saved_new; - } else { - memcpy(current_command, saved_current, sizeof(struct command) * (current - saved_current)); - current_command += current - saved_current; - } - } - *result_length = current_command - result; - return result; -} - -unsigned short compressed_length (const struct command * commands, unsigned short count) { - unsigned short current, total = 0; - for (current = 0; current < count; current ++) if (commands[current].command != 7) total += command_size(commands[current]); - return total; -} diff --git a/tools/mapreader.py b/tools/mapreader.py index 8a29e5235..de2ec3423 100644 --- a/tools/mapreader.py +++ b/tools/mapreader.py @@ -38,10 +38,11 @@ class MapReader: bank_types = { 'HRAM' : { 'size': 0x80, 'banked': False, }, 'OAM' : { 'size': 0xA0, 'banked': False, }, - 'ROM Bank' : { 'size': 0x4000, 'banked': True, }, - 'SRAM Bank': { 'size': 0x2000, 'banked': True, }, - 'VRAM Bank': { 'size': 0x1000, 'banked': True, }, - 'WRAM Bank': { 'size': 0x2000, 'banked': True, }, + 'ROM0 bank': { 'size': 0x4000, 'banked': True, }, + 'ROMX bank': { 'size': 0x4000, 'banked': True, }, + 'SRAM bank': { 'size': 0x2000, 'banked': True, }, + 'VRAM bank': { 'size': 0x1000, 'banked': True, }, + 'WRAM bank': { 'size': 0x2000, 'banked': True, }, } # FSM states @@ -52,7 +53,7 @@ class MapReader: # $506D = TypeMatchups section_data_regex = re.compile('\$([0-9A-Fa-f]{4}) = (.*)') # $3ED2 bytes - slack_regex = re.compile('\$([0-9A-Fa-f]{4}) bytes') + slack_regex = re.compile('\$([0-9A-Fa-f]{4}) bytes?') def __init__(self, *args, **kwargs): self.__dict__.update(kwargs) diff --git a/tools/md5.c b/tools/md5.c deleted file mode 100644 index 6f4f58d2d..000000000 --- a/tools/md5.c +++ /dev/null @@ -1,128 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdint.h> - -#include "common.h" - -static const int s[64] = { - 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, - 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, - 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, - 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, -}; - -static const uint32_t K[64] = { - 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, - 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, - 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, - 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, - 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, - 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, - 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, - 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, - 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, - 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, - 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, - 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, - 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, - 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, - 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, - 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, -}; - -#define rotate_left_32(value, by) \ - ((((value) << (by)) & 0xffffffff) | ((value) >> (32 - (by)))) - -void md5_wikipedia(uint8_t *data, int length, uint8_t *result) { - - uint8_t *message = calloc(length + 64, sizeof(uint8_t)); - memcpy(message, data, length); - - int64_t orig_bitlength = length * 8; - - message[length++] |= 1 << 7; - - while (length % 64 != (64 - 8)) { - length++; - } - for (int i = 0; i < 8; i++) { - message[length++] = (orig_bitlength >> (i * 8)) & 0xff; - } - - int a0 = 0x67452301; - int b0 = 0xefcdab89; - int c0 = 0x98badcfe; - int d0 = 0x10325476; - - for (int start = 0; start < length; start += 64) { - uint32_t M[16]; - for (int j = 0; j < 16; j++) { - uint8_t *word = &message[start + j * 4]; - M[j] = *word++; - M[j] |= *word++ << 8; - M[j] |= *word++ << 16; - M[j] |= *word++ << 24; - } - - int A = a0; - int B = b0; - int C = c0; - int D = d0; - for (int i = 0; i < 64; i++) { - int F, g; - switch (i / 16) { - case 0: - F = (B & C) | (~B & D); - g = i; - break; - case 1: - F = (D & B) | (~D & C); - g = (5 * i + 1) % 16; - break; - case 2: - F = B ^ C ^ D; - g = (3 * i + 5) % 16; - break; - case 3: - F = C ^ (B | ~D); - g = (7 * i) % 16; - break; - } - int e = D; - D = C; - C = B; - B = B + rotate_left_32(A + F + K[i] + M[g], s[i]); - A = e; - } - a0 += A; - b0 += B; - c0 += C; - d0 += D; - } - - int values[] = {a0, b0, c0, d0}; - for (int i = 0; i < 16; i++) { - int value = values[i >> 2]; - int shift = (i % 4) * 8; - result[i] = (value >> shift) & 0xff; - } - - free(message); -} - -int main(int argc, char *argv[]) { - if (argc < 2) { - exit(1); - } - char *infile = argv[1]; - int size; - uint8_t *data = read_u8(infile, &size); - - uint8_t result[16]; - md5_wikipedia(data, size, result); - for (int i = 0; i < 16; i++) { - printf("%02x", result[i]); - } - printf("\n"); -} diff --git a/tools/used_space.py b/tools/used_space.py index 3fc401f57..34f8e492d 100644 --- a/tools/used_space.py +++ b/tools/used_space.py @@ -38,7 +38,8 @@ def main(): default_bank_data = {'sections': [], 'used': 0, 'slack': bank_size} for bank in range(num_banks): hits = [0] * pixels_per_bank - data = r.bank_data['ROM Bank'].get(bank, default_bank_data) + bank_data = r.bank_data['ROM0 bank'] if bank == 0 else r.bank_data['ROMX bank'] + data = bank_data.get(bank, default_bank_data) for s in data['sections']: beg = s['beg'] & bank_mask end = s['end'] & bank_mask |