summaryrefslogtreecommitdiff
path: root/tools/pokemontools/lz.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/pokemontools/lz.py')
-rw-r--r--tools/pokemontools/lz.py580
1 files changed, 0 insertions, 580 deletions
diff --git a/tools/pokemontools/lz.py b/tools/pokemontools/lz.py
deleted file mode 100644
index aef5c641..00000000
--- a/tools/pokemontools/lz.py
+++ /dev/null
@@ -1,580 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Pokemon Crystal data de/compression.
-"""
-
-"""
-A rundown of Pokemon Crystal's compression scheme:
-
-Control commands occupy bits 5-7.
-Bits 0-4 serve as the first parameter <n> for each command.
-"""
-lz_commands = {
- 'literal': 0, # n values for n bytes
- 'iterate': 1, # one value for n bytes
- 'alternate': 2, # alternate two values for n bytes
- 'blank': 3, # zero for n bytes
-}
-
-"""
-Repeater commands repeat any data that was just decompressed.
-They take an additional signed parameter <s> to mark a relative starting point.
-These wrap around (positive from the start, negative from the current position).
-"""
-lz_commands.update({
- 'repeat': 4, # n bytes starting from s
- 'flip': 5, # n bytes in reverse bit order starting from s
- 'reverse': 6, # n bytes backwards starting from s
-})
-
-"""
-The long command is used when 5 bits aren't enough. Bits 2-4 contain a new control code.
-Bits 0-1 are appended to a new byte as 8-9, allowing a 10-bit parameter.
-"""
-lz_commands.update({
- 'long': 7, # n is now 10 bits for a new control code
-})
-max_length = 1 << 10 # can't go higher than 10 bits
-lowmax = 1 << 5 # standard 5-bit param
-
-"""
-If 0xff is encountered instead of a command, decompression ends.
-"""
-lz_end = 0xff
-
-
-bit_flipped = [
- sum(((byte >> i) & 1) << (7 - i) for i in xrange(8))
- for byte in xrange(0x100)
-]
-
-
-class Compressed:
-
- """
- Usage:
- lz = Compressed(data).output
- or
- lz = Compressed().compress(data)
- or
- c = Compressed()
- c.data = data
- lz = c.compress()
-
- There are some issues with reproducing the target compressor.
- Some notes are listed here:
- - the criteria for detecting a lookback is inconsistent
- - sometimes lookbacks that are mostly 0s are pruned, sometimes not
- - target appears to skip ahead if it can use a lookback soon, stopping the current command short or in some cases truncating it with literals.
- - this has been implemented, but the specifics are unknown
- - self.min_scores: It's unknown if blank's minimum score should be 1 or 2. Most likely it's 1, with some other hack to account for edge cases.
- - may be related to the above
- - target does not appear to compress backwards
- """
-
- def __init__(self, *args, **kwargs):
-
- self.min_scores = {
- 'blank': 1,
- 'iterate': 2,
- 'alternate': 3,
- 'repeat': 3,
- 'reverse': 3,
- 'flip': 3,
- }
-
- self.preference = [
- 'repeat',
- 'blank',
- 'flip',
- 'reverse',
- 'iterate',
- 'alternate',
- #'literal',
- ]
-
- self.lookback_methods = 'repeat', 'reverse', 'flip'
-
- self.__dict__.update({
- 'data': None,
- 'commands': lz_commands,
- 'debug': False,
- 'literal_only': False,
- })
-
- self.arg_names = 'data', 'commands', 'debug', 'literal_only'
-
- self.__dict__.update(kwargs)
- self.__dict__.update(dict(zip(self.arg_names, args)))
-
- if self.data is not None:
- self.compress()
-
- def compress(self, data=None):
- if data is not None:
- self.data = data
-
- self.data = list(bytearray(self.data))
-
- self.indexes = {}
- self.lookbacks = {}
- for method in self.lookback_methods:
- self.lookbacks[method] = {}
-
- self.address = 0
- self.end = len(self.data)
- self.output = []
- self.literal = None
-
- while self.address < self.end:
-
- if self.score():
- self.do_literal()
- self.do_winner()
-
- else:
- if self.literal == None:
- self.literal = self.address
- self.address += 1
-
- self.do_literal()
-
- self.output += [lz_end]
- return self.output
-
- def reset_scores(self):
- self.scores = {}
- self.offsets = {}
- self.helpers = {}
- for method in self.min_scores.iterkeys():
- self.scores[method] = 0
-
- def bit_flip(self, byte):
- return bit_flipped[byte]
-
- def do_literal(self):
- if self.literal != None:
- length = abs(self.address - self.literal)
- start = min(self.literal, self.address + 1)
- self.helpers['literal'] = self.data[start:start+length]
- self.do_cmd('literal', length)
- self.literal = None
-
- def score(self):
- self.reset_scores()
-
- map(self.score_literal, ['iterate', 'alternate', 'blank'])
-
- for method in self.lookback_methods:
- self.scores[method], self.offsets[method] = self.find_lookback(method, self.address)
-
- self.stop_short()
-
- return any(
- score
- > self.min_scores[method] + int(score > lowmax)
- for method, score in self.scores.iteritems()
- )
-
- def stop_short(self):
- """
- If a lookback is close, reduce the scores of other commands.
- """
- best_method, best_score = max(
- self.scores.items(),
- key = lambda x: (
- x[1],
- -self.preference.index(x[0])
- )
- )
- for method in self.lookback_methods:
- min_score = self.min_scores[method]
- for address in xrange(self.address+1, self.address+best_score):
- length, index = self.find_lookback(method, address)
- if length > max(min_score, best_score):
- # BUG: lookbacks can reduce themselves. This appears to be a bug in the target also.
- for m, score in self.scores.items():
- self.scores[m] = min(score, address - self.address)
-
-
- def read(self, address=None):
- if address is None:
- address = self.address
- if 0 <= address < len(self.data):
- return self.data[address]
- return None
-
- def find_all_lookbacks(self):
- for method in self.lookback_methods:
- for address, byte in enumerate(self.data):
- self.find_lookback(method, address)
-
- def find_lookback(self, method, address=None):
- """Temporarily stubbed, because the real function doesn't run in polynomial time."""
- return 0, None
-
- def broken_find_lookback(self, method, address=None):
- if address is None:
- address = self.address
-
- existing = self.lookbacks.get(method, {}).get(address)
- if existing != None:
- return existing
-
- lookback = 0, None
-
- # Better to not carelessly optimize at the moment.
- """
- if address < 2:
- return lookback
- """
-
- byte = self.read(address)
- if byte is None:
- return lookback
-
- direction, mutate = {
- 'repeat': ( 1, int),
- 'reverse': (-1, int),
- 'flip': ( 1, self.bit_flip),
- }[method]
-
- # Doesn't seem to help
- """
- if mutate == self.bit_flip:
- if byte == 0:
- self.lookbacks[method][address] = lookback
- return lookback
- """
-
- data_len = len(self.data)
- is_two_byte_index = lambda index: int(index < address - 0x7f)
-
- for index in self.get_indexes(mutate(byte)):
-
- if index >= address:
- break
-
- old_length, old_index = lookback
- if direction == 1:
- if old_length > data_len - index: break
- else:
- if old_length > index: continue
-
- if self.read(index) in [None]: continue
-
- length = 1 # we know there's at least one match, or we wouldn't be checking this index
- while 1:
- this_byte = self.read(address + length)
- that_byte = self.read(index + length * direction)
- if that_byte == None or this_byte != mutate(that_byte):
- break
- length += 1
-
- score = length - is_two_byte_index(index)
- old_score = old_length - is_two_byte_index(old_index)
- if score >= old_score or (score == old_score and length > old_length):
- # XXX maybe avoid two-byte indexes when possible
- if score >= lookback[0] - is_two_byte_index(lookback[1]):
- lookback = length, index
-
- self.lookbacks[method][address] = lookback
- return lookback
-
- def get_indexes(self, byte):
- if not self.indexes.has_key(byte):
- self.indexes[byte] = []
- index = -1
- while 1:
- try:
- index = self.data.index(byte, index + 1)
- except ValueError:
- break
- self.indexes[byte].append(index)
- return self.indexes[byte]
-
- def score_literal(self, method):
- address = self.address
-
- compare = {
- 'blank': [0],
- 'iterate': [self.read(address)],
- 'alternate': [self.read(address), self.read(address + 1)],
- }[method]
-
- # XXX may or may not be correct
- if method == 'alternate' and compare[0] == 0:
- return
-
- length = 0
- while self.read(address + length) == compare[length % len(compare)]:
- length += 1
-
- self.scores[method] = length
- self.helpers[method] = compare
-
- def do_winner(self):
- winners = filter(
- lambda (method, score):
- score
- > self.min_scores[method] + int(score > lowmax),
- self.scores.iteritems()
- )
- winners.sort(
- key = lambda (method, score): (
- -(score - self.min_scores[method] - int(score > lowmax)),
- self.preference.index(method)
- )
- )
- winner, score = winners[0]
-
- length = min(score, max_length)
- self.do_cmd(winner, length)
- self.address += length
-
- def do_cmd(self, cmd, length):
- start_address = self.address
-
- cmd_length = length - 1
-
- output = []
-
- if length > lowmax:
- output.append(
- (self.commands['long'] << 5)
- + (self.commands[cmd] << 2)
- + (cmd_length >> 8)
- )
- output.append(
- cmd_length & 0xff
- )
- else:
- output.append(
- (self.commands[cmd] << 5)
- + cmd_length
- )
-
- self.helpers['blank'] = [] # quick hack
- output += self.helpers.get(cmd, [])
-
- if cmd in self.lookback_methods:
- offset = self.offsets[cmd]
- # Negative offsets are one byte.
- # Positive offsets are two.
- if 0 < start_address - offset - 1 <= 0x7f:
- offset = (start_address - offset - 1) | 0x80
- output += [offset]
- else:
- output += [offset / 0x100, offset % 0x100] # big endian
-
- if self.debug:
- print ' '.join(map(str, [
- cmd, length, '\t',
- ' '.join(map('{:02x}'.format, output)),
- self.data[start_address:start_address+length] if cmd in self.lookback_methods else '',
- ]))
-
- self.output += output
-
-
-
-class Decompressed:
- """
- Interpret and decompress lz-compressed data, usually 2bpp.
- """
-
- """
- Usage:
- data = Decompressed(lz).output
- or
- data = Decompressed().decompress(lz)
- or
- d = Decompressed()
- d.lz = lz
- data = d.decompress()
-
- To decompress from offset 0x80000 in a rom:
- data = Decompressed(rom, start=0x80000).output
- """
-
- lz = None
- start = 0
- commands = lz_commands
- debug = False
-
- arg_names = 'lz', 'start', 'commands', 'debug'
-
- def __init__(self, *args, **kwargs):
- self.__dict__.update(dict(zip(self.arg_names, args)))
- self.__dict__.update(kwargs)
-
- self.command_names = dict(map(reversed, self.commands.items()))
- self.address = self.start
-
- if self.lz is not None:
- self.decompress()
-
- if self.debug: print self.command_list()
-
-
- def command_list(self):
- """
- Print a list of commands that were used. Useful for debugging.
- """
-
- text = ''
-
- output_address = 0
- for name, attrs in self.used_commands:
- length = attrs['length']
- address = attrs['address']
- offset = attrs['offset']
- direction = attrs['direction']
-
- text += '{2:03x} {0}: {1}'.format(name, length, output_address)
- text += '\t' + ' '.join(
- '{:02x}'.format(int(byte))
- for byte in self.lz[ address : address + attrs['cmd_length'] ]
- )
-
- if offset is not None:
- repeated_data = self.output[ offset : offset + length * direction : direction ]
- if name == 'flip':
- repeated_data = map(bit_flipped.__getitem__, repeated_data)
- text += ' [' + ' '.join(map('{:02x}'.format, repeated_data)) + ']'
-
- text += '\n'
- output_address += length
-
- return text
-
-
- def decompress(self, lz=None):
-
- if lz is not None:
- self.lz = lz
-
- self.lz = bytearray(self.lz)
-
- self.used_commands = []
- self.output = []
-
- while 1:
-
- cmd_address = self.address
- self.offset = None
- self.direction = None
-
- if (self.byte == lz_end):
- self.next()
- break
-
- self.cmd = (self.byte & 0b11100000) >> 5
-
- if self.cmd_name == 'long':
- # 10-bit length
- self.cmd = (self.byte & 0b00011100) >> 2
- self.length = (self.next() & 0b00000011) * 0x100
- self.length += self.next() + 1
- else:
- # 5-bit length
- self.length = (self.next() & 0b00011111) + 1
-
- self.__class__.__dict__[self.cmd_name](self)
-
- self.used_commands += [(
- self.cmd_name,
- {
- 'length': self.length,
- 'address': cmd_address,
- 'offset': self.offset,
- 'cmd_length': self.address - cmd_address,
- 'direction': self.direction,
- }
- )]
-
- # Keep track of the data we just decompressed.
- self.compressed_data = self.lz[self.start : self.address]
-
-
- @property
- def byte(self):
- return self.lz[ self.address ]
-
- def next(self):
- byte = self.byte
- self.address += 1
- return byte
-
- @property
- def cmd_name(self):
- return self.command_names.get(self.cmd)
-
-
- def get_offset(self):
-
- if self.byte >= 0x80: # negative
- # negative
- offset = self.next() & 0x7f
- offset = len(self.output) - offset - 1
- else:
- # positive
- offset = self.next() * 0x100
- offset += self.next()
-
- self.offset = offset
-
-
- def literal(self):
- """
- Copy data directly.
- """
- self.output += self.lz[ self.address : self.address + self.length ]
- self.address += self.length
-
- def iterate(self):
- """
- Write one byte repeatedly.
- """
- self.output += [self.next()] * self.length
-
- def alternate(self):
- """
- Write alternating bytes.
- """
- alts = [self.next(), self.next()]
- self.output += [ alts[x & 1] for x in xrange(self.length) ]
-
- def blank(self):
- """
- Write zeros.
- """
- self.output += [0] * self.length
-
- def flip(self):
- """
- Repeat flipped bytes from output.
-
- Example: 11100100 -> 00100111
- """
- self._repeat(table=bit_flipped)
-
- def reverse(self):
- """
- Repeat reversed bytes from output.
- """
- self._repeat(direction=-1)
-
- def repeat(self):
- """
- Repeat bytes from output.
- """
- self._repeat()
-
- def _repeat(self, direction=1, table=None):
- self.get_offset()
- self.direction = direction
- # Note: appends must be one at a time (this way, repeats can draw from themselves if required)
- for i in xrange(self.length):
- byte = self.output[ self.offset + i * direction ]
- self.output.append( table[byte] if table else byte )