Code cleanup; preparation for Japanese Battle e (HEAD, master)

author: Háčky <hatschky@gmail.com> 2014-11-27 07:15:45 +0000
committer: Háčky <hatschky@gmail.com> 2014-11-27 07:15:45 +0000
commit: 319ed6d0e0266f5f6174a5acd929914db8cfae1d (patch)
tree: 76408d9cadcbed1e702efa8c44f4cf5e364b0a4d /scripts
parent: 17853b802692237d98f96d1a80caf2019f71753e (diff)
9 files changed, 266 insertions, 142 deletions
diff --git a/scripts/asmquote.py b/scripts/asmquote.py
new file mode 100644
index 0000000..cfc1440
--- /dev/null
+++ b/scripts/asmquote.py
@@ -0,0 +1,27 @@
+asmProblemBytes = ['\x00', '\x09', '\x0A', '\x22'] # NUL, tab, newline, double quote
+def asmQuote(t):
+	"""Format the string t as a comma-separated list of quoted runs and numbers.
+
+	Consecutive ordinary characters are grouped inside one pair of double
+	quotes; each character listed in asmProblemBytes is written as its
+	decimal ordinal instead, so A, NUL, B becomes "A",0,"B".
+	An empty t yields an empty pair of quotes instead of raising IndexError.
+	"""
+	if not t:
+		return '""'
+
+	parts = [] # finished operands, joined with commas at the end
+	run = "" # ordinary characters waiting to be wrapped in quotes
+	for c in t:
+		if c in asmProblemBytes:
+			if run:
+				# close the open quoted run before the numeric operand
+				parts.append('"' + run + '"')
+				run = ""
+			parts.append('{0}'.format(ord(c)))
+		else:
+			run += c
+	# flush the quoted run still open at the end of the string
+	if run:
+		parts.append('"' + run + '"')
+	return ",".join(parts)
+\ No newline at end of file
diff --git a/scripts/berrychecksum.py b/scripts/berrychecksum.py
deleted file mode 100644
index a973dc2..0000000
--- a/scripts/berrychecksum.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import struct
-import sys
-
-out = open(sys.argv[2], 'w')
-sum = 0
-with open(sys.argv[1], 'rb') as f:
-	while True:
-		byte = f.read(1)
-		if not byte:
-			break
-
-		sum += ord(byte)
-		out.write(byte)
-f.closed
-
-out.write(struct.pack('<I', sum))
-\ No newline at end of file
diff --git a/scripts/checksum.py b/scripts/checksum.py
new file mode 100644
index 0000000..8b5f63c
--- /dev/null
+++ b/scripts/checksum.py
@@ -0,0 +1,89 @@
+import struct
+import sys
+
+chunk_lengths = [0,0,0,6,2,5,12,5,3,1,2,5,5,5,1,13,13] # bytes to skip per chunk, indexed by chunk type
+# each list entry below is [offset to patch, range start, range end], all offsets into data
+bytewises = []
+bytewise_results = []
+wordwises = []
+wordwise_results = []
+crcs = []
+crc_results = []
+
+data = ""
+with open(sys.argv[1], 'rb') as f:
+	data = f.read()
+
+# stored addresses minus base_address (little-endian word at bytes 1-4) are used as offsets into data
+base_address = struct.unpack('<I', data[1:5])[0]
+i = 0x11 # first chunk location
+while i < len(data):
+	chunk_type = ord(data[i])
+	if chunk_type == 0x02: # END_OF_CHUNKS
+		break
+	elif chunk_type == 0x07: # CUSTOM_BERRY
+		start_address = struct.unpack('<I', data[i+1:i+5])[0] - base_address
+		bytewises.append([start_address + 0x52C, start_address, start_address + 0x52C])
+	elif chunk_type == 0x0D: # BATTLE_TRAINER
+		start_address = struct.unpack('<I', data[i+1:i+5])[0] - base_address
+		wordwises.append([start_address + 0xB8, start_address, start_address + 0xB8])
+	elif chunk_type == 0x0F: # CHECKSUM_BYTES
+		start_address = struct.unpack('<I', data[i+5:i+9])[0] - base_address
+		end_address = struct.unpack('<I', data[i+9:i+13])[0] - base_address
+		bytewises.append([i + 1, start_address, end_address]) # patched in place at i + 1, like CHECKSUM_CRC below
+	elif chunk_type == 0x10: # CHECKSUM_CRC
+		start_address = struct.unpack('<I', data[i+5:i+9])[0] - base_address
+		end_address = struct.unpack('<I', data[i+9:i+13])[0] - base_address
+		crcs.append([i + 1, start_address, end_address])
+	elif chunk_type < 0x02 or chunk_type > 0x10:
+		print "Unknown chunk {0:X}".format(chunk_type)
+		raise TypeError
+	i += chunk_lengths[chunk_type]
+
+
+# calculate and insert all wordwise checksums (sum of little-endian 32-bit words, modulo 2**32)
+for wordwise in wordwises:
+	sum = 0
+	for i in range(wordwise[1], wordwise[2], 4):
+		sum = (sum + struct.unpack('<I', data[i:i+4])[0]) & 0xFFFFFFFF
+	wordwise_results.append(sum)
+i = 0 # index into wordwise_results, advanced in step with wordwises
+for wordwise in wordwises:
+	data = data[0:wordwise[0]] + struct.pack('<I', wordwise_results[i]) + data[(wordwise[0] + 4):] # overwrite 4 bytes at wordwise[0]
+	i += 1
+
+
+# calculate and insert all bytewise checksums (sum of single bytes, modulo 2**32)
+for bytewise in bytewises:
+	sum = 0
+	for i in range(bytewise[1], bytewise[2]):
+		sum = (sum + ord(data[i])) & 0xFFFFFFFF
+	bytewise_results.append(sum)
+i = 0 # index into bytewise_results, advanced in step with bytewises
+for bytewise in bytewises:
+	data = data[0:bytewise[0]] + struct.pack('<I', bytewise_results[i]) + data[(bytewise[0] + 4):] # overwrite 4 bytes at bytewise[0]
+	i += 1
+
+
+# calculate and insert all CRC checksums (these see the sums already inserted above)
+for crc in crcs:
+	sum = 0x1121 # starting value of the CRC register
+	for i in range(crc[1], crc[2]):
+		sum ^= ord(data[i])
+		for j in range(8): # one shift step per bit, lowest bit first
+			if(sum & 1):
+				sum = (sum >> 1) ^ 0x8408
+			else:
+				sum >>= 1
+	sum = ~sum & 0xFFFF # complement the result and keep 16 bits
+	crc_results.append(sum)
+
+i = 0 # index into crc_results, advanced in step with crcs
+for crc in crcs:
+	data = data[0:crc[0]] + struct.pack('<I', crc_results[i]) + data[(crc[0] + 4):] # the 16-bit value is stored as a 4-byte field
+	i += 1
+
+
+# write the updated file; binary mode so no byte is newline-translated, and the handle is closed
+with open(sys.argv[2], 'wb') as out:
+	out.write(data)
+\ No newline at end of file
diff --git a/scripts/ereadertext.py b/scripts/ereadertext.py
new file mode 100644
index 0000000..e2bd237
--- /dev/null
+++ b/scripts/ereadertext.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+import sys
+from asmquote import asmQuote
+
+region = sys.argv[3] # region suffix written into the REGION / REGION_NAME definitions
+# argv[1] is the input assembly file, argv[2] the rewritten output
+out = open(sys.argv[2], 'w')
+
+with open(sys.argv[1], 'rb') as f:
+	for asm in f:
+		asms = asm.split('"')
+		command = asms[0].strip()
+		if command == "db":
+			# this is only for the American e-Reader; still need to deal with Japanese
+			asms[1] = asms[1].replace('\\0', '\x00')
+			asms[1] = asms[1].replace('\\n', '\n')
+			asms[1] = asms[1].replace('é', '\x7F')
+			# only the first quoted string on the line is re-emitted
+			out.write("db " + asmQuote(asms[1]) + "\n")
+		else:
+			out.write(asm)
+			if "macros.asm" in asm:
+				out.write("REGION EQU REGION_{0}\n".format(region))
+				out.write('REGION_NAME EQUS "{0}"\n'.format(region))
+# f is closed by the with block; out has to be closed by hand so it is flushed
+out.close()
+\ No newline at end of file
diff --git a/scripts/charmap.py b/scripts/gen3text.py
index cdf3d0a..f235106 100644
--- a/scripts/charmap.py
+++ b/scripts/gen3text.py
@@ -12,8 +12,8 @@ chars = {
 	'か': '\x06',
 	'き': '\x07',
 	'く': '\x08',
-	'け': '\\t',
-	'こ': '\\n',
+	'け': '\x09',
+	'こ': '\x0A',
 	'さ': '\x0B',
 	'し': '\x0C',
 	'す': '\x0D',
@@ -260,6 +260,8 @@ chars = {
 	'\\p': '\xFB',
 	'\{FC}': '\xFC',
 	'\\v1': '\xFD\x01',
+	'\\v2': '\xFD\x02',
+	'\\v3': '\xFD\x03',
 	'\\n': '\xFE',
 	'@': '\xFF',
 
@@ -273,7 +275,7 @@ chars = {
 	'É': '\x06',
 	'Ê': '\x07',
 	'Ë': '\x08',
-	'Ì': '\\t',
+	'Ì': '\x09',
 	'Î': '\x0B',
 	'Ï': '\x0C',
 	'Ò': '\x0D',
@@ -295,7 +297,7 @@ chars = {
 	'ì': '\x1E',
 	'î': '\x20',
 	'ï': '\x21',
-	'ò': '",$22,"',
+	'ò': '\x22',
 	'ó': '\x23',
 	'ô': '\x24',
 	'œ': '\x25',
@@ -354,8 +356,12 @@ chars = {
 	'.': '\xAD',
 	'-': '\xAE',
 	'…': '\xB0',
-	'“': '\xB1', # replaced with « for French, „ for German
-	'”': '\xB2', # replaced with » for French, “ for German
+	'“': '\xB1', # English, Italian, Spanish
+	'«': '\xB1', # French
+	'„': '\xB1', # German
+	'”': '\xB2', # English, Italian, Spanish
+	'»': '\xB2', # French
+	#'“': '\xB2', # German — clashes with English B1
 	'‘': '\xB3',
 	'’': '\xB4',
 	'$': '\xB7',
@@ -416,89 +422,60 @@ chars = {
 	':': '\xF0',
 }
 
-region = ""
-if sys.argv[2][-6] == '-':
-	region = sys.argv[2][-5:-3].upper()
+asmProblemBytes = ['\x00', '\x09', '\x0A', '\x22']
 
-out = open(sys.argv[2], 'w')
-with open(sys.argv[1], 'r') as f:
-	for asm in f:
-		# split by quotes
-		asms = asm.split('"')
 
-		pad_length = 0
-		output = ''
-		print_macro = True
-		if asms[0].strip() == "Text":
-			asms[0] = asms[0].replace("Text", "db")
-		elif asms[0].strip() == "Tag_Text":
-			pad_length = 45
-		elif asms[0].strip() == ("Text_" + region):
-			asms[0] = asms[0].replace("Text_" + region, "db")
-		elif asms[0].find("Text_") != -1:
-			asms[0] = ";"
-		elif asms[0].find("OT_Name") == -1 and asms[0].find("Nickname") == -1 and asms[0].find("Insert_Prologue") == -1 and asms[0].find("Berry") == -1:
-			print_macro = False
-
-		if print_macro:
-			even = False
-			for token in asms:
-				if even:
-					characters = []
-					# token is a string to convert to byte values
-					while len(token):
-						# read a single UTF-8 codepoint
-						char = token[0]
-						if ord(char) < 0xc0:
-							token = token[1:]
-							# handle escape sequences
-							if char == "\\":
-								if token[0] == '{':
-									for i in range(len(token)):
-										char += token[0]
-										token = token[1:]
-										if char[-1] == '}':
-											break
-								elif token[0] == 'v':
-									char += token[0:2]
-									token = token[2:]
-								else:
-									char += token[0]
-									token = token[1:]
-						elif ord(char) < 0xe0:
-							char = char + token[1:2]
-							token = token[2:]
-						elif ord(char) < 0xf0:
-							char = char + token[1:3]
-							token = token[3:]
-						else:
-							char = char + token[1:4]
-							token = token[4:]
-						characters += [char]
-
-					line = 0
-					output += '"'
-					for char in characters:
-						if chars[char] == '\x00':
-							output += '",$00,"'
-						else:
-							output += chars[char]
-					output += '"'
+def utf8ToRSText(t, region = ""):
+	# Convert the UTF-8 string t to game text bytes via the chars table; region "DE" remaps “ to 0xB2.
+	currentChars = dict(chars) # copy, so the override below never alters the shared table
+	currentChars['“'] = '\xB2' if region == "DE" else chars['“']
 
+	characters = [] # tokens: one UTF-8 character or one complete escape sequence each
+	char = "" # token being assembled
+	while len(t):
+		if ord(t[0]) >= 0xF0: # lead byte of a 4-byte sequence
+			char += t[0:4]
+			t = t[4:]
+		elif ord(t[0]) >= 0xE0: # 3-byte sequence
+			char += t[0:3]
+			t = t[3:]
+		elif ord(t[0]) >= 0xC0: # 2-byte sequence
+			char += t[0:2]
+			t = t[2:]
+		else:
+			char += t[0:1]
+			t = t[1:]
+		if char != "\\" and char != "\\v" and (char[0:2] != "\\{" or char[-1] == "}"): # a bare \, \v or unclosed \{ needs more input
+			characters.append(char)
+			char = ""
 
-					if pad_length - len(characters) > 0:
-						output += ",$FF"
-						for i in range(len(characters) + 1, pad_length):
-							output += ",$00"
+	result = ""
+	for char in characters:
+		result += currentChars[char] # KeyError if the token has no table entry
+	return result
 
-				else:
-					output += token
-				even = not even
+def asmQuote(t):
+	# Quote t for the assembler: runs of ordinary bytes in double quotes, asmProblemBytes as decimal numbers.
+	quoted = t[0] not in asmProblemBytes
+	if quoted:
+		result = '"' + t[0]
+	else:
+		result = '{0}'.format(ord(t[0]))
+	t = t[1:] # the first byte is already in result; the loop must not emit it again
 
+	while len(t):
+		if quoted and t[0] in asmProblemBytes:
+			result += '",{0}'.format(ord(t[0]))
+			quoted = False
+		elif quoted:
+			result += t[0]
+		elif t[0] in asmProblemBytes:
+			result += ',{0}'.format(ord(t[0]))
+			quoted = False
 		else:
-			asm = asm.replace("\\0", "\",$00,\"")
-			asm = asm.replace("é",   "\x7F")
-			output = asm
-
-		out.write(output)
-f.closed
+			result += ',"' + t[0]
+			quoted = True
+		t = t[1:]
+	if quoted:
+		result += '"'
+	return result
+\ No newline at end of file
diff --git a/scripts/regionalize.py b/scripts/regionalize.py
new file mode 100644
index 0000000..c178d06
--- /dev/null
+++ b/scripts/regionalize.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+import sys
+from gen3text import utf8ToRSText
+from asmquote import asmQuote
+
+data_region = sys.argv[3] # determines region code
+text_region = sys.argv[4] # determines string translation
+# argv[1] is the input assembly file, argv[2] the regionalized output
+out = open(sys.argv[2], 'w')
+
+with open(sys.argv[1], 'rb') as f:
+	for asm in f:
+		asms = asm.split('"')
+		command = asms[0].strip()
+		if (command == "Text_" + text_region) or (command == "Text"):
+			asms[1] = utf8ToRSText(asms[1], text_region)
+			try:
+				length = asms[2].split(';')[0] # strip trailing comment
+				padding = int(length) - len(asms[1])
+				if padding > 0:
+					asms[1] += '\xFF' # one $FF first, then $00 filler up to length
+				for i in range(padding - 1):
+					asms[1] += "\x00"
+			except ValueError:
+				pass # no usable length after the string: emit the text unpadded
+			out.write("db " + asmQuote(asms[1]) + "\n")
+		elif len(command) < 5 or command[0:5] != "Text_":
+			out.write(asm)
+			if "macros.asm" in asm:
+				# can’t do this until after REGION_EN, etc. are loaded
+				out.write("REGION EQU REGION_" + data_region + "\n")
+		# else this is foreign text, delete it
+out.close() # f is closed by the with block; out has to be closed by hand so it is flushed
+\ No newline at end of file
diff --git a/scripts/scriptchecksum.py b/scripts/scriptchecksum.py
deleted file mode 100644
index d40a12d..0000000
--- a/scripts/scriptchecksum.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import struct
-import sys
-
-out = open(sys.argv[2], 'w')
-sum = 0x1121
-len = 0
-with open(sys.argv[1], 'rb') as f:
-	while True:
-		byte = f.read(1)
-		if not byte:
-			break
-
-		sum ^= ord(byte)
-		for i in range(8):
-			if(sum & 1):
-				sum = (sum >> 1) ^ 0x8408
-			else:
-				sum >>= 1
-		len += 1
-	sum = ~sum & 0xFFFF
-f.closed
-
-out.write(struct.pack('<I', sum))
-out.write(struct.pack('<I', 0x0200001E))
-out.write(struct.pack('<I', 0x0200001E + len))
-with open(sys.argv[1], 'rb') as f:
-	out.write(f.read())
-f.closed
-\ No newline at end of file
diff --git a/scripts/stripgbc.py b/scripts/stripgbc.py
new file mode 100644
index 0000000..146d119
--- /dev/null
+++ b/scripts/stripgbc.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+import sys
+
+out = open(sys.argv[2], 'wb') # binary output: the bytes must be written untranslated
+buffering = False
+buf = ""
+with open(sys.argv[1], 'rb') as f:
+	f.read(256) # skip to $0100
+	while True:
+		byte = f.read(1)
+		if not byte:
+			break
+
+		# the program shall end with $FF followed only by $00 bytes
+		# for every $FF we hit, buffer until something that isn’t $00; whatever is still buffered at EOF is dropped
+		if (not buffering and ord(byte) == 0xFF) or (buffering and ord(byte) == 0x00):
+			buf += byte
+			buffering = True
+		elif buffering and ord(byte) == 0xFF:
+			out.write(buf)
+			buf = byte
+		elif buffering:
+			out.write(buf)
+			out.write(byte)
+			buf = ""
+			buffering = False
+		else:
+			out.write(byte)
+out.close() # f is closed by the with block; out has to be closed by hand so it is flushed
+\ No newline at end of file
diff --git a/scripts/trainerchecksum.py b/scripts/trainerchecksum.py
deleted file mode 100644
index 78d7ffa..0000000
--- a/scripts/trainerchecksum.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import struct
-import sys
-
-out = open(sys.argv[2], 'w')
-sum = 0
-with open(sys.argv[1], 'rb') as f:
-	buf = f.read()
-	data = struct.unpack('<IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII', buf)
-	for word in data:
-		sum = (sum + word) % 2**32
-f.closed
-out.write(buf)
-out.write(struct.pack('<I', sum))
-\ No newline at end of file
author	Háčky <hatschky@gmail.com>	2014-11-27 07:15:45 +0000
committer	Háčky <hatschky@gmail.com>	2014-11-27 07:15:45 +0000
commit	319ed6d0e0266f5f6174a5acd929914db8cfae1d (patch)
tree	76408d9cadcbed1e702efa8c44f4cf5e364b0a4d /scripts
parent	17853b802692237d98f96d1a80caf2019f71753e (diff)