forgot to add calcrom

author: Seth Barberee <seth.barberee@gmail.com> 2021-01-28 12:57:22 -0600
committer: Seth Barberee <seth.barberee@gmail.com> 2021-01-28 12:57:22 -0600
commit: 96297af9cc0ece7833d2729bc0faab8a81fdaada (patch)
tree: f51c69c2ebdf9612355adabd377c564ea29f47fb
parent: 041851f669d3cf1917c9233033b85a7867d81b41 (diff)
1 files changed, 204 insertions, 0 deletions
diff --git a/.github/calcrom/calcrom.pl b/.github/calcrom/calcrom.pl
new file mode 100755
index 0000000..5fe966e
--- /dev/null
+++ b/.github/calcrom/calcrom.pl
@@ -0,0 +1,204 @@
+#!/usr/bin/perl
+
+use IPC::Cmd qw[ run ];
+
+(@ARGV == 1)
+    or die "ERROR: no map file specified.\n";
+open(my $file, $ARGV[0])
+    or die "ERROR: could not open file '$ARGV[0]'.\n";
+
+my $src = 0;
+my $asm = 0;
+my $srcdata = 0;
+my $data = 0;
+while (my $line = <$file>)
+{
+    if ($line =~ /^ \.(\w+)\s+0x[0-9a-f]+\s+(0x[0-9a-f]+) (\w+)\/.+\.o/)
+    {
+        my $section = $1;
+        my $size = hex($2);
+        my $dir = $3;
+
+        if ($section =~ /text/)
+        {
+            if ($dir eq 'src')
+            {
+                $src += $size;
+            }
+            elsif ($dir eq 'asm')
+            {
+                $asm += $size;
+            }
+        }
+        elsif ($section =~ /rodata/)
+        {
+            if ($dir eq 'src')
+            {
+                $srcdata += $size;
+            }
+            elsif ($dir eq 'data')
+            {
+                $data += $size;
+            }
+        }
+    }
+}
+
+(my $elffname = $ARGV[0]) =~ s/\.map/.elf/;
+
+# Note that the grep filters out all branch labels. It also requires a minimum
+# line length of 5, to filter out a ton of generated symbols (like AcCn). No
+# settings to nm seem to remove these symbols. Finally, nm prints out a separate
+# entry for whenever a name appears in a file, not just where it's defined. uniq
+# removes all the duplicate entries.
+#
+#
+# You'd expect this to take a while, because of uniq. It runs in under a second,
+# though. Uniq is pretty fast!
+my $base_cmd = "nm $elffname | awk '{print \$3}' | grep '^[^_].\\{4\\}' | uniq";
+
+# This looks for Unknown_, Unknown_, or sub_, followed by just numbers. Note that
+# it matches even if stuff precedes the unknown, like sUnknown/gUnknown.
+# 'sub_' anchors to the start so it does not consider symbols like 'nullsub_12' undocumented.
+my $undoc_cmd = "grep -E '[Uu]nknown_[0-9a-fA-F]*|^sub_[0-9a-fA-F]*'";
+
+# This looks for every symbol with an address at the end of it. Some things are
+# given a name based on their type / location, but still have an unknown purpose.
+# For example, FooMap_EventScript_FFFFFFF.
+my $partial_doc_cmd = "grep '_[0-38-9][0-9a-fA-F]\\{5,6\\}'";
+
+my $count_cmd = "wc -l";
+
+my $incbin_cmd = "find \"\$(dirname $elffname)\" \\( -name '*.s' -o -name '*.inc' \\) -exec cat {} ';' | grep -oE '^\\s*\\.incbin\\s*\"[^\"]+\"\s*,\\s*(0x)?[0-9a-fA-F]+\\s*,\\s*(0x)?[0-9a-fA-F]+' -";
+
+# It sucks that we have to run this three times, but I can't figure out how to get
+# stdin working for subcommands in perl while still having a timeout. It's decently
+# fast anyway.
+my $total_syms_as_string;
+(run (
+    command => "$base_cmd | $count_cmd",
+    buffer => \$total_syms_as_string,
+    timeout => 60
+))
+    or die "ERROR: Error while getting all symbols: $?";
+
+my $undocumented_as_string;
+(run (
+    command => "$base_cmd | $undoc_cmd | $count_cmd",
+    buffer => \$undocumented_as_string,
+    timeout => 60
+))
+    or die "ERROR: Error while filtering for undocumented symbols: $?";
+
+my $partial_documented_as_string;
+(run (
+    command => "$base_cmd | $partial_doc_cmd | $count_cmd",
+    buffer => \$partial_documented_as_string,
+    timeout => 60
+))
+    or die "ERROR: Error while filtering for partial symbols: $?";
+
+my $incbin_count_as_string;
+(run (
+    command => "$incbin_cmd | $count_cmd",
+    buffer => \$incbin_count_as_string,
+    timeout => 60
+))
+    or die "ERROR: Error while counting incbins: $?";
+
+my $incbin_bytes_as_string;
+(run (
+    command => "(echo -n 'ibase=16;' ; $incbin_cmd | sed -E 's/.*,\\s*0x([0-9a-fA-F]+)/\\1/' | tr '\\n' '+'; echo '0' ) | bc",
+    buffer => \$incbin_bytes_as_string,
+    timeout => 60
+))
+    or die "ERROR: Error while calculating incbin totals: $?";
+
+# Performing addition on a string converts it to a number. Any string that fails
+# to convert to a number becomes 0. So if our converted number is 0, but our string
+# is nonzero, then the conversion was an error.
+my $undocumented = $undocumented_as_string + 0;
+(($undocumented != 0) and ($undocumented_as_string ne "0"))
+    or die "ERROR: Cannot convert string to num: '$undocumented_as_string'";
+
+my $partial_documented = $partial_documented_as_string + 0;
+(($partial_documented != 0) and ($partial_documented_as_string ne "0"))
+	or die "ERROR: Cannot convert string to num: '$partial_documented_as_string'";
+
+my $total_syms = $total_syms_as_string + 0;
+(($total_syms != 0) and ($total_syms_as_string ne "0"))
+    or die "ERROR: Cannot convert string to num: '$total_syms_as_string'";
+
+($total_syms != 0)
+    or die "ERROR: No symbols found.";
+
+my $incbin_count = $incbin_count_as_string + 0;
+(($incbin_count != 0) and ($incbin_count_as_string ne "0"))
+    or die "ERROR: Cannot convert string to num: '$incbin_count_as_string'";
+
+my $incbin_bytes = $incbin_bytes_as_string + 0;
+(($incbin_bytes != 0) and ($incbin_bytes_as_string ne "0"))
+    or die "ERROR: Cannot convert string to num: '$incbin_bytes_as_string'";
+
+
+my $total = $src + $asm;
+my $srcPct = sprintf("%.4f", 100 * $src / $total);
+my $asmPct = sprintf("%.4f", 100 * $asm / $total);
+
+# partial_documented is double-counting the unknown_* and sub_* symbols.
+$partial_documented = $partial_documented - $undocumented;
+
+my $documented = $total_syms - ($undocumented + $partial_documented);
+my $docPct = sprintf("%.4f", 100 * $documented / $total_syms);
+my $partialPct = sprintf("%.4f", 100 * $partial_documented / $total_syms);
+my $undocPct = sprintf("%.4f", 100 * $undocumented / $total_syms);
+
+if ($asm == 0)
+{
+    print "Code decompilation is 100% complete\n"
+}
+else
+{
+    print "$total total bytes of code\n";
+    print "$src bytes of code in src ($srcPct%)\n";
+    print "$asm bytes of code in asm ($asmPct%)\n";
+}
+print "\n";
+
+if ($partial_documented == 0 && $undocumented == 0)
+{
+    print "Documentation is 100% complete\n"
+}
+else
+{
+    print "$total_syms total symbols\n";
+    print "$documented symbols documented ($docPct%)\n";
+    print "$partial_documented symbols partially documented ($partialPct%)\n";
+    print "$undocumented symbols undocumented ($undocPct%)\n";
+}
+
+print "\n";
+my $dataTotal = $srcdata + $data;
+my $srcDataPct = sprintf("%.4f", 100 * $srcdata / $dataTotal);
+my $dataPct = sprintf("%.4f", 100 * $data / $dataTotal);
+
+my $incPct = sprintf("%.4f", 100 * $incbin_bytes / $dataTotal);
+
+if ($data == 0)
+{
+    print "Data porting to C is 100% complete\n"
+}
+else
+{
+    print "$dataTotal total bytes of data\n";
+    print "$srcdata bytes of data in src ($srcDataPct%)\n";
+    print "$data bytes of data in data ($dataPct%)\n";
+}
+
+print "\n";
+
+if ($incbin_count == 0) {
+    print "All incbins have been eliminated\n"
+} else {
+    print "$incbin_bytes bytes of data in $incbin_count incbins ($incPct%)\n"
+}
author	Seth Barberee <seth.barberee@gmail.com>	2021-01-28 12:57:22 -0600
committer	Seth Barberee <seth.barberee@gmail.com>	2021-01-28 12:57:22 -0600
commit	96297af9cc0ece7833d2729bc0faab8a81fdaada (patch)
tree	f51c69c2ebdf9612355adabd377c564ea29f47fb
parent	041851f669d3cf1917c9233033b85a7867d81b41 (diff)