The script searches a file for identical chunks of bytes longer than a given minimum and displays any matches found, with the offset to each occurrence and the hex bytes there so you can see what matched and maybe even do some mental disassembly. The file probably doesn't start at &0000 in the Beeb's memory, so the script lets you add an optional base address to offsets within the file. This lets you play around with the Beeb's memory directly, or search the output from beebasm -v for the nearest .label declaration.
Since a match of N bytes at offset A also implies a match of N-1 bytes at offset A+1, N-2 bytes at A+2 and so forth, the default behaviour is that after a chunk of code has been matched, the script skips to the end of it to look for the next match. But this has the potential to miss partial matches, so it can be turned off. (Of course, any such partial matches will show up anyway if the process is repeated after reassembling the Source Code to get rid of the longest match; but you might spot a short cut in the long output.)
Code: Select all
#!/bin/perl -w
use strict;
use constant { TRUE => 1, FALSE => "" };
use Data::Dumper;
use Getopt::Std;
$| = TRUE;
my %OPTIONS;
getopts "b:ei:km:v", \%OPTIONS;
my $IN_FILE = $OPTIONS{"i"} // "M.PARSE4";
my $MIN_MATCH = $OPTIONS{"m"} // 6;
my $BASE_ADDR = $OPTIONS{"b"} // 0;
my $VERBOSE = $OPTIONS{"v"};
my $DONT_SKIP = $OPTIONS{"k"};
my $EARLIEST = $OPTIONS{"e"};
if ($BASE_ADDR =~ /([0-9a-fA-F]+)/) {
$BASE_ADDR = hex $1;
}
else {
$BASE_ADDR = 0;
};
# Working variables
my ($code, $length, $offset1, $offset2, $char1, $char2, $matched, $longest,
$offset1a, $offset2a, $char1a, $char2a, $done, $i, @matches, @sorted);
######################### BEGIN FUNCTION DEFINITIONS #########################
sub read_file {
my $filename = shift;
my $contents = "";
open FH, "<", $filename or die "Could not open $filename: $!";
while (read FH, $_, 4096) {
$contents .= $_;
};
close FH;
$contents;
};
########################## END FUNCTION DEFINITIONS ##########################
$code = read_file $IN_FILE;
printf "Length of input file = &%04X\n", $length = length $code;
for ($offset1 = 0x500; $offset1 < $length - 2; ++$offset1) {
undef $longest;
$char1 = unpack "C", substr $code, $offset1, 1;
next unless $char1;
for ($offset2 = $offset1 + 1; $offset2 < $length - 1; ++$offset2) {
$char2 = unpack "C", substr $code, $offset2, 1;
next unless $char2 == $char1;
$matched = 1;
$done = FALSE;
while (!$done) {
last if $offset1 + $matched >= $length;
last if $offset2 + $matched >= $length;
$char1a = unpack "C", substr $code, $offset1 + $matched, 1;
$char2a = unpack "C", substr $code, $offset2 + $matched, 1;
if ($char2a == $char1a) {
++$matched;
}
else {
$done = TRUE;
};
};
if ($matched >= $MIN_MATCH) {
if ($VERBOSE) {
printf "Match at &%04X for &%04X ..... ", $offset2 + $BASE_ADDR, $offset1 + $BASE_ADDR;
printf "Length %d\n", $matched;
};
push @matches, {"offset1" => $offset1, "offset2" => $offset2, "length" => $matched};
if (!$longest || $longest < $matched) {
$longest = $matched;
};
};
};
if ($longest) {
if ($VERBOSE) {
printf "Skipping &%02X bytes at %04X\n", $longest, $offset1 + $BASE_ADDR;
};
$offset1 += $longest unless $DONT_SKIP;
};
};
if ($EARLIEST) {
@sorted = sort { $a->{"offset1"} <=> $b->{"offset1"}
|| $b->{"length"} <=> $a->{"length"} } @matches;
}
else {
@sorted = sort { $b->{"length"} <=> $a->{"length"}
|| $a->{"offset1"} <=> $b->{"offset1"} } @matches;
};
undef $offset1a;
foreach (@sorted) {
$offset1 = $_->{"offset1"};
$offset2 = $_->{"offset2"};
$matched = $_->{"length"};
if (!defined $offset1a || $offset1a != $offset1) {
print "\n" if defined $offset1a;
printf "%04X [%02X]:", $offset1 + $BASE_ADDR, $matched;
for ($i = 0; $i < $matched; ++$i) {
$char1 = unpack "C", substr $code, $offset1 + $i, 1;
printf " %02X", $char1;
};
print "\n";
};
printf "%04X [%02X]:", $offset2 + $BASE_ADDR, $matched;
for ($i = 0; $i < $matched; ++$i) {
$char2 = unpack "C", substr $code, $offset2 + $i, 1;
printf " %02X", $char2;
};
print "\n";
$offset1a = $offset1;
};
exit;
##### DEDICATED TO THE PUBLIC DOMAIN BY JULIE KIRSTY LOUISE MONTOYA 2022 #####
Code: Select all
$ find_repeat -i M.SPRITE1 -b 2300 -e -k
As always, this is offered in the hope that it might be found useful but without making any promises. Use it, abuse it, enjoy it, destroy it, study it, share it and adapt it as you will.