#======================================================================================
#
# Project name  : XTIDE Universal BIOS
#
# Authors       : Greg Lindhorst
#                 gregli@hotmail.com
#
# Description   : Script for compiling and compressing strings for
#                 use by DisplayFormatCompressed.asm. See the header of that file
#                 for a description of the compression scheme.
#
# Usage         : stdin:  Listing of strings.asm,
#                         assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
#                         We used the listing so that the assembler can take care of
#                         resolving %define and EQU symbol definitions.
#
#                 stdout: StringsCompressed.asm,
#                         plug replacement for Strings.asm (included by Main.asm)
#
#                 Also see the XTIDE makefile for building StringsCompressed.asm
#
#----------------------------------------------------------------------
#
# Translated and Format characters
#
# DisplayFormatCompressed can only deal with characters in one of the following categories:
#  1. Those in the Translate associative array
#  2. Those in the Format associative array
#  3. Characters between $normal_base and $normal_base+0x40
#  4. Null characters (marking the end of strings)
#  5. The special string LF,CR
#
# If a character or format read at the input cannot be found in one of the above categories,
# it must be added here before this script will accept it (and DisplayFormatCompressed can
# display it).
#
# Note that these tables are not present in DisplayFormatCompressed, and do not need to be
# updated there. Needed information is put in the compression output that it reads.
#
$translate{ord(' ')} = 0;
$translate{172}      = 1;     # ONE_QUARTER
$translate{171}      = 2;     # ONE_HALF
$translate{179}      = 3;     # SINGLE_VERTICAL
$translate{175}      = 4;     # ANGLE_QUOTE_RIGHT
$translate{ord('!')} = 5;
$translate{ord('"')} = 6;
$translate{ord(',')} = 7;
$translate{ord('-')} = 8;
$translate{ord('.')} = 9;
$translate{ord('/')} = 10;
$translate{ord('1')} = 11;
$translate{ord('2')} = 12;
$translate{ord('3')} = 13;
$translate{ord('4')} = 14;
$translate{ord('5')} = 15;
$translate{ord('6')} = 16;
$translate{ord('8')} = 17;
$translate{200}      = 18;    # DOUBLE_BOTTOM_LEFT_CORNER
$translate{181}      = 19;    # DOUBLE_LEFT_HORIZONTAL_TO_SINGLE_VERTICAL

#
# Formats begin immediately after the last Translated character (they are in the same table)
#
$format_begin = 20;

$format{"s"}   = 20;          # n/a
$format{"c"}   = 21;          # n/a
$format{"2-I"} = 22;          # must be even
$format{"u"}   = 23;          # must be odd
$format{"5-u"} = 24;          # must be even
$format{"x"}   = 25;          # must be odd
$format{"5-x"} = 26;          # must be even
$format{"nl"}  = 27;          # n/a
$format{"2-u"} = 28;          # must be even
$format{"A"}   = 29;          # n/a

# NOTE: The last $format cannot exceed 31 (stored in a 5-bit quantity).

#
# Starting point for the "normal" range, typically around 0x40 to cover upper and lower case
# letters. If lower case 'z' is not used, 0x3a can be a good choice as it adds ':' to the
# front end.
#
$normal_base = 0x3a;

#
# High order code bits, determining which type of character we have (translated or not) and
# if a space or null should come after this character.
#
$code_space     = 0xc0;
$code_null      = 0x80;
$code_normal    = 0x40;
$code_translate = 0x00;

#
# Bit used if it is a translated byte
#
$code_translate_null   = 0x00;
$code_translate_normal = 0x20;

#
# NOTE: the tables and constants above are deliberately left as package globals (no "my"):
# processString, defined further down in this file, reads them and updates %used.
#

print ";;;======================================================================\n";
print ";;;\n";
print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
print ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n";
print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
print ";;;\n";
print ";;;======================================================================\n\n";

#
# Loop through lines of the listing, looking for 'db' lines (and dealing with continuations)
# and compressing each line as it is encountered.
#
while( my $line = <> )
{
    #
    # A <number> marker after the listing line number indicates a line from an include
    # file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d\>/ )
    {
    }
    #
    # a 'db' line, with or without a label
    #
    elsif( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined $label;      # the label is optional

        # echo the original source line as a comment
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' after the hex bytes means the bytes continue on the next listing
        # line; keep appending until a line without the marker is seen.
        #
        while( $continuation eq "-" )
        {
            my $more = <>;
            ( defined $more && $more =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i )
                or die "parse error on continuation";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }
    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of strings.asm\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#

print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

# Invert %translate into a table indexed by code; every entry must have been used
my @translate_index;
foreach my $f (keys(%translate))
{
    $translate_index[$translate{$f}] = $f;
    $used{$f} or die "translate $f unused\n";
    $translate{$f} <= 31 or die $translate{$f}.": translate codes must be below 32";
}

for( my $g = 0; $translate_index[$g]; $g++ )
{
    print " db ".$translate_index[$g]." ; ".$g."\n";
}

# Same for the formats; '-' is not valid in a label, so it becomes '_' in the handler name
my @format_index;
foreach my $f (keys(%format))
{
    my $n = $f;
    $n =~ s/\-/_/g;
    $format_index[$format{$f}] = "DisplayFormatCompressed_Format_".$n;
    $used{$f} or die "format $f unused\n";
    $format{$f} <= 31 or die $format{$f}.": format codes must be below 32";
}

for( my $t = $format_begin; $format_index[$t]; $t++ )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$t].") ; ".$t."\n";
}

print "\n";

#
# Ensure that branch targets are within reach
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for( my $t = $format_begin; $format_index[$t]; $t++ )
{
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$format_index[$t]
        ." || DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$t]." > 255\n";
    print "%error \"".$format_index[$t]." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";

#--------------------------------------------------------------------------------
#
# Output usage statistics
#
# The hashes are walked in table-index order so that the generated file is identical
# from run to run (plain keys() order is not stable between runs of perl).
#

print "\n;; translated usage stats\n";
my $translate_count = 0;
foreach my $f (sort { $translate{$a} <=> $translate{$b} } keys(%translate))
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
my $format_count = 0;
foreach my $f (sort { $format{$a} <=> $format{$b} } keys(%format))
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";
my $used_count = 0;
for( my $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# processString( $chars, $label, $db )
#
#   $chars - hexadecimal digits (two per byte) of the assembled bytes of one 'db' statement,
#            as taken from the listing
#   $label - label and spacing text that preceded the 'db'; only its width matters, label
#            characters are blanked out so that the output lines up with the source line
#   $db    - the 'db' directive text, including its trailing whitespace
#
# Prints two lines: the original bytes as a commented-out 'db' ("uncompressed") and the
# encoded bytes as a real 'db' ("compressed").
#
# Reads the package globals %translate, %format, $normal_base and the $code_* constants,
# and counts every translate/format/normal character it encodes in %used.
#
# Dies if a format operator is not in %format, or if a byte is neither translated, nor a
# format, nor within the normal range.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;        # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v = map { hex($_) } ( $chars =~ /../g );
    my $orig = join( ", ", map { _processString_hexLiteral($_) } @v );

    push( @v, 0xff );                   # guard byte to avoid thinking going past the end of
                                        # the string is a null

    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # $#v is the index of the guard byte, so "$g < $#v" visits every real byte, including
    # the last one of a string that has no terminating null.
    #
    my @encoded;
    for( my $g = 0; $g < $#v; $g++ )
    {
        my $code;
        my $is_normal = 0;

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $code = $format{"nl"};
            $used{"nl"}++;
        }
        #
        # Format operators: '%', an optional single digit, an optional '-', then one character
        #
        elsif( $v[$g] == 0x25 )         # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            exists $format{$fo} or die "unknown format operator: '".$fo."'\n";
            $code = $format{$fo};
            $used{$fo}++;
        }
        #
        # Translated characters (exists, because a translate code of 0 is a valid entry)
        #
        elsif( exists $translate{$v[$g]} )
        {
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }
        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;
            $code = $v[$g] - $normal_base;
            $is_normal = 1;
        }
        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        my $post;
        if( $is_normal )
        {
            #
            # Space optimization
            # can't take this optimization if the byte after the space is a null,
            # since we can't have both a postfix space and null
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                $g++;
                $post = $code_space;
            }
            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
            else
            {
                $post = $code_normal;
            }
        }
        else
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_translate | $code_translate_null;
            }
            else
            {
                $post = $code_translate | $code_translate_normal;
            }
        }

        push( @encoded, _processString_hexLiteral( $code | $post ) );
    }

    my $output = join( ", ", @encoded );

    print $label."; ".$db.$orig." ; uncompressed\n";
    print $label." ".$db.$output;
    # pad so the trailing comment starts where the uncompressed line's comment does
    # (a negative repeat count yields an empty string)
    print " " x ( length($orig) - length($output) );
    print " ; compressed\n\n";
}

#
# Private helper for processString: format one byte as an assembler hexadecimal literal.
# Values above 0x9f get a leading zero so that the literal begins with a decimal digit.
#
sub _processString_hexLiteral
{
    my( $value ) = @_;
    return sprintf( ($value > 0x9f ? "%03xh" : "%02xh"), $value );
}