source: xtideuniversalbios/trunk/XTIDE_Universal_BIOS/Src/StringsCompress.pl @ 194

Last change on this file since 194 was 194, checked in by gregli@…, 12 years ago

ifdef'd out more unused code. Also added a tool for looking through the listing and the output of the precompiler to aid in finding dead code. Some changes in the files are to add annotations for the tool to avoid false positives.

File size: 10.5 KB
Line 
1#======================================================================================
2#
3# Project name  :   XTIDE Universal BIOS
4#
5# Authors       :   Greg Lindhorst
6#                   gregli@hotmail.com
7#
8# Description   :   Script for compiling and compressing strings for
9#                   use by DisplayFormatCompressed.asm.  See the header of that file
10#                   for a description of the compression scheme.
11#
12# Usage         :   stdin:  Listing of strings.asm,
13#                           assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14#                           We used the listing so that the assembler can take care of
15#                           resolving %define and EQU symbol definitions.
16#
17#                   stdout: StringsCompressed.asm,
18#                           plug replacement for Strings.asm (included by Main.asm)
19#
20#                   Also see the XTIDE makefile for building StringsCompressed.asm
21#
22
23#----------------------------------------------------------------------
24#
25# Translated and Format characters
26#
27# DisplayFormatCompressed can only deal with characters in one of the following categories:
28#  1. Those in the Translate associative array
29#  2. Those in the Format associative array
30#  3. Characters between $normal_base and $normal_base+0x40
31#  4. Null characters (marking the end of strings)
32#  5. The special string LF,CR
33#
34# If a character or format read at the input cannot be found in one of the above categories,
35# it must be added here before this script will accept it (and DisplayFormatCompressed can
36# display it).
37#
# Note that these tables are not present in DisplayFormatCompressed, and do not need to be
# updated there.  The information it needs is put in the compression output that it reads.
40#
#
# Translate table: key is the character code as it appears in the string,
# value is the index that character gets in the translate/format table.
#
%translate = (
    ord(' ') => 0,
    172      => 1,      # ONE_QUARTER
    171      => 2,      # ONE_HALF
    179      => 3,      # SINGLE_VERTICAL
    175      => 4,      # ANGLE_QUOTE_RIGHT
    ord('!') => 5,
    ord('"') => 6,
    ord(',') => 7,
    ord('-') => 8,
    ord('.') => 9,
    ord('/') => 10,
    ord('1') => 11,
    ord('2') => 12,
    ord('3') => 13,
    ord('4') => 14,
    ord('5') => 15,
    ord('6') => 16,
    ord('8') => 17,
    200      => 18,     # DOUBLE_BOTTOM_LEFT_CORNER
    181      => 19,     # DOUBLE_LEFT_HORIZONTAL_TO_SINGLE_VERTICAL
);
61
62#
63# Formats begin immediately after the last Translated character (they are in the same table)
64#
$format_begin = 20;

#
# Format table: key is the format operator as written after '%' in the source
# string, value is its index in the translate/format table.
#
%format = (
    "s"   => 20,        # n/a
    "c"   => 21,        # n/a
    "2-I" => 22,        # must be even
    "u"   => 23,        # must be odd
    "5-u" => 24,        # must be even
    "x"   => 25,        # must be odd
    "5-x" => 26,        # must be even
    "nl"  => 27,        # n/a
    "2-u" => 28,        # must be even
    "A"   => 29,        # n/a
);
77
78# NOTE: The last $format cannot exceed 31 (stored in a 5-bit quantity).
79
80#
81# Starting point for the "normal" range, typically around 0x40 to cover upper and lower case
82# letters.  If lower case 'z' is not used, 0x3a can be a good choice as it adds ':' to the
83# front end.
84#
$normal_base = 0x3a;

#
# Values for the two high-order bits of an output byte.  They say whether the byte is a
# translated/format code or a "normal" character, and for a normal character whether a
# space or a null follows it.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# Extra bit used only on translated/format bytes: clear when a null follows the byte,
# set when it does not.
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
101
#
# Banner placed at the top of the generated file.  Written as a single-quoted heredoc
# so the text appears exactly as it will in the output (the trailing empty line inside
# the heredoc produces the blank line that follows the banner).
#
print <<'END_OF_BANNER';
;;;======================================================================
;;;
;;; This file is generated by StringsCompress.pl from source in Strings.asm
;;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.
;;; This file only needs to be rebuilt if Strings.asm is changed.
;;;
;;;======================================================================

END_OF_BANNER
109
110#
111# Loop through lines of the listing, looking for 'db' lines (and dealing with continuations)
112# and compressing each line as it is encountered.
113#
#
# Walk the listing one line at a time.  Each line falls into one of three cases:
#   - a line marked <n> right after the listing line number: dropped
#   - a 'db' line (optionally labelled, optionally continued): echoed as a comment
#     and handed to processString for compression
#   - anything else that starts with a listing line number: copied through with the
#     line number stripped
# Lines that match none of these produce no output.
#
while( defined( my $line = <> ) )
{
    #
    # The <number> indicates a line from an include file, do not include in the output.
    # \d+ rather than \d so that nesting levels of 10 or more are recognised as well.
    #
    next if $line =~ /^\s*\d+\s*\<\d+\>/;

    #
    # a 'db' line, with or without a label
    #
    if( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined( $label );     # unlabelled db line

        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' on the byte dump means the dump continues on the next listing
        # line; keep appending until a line without the marker is seen.
        #
        while( $continuation eq "-" )
        {
            my $more = <>;
            defined( $more ) || die "unexpected end of input on continuation";
            $more =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # everything else, copy to the output as is
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}
160
print ";;; end of strings.asm\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#
# The table is emitted as one run of 'db' lines: first the translated characters, then
# the format handlers.  The position of a line in that run is the code stored in the
# compressed strings, so the translate codes must fill 0 .. $format_begin-1 exactly and
# the format codes must run upward from $format_begin without holes.  Those conditions
# are checked here; a violation stops the script instead of producing a shifted table.
#

print "StringsCompressed_NormalBase     equ   ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin   equ   ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate into @translate_index (code -> character).  Visiting the keys in
# code order keeps any "unused" notes in the same order on every run.
#
foreach my $char ( sort { $translate{$a} <=> $translate{$b} } keys(%translate) )
{
    my $slot = $translate{$char};

    $slot <= 31 || die $slot.": translate codes must be below 32";
    $slot < $format_begin || die $slot.": translate codes must be below the first format code (".$format_begin.")\n";
    defined( $translate_index[$slot] ) && die $slot.": translate code assigned to more than one character\n";

    $translate_index[$slot] = $char;
    $used{$char} || print "translate $char unused\n";
}

for( my $slot = 0; $slot < $format_begin; $slot++ )
{
    defined( $translate_index[$slot] ) || die $slot.": no character assigned to this translate code, formats would be misplaced in the table\n";
}

for( my $slot = 0; $slot < $format_begin; $slot++ )
{
    print "        db     ".$translate_index[$slot]."  ; ".$slot."\n";
}

#
# Same inversion for the formats; the entry is the name of the handler label, with any
# '-' in the format operator replaced by '_'.
#
foreach my $name ( keys(%format) )
{
    my $slot = $format{$name};
    my $suffix = $name;
    $suffix =~ s/\-/_/g;

    $format_index[$slot] = "DisplayFormatCompressed_Format_".$suffix;
    $used{$name} || die "format $name unused\n";
    $slot <= 31 || die $slot.": format codes must be below 32";
    $slot >= $format_begin || die $slot.": format codes must not be below the first format code (".$format_begin.")\n";
}

#
# With one slot per format and no holes, the last format code is known in advance.  A
# hole or a code shared by two formats leaves one of these slots empty.
#
my $format_end = $format_begin + scalar(keys(%format));

for( my $slot = $format_begin; $slot < $format_end; $slot++ )
{
    defined( $format_index[$slot] ) || die $slot.": no format assigned to this code, format codes must be unique and contiguous\n";
}

for( my $slot = $format_begin; $slot < $format_end; $slot++ )
{
    print "        db     (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$slot].")    ; ".$slot."\n";
}

print "\n";
201
202#
203# Ensure that branch targets are within reach
204#
#
# For every format handler, emit an assembler %if that raises %error when the handler
# lies above DisplayFormatCompressed_BaseFormatOffset or more than 255 bytes below it.
# The whole group is wrapped in %ifndef CHECK_FOR_UNUSED_ENTRYPOINTS.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
my $slot = $format_begin;
while( $format_index[$slot] )
{
    my $handler = $format_index[$slot];
    print '%if DisplayFormatCompressed_BaseFormatOffset < '.$handler
        .' || DisplayFormatCompressed_BaseFormatOffset - '.$handler." > 255\n";
    print '%error "'.$handler.' is out of range of DisplayFormatCompressed_BaseFormatOffset"'."\n";
    print "%endif\n";
    $slot++;
}
print "%endif\n";
213
214#--------------------------------------------------------------------------------
215#
216# Output usage statistics
217#
218
#
# Usage statistics, written as assembler comments at the end of the generated file.
# Each section lists how many times an entry was used (the count is blank for an entry
# that was never used) followed by a total.  Keys are sorted so that the output does
# not change from run to run.
#

print "\n;; translated usage stats\n";
foreach my $char ( sort { $a <=> $b } keys(%translate) )
{
    print ";; ".$char.":".$used{$char}."\n";
}
print ";; total translated: ".scalar(keys(%translate))."\n";

print "\n;; format usage stats\n";
foreach my $name ( sort( keys(%format) ) )
{
    print ";; ".$name.":".$used{$name}."\n";
}
print ";; total format: ".scalar(keys(%format))."\n";

print "\n;; alphabet usage stats\n";

#
# The "normal" range is the 0x40 character codes starting at $normal_base.
#
my $used_count = 0;
for( my $code = $normal_base; $code < $normal_base + 0x40; $code++ )
{
    print ";; ".$code.",".chr($code).":".$used{$code}."\n";
    $used_count++ if $used{$code};
}
print ";; alphabet used count: ".$used_count."\n";
248
249#--------------------------------------------------------------------------------
250#
251# processString does the real compression work...
252#
253
#
# processString( $chars, $label, $db )
#
#   $chars   the bytes of one 'db' line as a string of hexadecimal digit pairs
#   $label   the label and the whitespace after it; used only for alignment, every
#            label character is replaced by a space before printing
#   $db      the 'db' directive with its trailing whitespace
#
# Prints two lines to stdout: the original bytes as a commented-out 'db' line, and the
# compressed bytes as a live 'db' line.  Counts each character and format it encodes
# in %used.  Reads %translate, %format, $normal_base and the $code_* constants.
#
# Dies when a byte cannot be encoded: an unknown format operator, a character that is
# neither translated nor in the normal range, or a null with nothing before it to
# carry the null flag.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;      # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $byte = hex(substr($chars,$pos,2));
        push( @v, $byte );
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ",  %02xh"), $byte );
    }
    my $count = scalar(@v);           # number of real bytes, not counting the guard
    push( @v, 0xff );                 # guard byte, so that looking one past the last
                                      # real byte is never mistaken for a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # The loop covers every real byte.  A terminating null is normally swallowed by the
    # byte before it, so the loop only lands on the final byte when that did not happen.
    #
    for( my $g = 0; $g < $count; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional single digit, an optional '-', then the
        # operator character
        #
        elsif( $v[$g] == 0x25 )    # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (space is tested explicitly because its code is 0)
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # A null that was not absorbed by the preceding byte has no encoding of its own
        #
        elsif( $v[$g] == 0 )
        {
            die "null at byte ".$g." does not follow an encodable character and cannot be compressed\n";
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ",  %02xh"), $code );
    }

    #
    # Both strings start with a two character separator, which substr() removes.  The
    # compressed line is padded to the width of the uncompressed one so that the
    # trailing comments line up.
    #
    print $label."; ".$db.substr($orig,2)."    ; uncompressed\n";
    print $label."  ".$db.substr($output,2);
    print " " x ( length($orig) - length($output) );
    print "    ; compressed\n\n";
}
400
Note: See TracBrowser for help on using the repository browser.