source: xtideuniversalbios/trunk/XTIDE_Universal_BIOS/Src/StringsCompress.pl @ 196

Last change on this file since 196 was 196, checked in by gregli@…, 12 years ago

Added printing of COM port and baud rate, when set explicitly by idecfg. Although it eats some bytes, I think it is worth it, since the BIOS will be looking for a server on a particular com port and baud rate, and it could be hard to troubleshoot a mismatch without this information. However, if we become space crunched, this change can be backed out.

File size: 10.5 KB
RevLine 
[189]1#======================================================================================
2#
3# Project name  :   XTIDE Universal BIOS
4#
5# Authors       :   Greg Lindhorst
6#                   gregli@hotmail.com
7#
8# Description   :   Script for compiling and compressing strings for
9#                   use by DisplayFormatCompressed.asm.  See the header of that file
10#                   for a description of the compression scheme.
11#
12# Usage         :   stdin:  Listing of strings.asm,
13#                           assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14#                           We used the listing so that the assembler can take care of
15#                           resolving %define and EQU symbol definitions.
16#
17#                   stdout: StringsCompressed.asm,
18#                           plug replacement for Strings.asm (included by Main.asm)
19#
20#                   Also see the XTIDE makefile for building StringsCompressed.asm
21#
22
#----------------------------------------------------------------------
#
# Translated and Format characters
#
# DisplayFormatCompressed can only handle a character that falls into one of these
# categories:
#  1. It has an entry in the Translate associative array
#  2. It has an entry in the Format associative array
#  3. It lies between $normal_base and $normal_base+0x40
#  4. It is a null character (marking the end of a string)
#  5. It is part of the special string LF,CR
#
# A character or format seen on input that fits none of the categories above must be
# added here before this script will accept it (and before DisplayFormatCompressed can
# display it).
#
# These tables are not present in DisplayFormatCompressed and do not need to be updated
# there.  The information it needs is put in the compression output that it reads.
#
# Key: character code as it appears in the string.  Value: index in the output table.
#
%translate = (
    ord(' ') => 0,
    172      => 1,      # ONE_QUARTER
    171      => 2,      # ONE_HALF
    179      => 3,      # SINGLE_VERTICAL
    175      => 4,      # ANGLE_QUOTE_RIGHT
    ord('!') => 5,
    ord('"') => 6,
    ord(',') => 7,
    ord('-') => 8,
    ord('.') => 9,
    ord('/') => 10,
    ord('1') => 11,
    ord('2') => 12,
    ord('3') => 13,
    ord('4') => 14,
    ord('5') => 15,
    ord('6') => 16,
    ord('8') => 17,
    200      => 18,     # DOUBLE_BOTTOM_LEFT_CORNER
    181      => 19,     # DOUBLE_LEFT_HORIZONTAL_TO_SINGLE_VERTICAL
);
61
#
# Formats share a table with the Translated characters, and start right after the last
# Translated character.
#
$format_begin = 20;

%format = (
    "s"   => 20,        # n/a
    "c"   => 21,        # n/a
    "2-I" => 22,        # must be even
    "u"   => 23,        # must be odd
    "5-u" => 24,        # must be even
    "x"   => 25,        # must be odd
    "5-x" => 26,        # must be even
    "nl"  => 27,        # n/a
    "2-u" => 28,        # must be even
    "A"   => 29,        # n/a
);

# NOTE: The last $format cannot exceed 31 (stored in a 5-bit quantity).
79
#
# First character of the "normal" range.  Something near 0x40 covers the upper and lower
# case letters; when lower case 'z' is not used, 0x3a is a good choice because it brings
# ':' into the front of the range.
#
$normal_base = 0x3a;

#
# High order bits of each output byte.  They say whether the byte is a translated or a
# normal character and, for a normal character, whether a space or a null follows it.
#
( $code_space, $code_null, $code_normal, $code_translate ) = (
    0xc0,       # normal character followed by a space
    0x80,       # normal character followed by a null
    0x40,       # normal character
    0x00,       # translated character or format
);

#
# Extra bit carried by a translated byte
#
( $code_translate_null, $code_translate_normal ) = (
    0x00,       # a null follows
    0x20,       # no null follows
);
101
#
# Banner at the top of the generated file, telling readers where it came from and that
# it must not be edited by hand.
#
print ";;;======================================================================\n",
      ";;;\n",
      ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n",
      ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n",
      ";;; This file only needs to be rebuilt if Strings.asm is changed.\n",
      ";;;\n",
      ";;;======================================================================\n\n";
109
#
# Walk the listing one line at a time.  Lines that came from an include file are dropped,
# 'db' lines are compressed (after collecting any continuation lines that carry more of
# their bytes), and every other numbered line is copied to the output without its
# listing line number.
#
while( my $line = <> )
{
    #
    # A <number> after the line number marks a line from an include file;
    # those are not copied to the output
    #
    next if $line =~ /^\s*\d+\s*\<\d\>/;

    #
    # a 'db' line, with or without a label
    #
    if( my( $bytes, $more, $label, $spacing, $db, $string ) =
            $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        # keep the source statement in the output, commented out
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' on the byte column means the bytes continue on the next
        # listing line; keep appending until a line without the marker is read
        #
        while( $more eq "-" )
        {
            my $next = <>;
            $next =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation";
            $bytes .= $1;
            $more = $2;
        }

        processString( $bytes, $label.$spacing, $db );
        next;
    }

    #
    # everything else, copy to the output as is
    #
    print $1."\n" if $line =~ /^\s*\d+\s*(.*)$/;
}
160
print ";;; end of strings.asm\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#
# The table holds one byte per translated character (its character code), followed by
# one byte per format (the distance from DisplayFormatCompressed_BaseFormatOffset back
# to that format's handler).  Every entry must have been used by at least one string
# and must have a code no greater than 31.
#

print "StringsCompressed_NormalBase     equ   ", $normal_base, "\n\n";

print "StringsCompressed_FormatsBegin   equ   ", $format_begin, "\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

# Invert %translate into an array indexed by code, validating each entry on the way
for my $char (keys %translate)
{
    my $index = $translate{$char};
    $translate_index[$index] = $char;
    die "translate ".$char." unused\n" unless $used{$char};
    die $index.": translate codes must be below 32" unless $index <= 31;
}

# Emit entries from code 0 upward, stopping at the first empty slot
my $slot = 0;
while( $translate_index[$slot] )
{
    print "        db     ", $translate_index[$slot], "  ; ", $slot, "\n";
    $slot++;
}

# Same inversion for the formats; '-' in a format name becomes '_' in the handler label
for my $name (keys %format)
{
    ( my $suffix = $name ) =~ tr/-/_/;
    my $index = $format{$name};
    $format_index[$index] = "DisplayFormatCompressed_Format_".$suffix;
    die "format ".$name." unused\n" unless $used{$name};
    die $index.": format codes must be below 32" unless $index <= 31;
}

$slot = $format_begin;
while( $format_index[$slot] )
{
    print "        db     (DisplayFormatCompressed_BaseFormatOffset - ", $format_index[$slot], ")    ; ", $slot, "\n";
    $slot++;
}

print "\n";
201
#
# Ensure that branch targets are within reach
#
# For every format handler, emit an assembler-time check that the handler is not past
# DisplayFormatCompressed_BaseFormatOffset and is no more than 255 bytes before it.
# The checks are skipped when CHECK_FOR_UNUSED_ENTRYPOINTS is defined.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
$t = $format_begin;
while( $format_index[$t] )
{
    print "%if DisplayFormatCompressed_BaseFormatOffset < $format_index[$t] || DisplayFormatCompressed_BaseFormatOffset - $format_index[$t] > 255\n",
          "%error \"".$format_index[$t]." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n",
          "%endif\n";
    $t++;
}
print "%endif\n";
[189]213
#--------------------------------------------------------------------------------
#
# Output usage statistics
#
# Written as assembler comments at the end of the generated file.  %used holds the
# counts collected by processString: keyed by character code for translated and normal
# characters, and by format name for formats.  Keys are sorted so that the generated
# file does not change from run to run when the input has not changed.
#

print "\n;; translated usage stats\n";
$translate_count = 0;
foreach $f (sort { $a <=> $b } keys(%translate))
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach $f (sort(keys(%format)))
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

$used_count = 0;
for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";
248
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]   the assembled bytes of one 'db' statement, as pairs of hexadecimal digits
#   $_[1]   the label and spacing that preceded 'db' on the source line; only its width
#           is used, so that the output lines up with the source
#   $_[2]   the 'db' directive together with its trailing whitespace
#
# Prints two lines to stdout: the uncompressed bytes as a comment, followed by the
# compressed 'db' statement.  Every character and format that is encoded is counted in
# %used.  Dies if a byte cannot be encoded or if a format operator is not in %format.
#
# Reads the globals %translate, %format, $normal_base and the $code_* constants.
#
# Each output byte is a code or'ed with one of:
#   $code_normal     normal character; the code is its offset from $normal_base
#   $code_space      normal character followed by a space
#   $code_null       normal character followed by a null
#   $code_translate  the code is an index into the translate/format table, with
#                    $code_translate_normal or'ed in unless a null follows
#

sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;      # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v = ();
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $byte = hex(substr($chars,$pos,2));
        push( @v, $byte );
        # values of a0h and above get a leading zero so they begin with a digit
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ",  %02xh"), $byte );
    }
    push( @v, 0xff );                 # guard byte to avoid thinking going past the end of
                                      # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # $#v is the index of the guard byte, so stopping before it visits every real byte.
    # A final byte that no look-ahead consumed is therefore encoded (or reported by the
    # die below) rather than being silently left out of the output.
    #
    for( my $g = 0; $g < $#v; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional digit, an optional '-', then one character
        #
        elsif( $v[$g] == 0x25 )    # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (tested with exists because the code for space is 0)
        #
        elsif( exists $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post |= $code_translate_null;
            }
            else
            {
                $post |= $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code |= $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ",  %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " that every entry was given
    print $label."; ".$db.substr($orig,2)."    ; uncompressed\n";
    print $label."  ".$db.substr($output,2);

    # pad the shorter compressed line so both trailing comments start in the same column
    my $pad = length($orig) - length($output);
    print " " x $pad if $pad > 0;
    print "    ; compressed\n\n";
}
400
Note: See TracBrowser for help on using the repository browser.