source: xtideuniversalbios/trunk/Tools/StringsCompress.pl @ 277

Last change on this file since 277 was 277, checked in by gregli@…, 12 years ago

Moved the bulk of the serial code to the assembly library, for inclusion in other utilities. Fixed a bug in int13h.asm when floppy support was not enabled that was preventing foreign drives from working properly.

File size: 10.4 KB
Line 
1#======================================================================================
2#
3# Project name  :   XTIDE Universal BIOS
4#
5# Authors       :   Greg Lindhorst
6#                   gregli@hotmail.com
7#
8# Description   :   Script for compiling and compressing strings for
9#                   use by DisplayFormatCompressed.asm.  See the header of that file
10#                   for a description of the compression scheme.
11#
12# Usage         :   stdin:  Listing of strings.asm,
13#                           assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14#                           We used the listing so that the assembler can take care of
15#                           resolving %define and EQU symbol definitions.
16#
17#                   stdout: StringsCompressed.asm,
18#                           plug replacement for Strings.asm (included by Main.asm)
19#
20#                   Also see the XTIDE makefile for building StringsCompressed.asm
21#
22
23#----------------------------------------------------------------------
24#
25# Translated, Format, and "Normal" characters
26#
27# DisplayFormatCompressed can only deal with characters in one of the following categories:
28#  1. Those in the Translate associative array
29#  2. Those in the Format associative array
30#  3. Characters between $normal_base and $normal_base+0x40
31#     (typically covers upper and lower case alphabets)
32#  4. Null characters (marking the end of strings)
33#  5. The special string LF,CR
34#
35# If a character or format read at the input cannot be found in one of the above categories,
36# it must be added here before this script will accept it (and DisplayFormatCompressed can
37# display it).
38#
# Tables for the above categories are expected in the input stream, before the strings to be
# compressed are provided.  Note that these tables are not present in DisplayFormatCompressed,
# and do not need to be updated there.  The information it needs is put in the compression
# output that it reads.
43#
44
#
# Values OR-ed into each compressed byte by processString.  The high order bits say
# whether the byte is a "normal" character or a translated character / format, and,
# for normal characters, whether a space or a null follows it.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# Extra bit for translated / format bytes: clear when a null follows the byte,
# set when it does not.
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
59
#
# Banner at the top of the generated file.  A non-interpolating here-document is used
# so the text appears exactly as it is emitted; the empty line before the terminator
# produces the blank line that follows the banner.
#
print <<'BANNER';
;;;======================================================================
;;;
;;; This file is generated by StringsCompress.pl from source in Strings.asm
;;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.
;;; This file only needs to be rebuilt if Strings.asm is changed.
;;;
;;;======================================================================

BANNER
67
68
#
# First pass: read the whole listing from the input stream and pick up our table
# directives.  $translate{...}, $format{...}, $format_begin and $normal_base are
# expected in the input, written as assembler comments (";$translate{...} = n").
#
# Every input line is kept in @lines for the second pass.  Lines that were recognised
# as a directive get the $processed marker appended to them.
#
$processed = "    [StringsCompress Processed]";
while( defined( my $line = <> ) )
{
    # chomp, not chop: only a trailing newline is removed, so a final line that has
    # no newline does not lose its last character.
    chomp( $line );
    my $annotated = $line;

    #
    # Table entries for this script
    #
    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        # ;$translate{ord('c')} = n   - character given literally
        $translate{ord($2)} = int($3);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        # ;$translate{nnn} = n        - character given as a decimal code
        $translate{int($2)} = int($3);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        # The hexadecimal form must be tried before the decimal form below, which
        # would otherwise match just the leading "0" of "0x..".
        $normal_base = hex($2);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
        $annotated .= $processed;
    }

    push( @lines, $annotated );
}
115
#
# Second pass: loop through the lines of the listing collected in @lines, looking for
# 'db' lines (and dealing with continuations) and compressing each one as it is
# encountered.  Everything else is copied to the output, minus the listing line number.
#
# The loop runs up to and including $#lines (the index of the last element), so the
# final line of the listing is processed as well.
#
for( my $idx = 0; $idx <= $#lines; $idx++ )
{
    my $line = $lines[$idx];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined $label;      # the label is optional

        # keep the original source line in the output, commented out
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' after the hex bytes means the bytes continue on the next
        # listing line; keep appending until a line without the '-' is reached.
        #
        while( $continuation eq "-" )
        {
            my $next = $lines[++$idx];
            $next = "" unless defined $next;    # ran off the end of the listing
            $next =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$next."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( $line =~ /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}
176
print ";;; end of input stream\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#
# The table is written in code order: translated characters first, counting up from
# code 0, then one entry per format handler, counting up from $format_begin.  Both runs
# stop at the first code that has no entry.
#

print "StringsCompressed_NormalBase     equ   $normal_base\n\n";

print "StringsCompressed_FormatsBegin   equ   $format_begin\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate (character => code) into @translate_index (code => character),
# refusing table entries that no string used or whose code does not fit.
#
foreach my $char (keys(%translate))
{
    my $slot = $translate{$char};
    $translate_index[$slot] = $char;
    die "translate $char unused\n" unless $used{$char};
    die $slot.": translate codes must be below 32" if $slot > 31;
}

my $translate_slot = 0;
while( $translate_index[$translate_slot] )
{
    print "        db     $translate_index[$translate_slot]  ; $translate_slot\n";
    $translate_slot++;
}

#
# Same for the formats; the entry is the handler's label, with '-' in the format name
# replaced by '_'.
#
foreach my $name (keys(%format))
{
    ( my $suffix = $name ) =~ tr/-/_/;
    my $slot = $format{$name};
    $format_index[$slot] = "DisplayFormatCompressed_Format_".$suffix;
    die "format $name unused\n" unless $used{$name};
    die $slot.": format codes must be below 32" if $slot > 31;
}

# The format codes that have a handler, in ascending order starting at $format_begin.
my @format_slots;
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    push( @format_slots, $slot );
}

foreach my $slot (@format_slots)
{
    print "        db     (DisplayFormatCompressed_BaseFormatOffset - $format_index[$slot])    ; $slot\n";
}

print "\n";

#
# Ensure that branch targets are within reach: each table entry above is a single
# byte, so the assembler is asked to reject offsets outside 0..255.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
foreach my $slot (@format_slots)
{
    my $handler = $format_index[$slot];
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$handler." || DisplayFormatCompressed_BaseFormatOffset - ".$handler." > 255\n";
    print "%error \"".$handler." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
229
#--------------------------------------------------------------------------------
#
# Output usage statistics
#
# Written as assembler comments at the end of the generated file: how often each
# translated character, format and "normal" character was used (counts come from
# %used, which processString fills in).
#

print "\n;; translated usage stats\n";
$translate_count = 0;       # start from 0 so an empty table still prints a number
foreach $f (keys(%translate))
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach $f (keys(%format))
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

# One line per character in the normal range, and a count of those actually used.
$used_count = 0;
for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";
264
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]  the bytes of one 'db' statement, as the string of hexadecimal digit pairs
#          taken from the listing
#   $_[1]  the label and spacing that preceded 'db' on the source line; only used to
#          indent the output (label characters are blanked out)
#   $_[2]  the 'db' keyword with its trailing whitespace
#
# Prints two lines: the uncompressed bytes as a comment, and the compressed 'db'.
# Reads the global tables %translate, %format, $normal_base and the $code_* constants,
# and counts every character / format it encodes in the global %used.
# Dies if a byte cannot be encoded or a format operator is not in %format.
#

sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;      # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v = map { hex($_) } ( $chars =~ /([0-9A-Fa-f]{2})/g );
    my $count = scalar( @v );         # number of real bytes, not counting the guard

    my $orig = "";
    foreach my $byte (@v)
    {
        # values above 9fh get a leading 0 so the assembler reads them as numbers
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ",  %02xh"), $byte );
    }

    push( @v, 0xff );                 # guard byte to avoid thinking going past the end of
                                      # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats.
    # Every real byte is visited; a byte that was absorbed by the previous one (second
    # half of LF,CR, format operator characters, optimized space or null) is skipped
    # by the extra $g++ in the branch that absorbed it.
    #
    for( my $g = 0; $g < $count; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional single digit, an optional '-', and the
        # operator character
        #
        elsif( $v[$g] == 0x25 )    # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (space is always accepted here, whatever its code)
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ",  %02xh"), $code );
    }

    # substr(...,2) drops the ", " in front of the first byte
    print $label."; ".$db.substr($orig,2)."    ; uncompressed\n";
    print $label."  ".$db.substr($output,2);

    # pad the compressed line so its trailing comment lines up with the one above
    my $padding = length($orig) - length($output);
    print " " x $padding if $padding > 0;
    print "    ; compressed\n\n";
}
416
Note: See TracBrowser for help on using the repository browser.