source: xtideuniversalbios/trunk/Tools/StringsCompress.pl @ 197

Last change on this file since 197 was 197, checked in by gregli@…, 12 years ago

Some maintenance; no changes to the actual source. Moved the compression tables out of the compression script and into the source file, making the compression script source agnostic. And thus moved the compression script to the Tools directory.

File size: 10.2 KB
Line 
#======================================================================================
#
# Project name  :   XTIDE Universal BIOS
#
# Authors       :   Greg Lindhorst
#                   gregli@hotmail.com
#
# Description   :   Script for compiling and compressing strings for
#                   use by DisplayFormatCompressed.asm.  See the header of that file
#                   for a description of the compression scheme.
#
# Usage         :   stdin:  Listing of strings.asm,
#                           assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
#                           We used the listing so that the assembler can take care of
#                           resolving %define and EQU symbol definitions.
#
#                   stdout: StringsCompressed.asm,
#                           plug replacement for Strings.asm (included by Main.asm)
#
#                   Also see the XTIDE makefile for building StringsCompressed.asm
#

#----------------------------------------------------------------------
#
# Translated, Format, and "Normal" characters
#
# DisplayFormatCompressed can only deal with characters in one of the following categories:
#  1. Those in the Translate associative array
#  2. Those in the Format associative array
#  3. Characters between $normal_base and $normal_base+0x40
#     (typically covers upper and lower case alphabets)
#  4. Null characters (marking the end of strings)
#  5. The special string LF,CR
#
# If a character or format read at the input cannot be found in one of the above categories,
# it must be added here before this script will accept it (and DisplayFormatCompressed can
# display it).
#
# Tables for the above categories are expected in the input stream, before the strings to be
# compressed are provided.  Note that these tables are not present in DisplayFormatCompressed,
# and do not need to be updated there.  Needed information is put in the compression output
# that it reads.
#

#
# High order code bits, determining which type of character we have (translated or not) and
# if a space or null should come after this character.
#
# NOTE: these constants, and the %translate, %format, %used, $normal_base and $format_begin
# tables filled in below, are deliberately left as package globals (no "my"): processString
# reads and updates them by name.
#
$code_space = 0xc0;
$code_null = 0x80;
$code_normal = 0x40;
$code_translate = 0x00;

#
# Bit used if it is a translated byte
#
$code_translate_null = 0x00;
$code_translate_normal = 0x20;

print ";;;======================================================================\n";
print ";;;\n";
print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
print ";;; DO NOT EDIT DIRECTLY - See the maekfile for how to rebuild this file.\n";
print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
print ";;;\n";
print ";;;======================================================================\n\n";


#
# On a first pass, look for our table directives.  $translate{...}, $format{...}, etc.
# are expected in the input stream, written as assembler comments (leading ';').
# Every input line is remembered for the second pass; directive lines get a marker
# appended so that they can be recognised in the generated file.
#
my $processed = "    [StringsCompress Processed]";
my @lines;
while( my $line = <> )
{
    # chomp, not chop: chop would eat the last real character of a final line
    # that has no trailing newline
    chomp( $line );

    my $is_directive = 1;

    #
    # Table entries for this script
    #
    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{ord($2)} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{int($2)} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
    }
    # the hexadecimal form must be tried before the decimal form, which would
    # otherwise match just the leading "0" of "0x..."
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        $normal_base = hex($2);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
    }
    else
    {
        $is_directive = 0;
    }

    push( @lines, $is_directive ? $line.$processed : $line );
}

#
# Without these two values the generated "equ" lines would be empty and the strings
# could not be encoded, so stop here with a clear message instead.
#
defined( $normal_base )  || die "no \$normal_base directive found in the input\n";
defined( $format_begin ) || die "no \$format_begin directive found in the input\n";

#
# On the second pass, loop through lines of the listing, looking for 'db' lines
# (and dealing with continuations) and compressing each line as it is encountered.
# The loop runs up to and including the last line ($#lines is the last valid index).
#
for( my $idx = 0; $idx <= $#lines; $idx++ )
{
    my $line = $lines[$idx];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d+\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );

        # echo the original source line, commented out
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' after the hex bytes means the data continues on the next
        # listing line; keep appending bytes until a line without the '-' is seen.
        #
        while( $continuation eq "-" )
        {
            my $next = $lines[++$idx];
            $next =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$next."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#

print "StringsCompressed_NormalBase     equ   ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin   equ   ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert the translate table (code -> character), checking that every entry was
# actually used by some string and that its code fits in the available bits.
# Keys are sorted so that any error is reported deterministically.
#
my @translate_index;
foreach my $char (sort { $a <=> $b } keys(%translate))
{
    $translate_index[$translate{$char}] = $char;
    $used{$char} || die "translate $char unused\n";
    $translate{$char} <= 31 || die $translate{$char}.": translate codes must be below 32";
}

for( my $code = 0; $translate_index[$code]; $code++ )
{
    print "        db     ".$translate_index[$code]."  ; ".$code."\n";
}

#
# Same for the formats; '-' is not valid in an assembler symbol, so it becomes '_'
# in the name of the handler label.
#
my @format_index;
foreach my $name (sort(keys(%format)))
{
    my $symbol = $name;
    $symbol =~ s/\-/_/g;
    $format_index[$format{$name}] = "DisplayFormatCompressed_Format_".$symbol;
    $used{$name} || die "format $name unused\n";
    $format{$name} <= 31 || die $format{$name}.": format codes must be below 32";
}

for( my $code = $format_begin; $format_index[$code]; $code++ )
{
    print "        db     (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$code].")    ; ".$code."\n";
}

print "\n";

#
# Ensure that branch targets are within reach
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for( my $code = $format_begin; $format_index[$code]; $code++ )
{
    my $target = $format_index[$code];
    print "%if DisplayFormatCompressed_BaseFormatOffset < $target || DisplayFormatCompressed_BaseFormatOffset - $target > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";

#--------------------------------------------------------------------------------
#
# Output usage statistics
#
# Keys are sorted so that the generated file is identical from run to run
# (the order returned by keys() is not specified).
#

print "\n;; translated usage stats\n";
my $translate_count = 0;
foreach my $char (sort { $a <=> $b } keys(%translate))
{
    print ";; ".$char.":".$used{$char}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
my $format_count = 0;
foreach my $name (sort(keys(%format)))
{
    print ";; ".$name.":".$used{$name}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

my $used_count = 0;
for( my $char = $normal_base; $char < $normal_base + 0x40; $char++ )
{
    print ";; ".$char.",".chr($char).":".$used{$char}."\n";
    if( $used{$char} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]   the assembled bytes of one string, as a run of two-digit hexadecimal pairs
#   $_[1]   label and spacing text; label characters are blanked out, so only its width
#           is used, to indent the output
#   $_[2]   the 'db' directive text to put in front of the data
#
# Prints two lines followed by a blank line: the original bytes as a commented-out 'db',
# then the compressed bytes as a live 'db', padded so that the trailing comments line up.
#
# Reads the package globals %format, %translate, $normal_base and the $code_* constants,
# and counts every character or format it encodes in %used.
#
# Dies if a byte cannot be encoded: an unknown format operator, LF,CR without an "nl"
# format, or a byte that is neither translated nor in the normal range (this includes a
# NULL that does not directly follow an encodable character).
#

sub processString
{
    use strict;
    use warnings;

    # tables and constants owned by the main script
    our( %format, %translate, %used, $normal_base );
    our( $code_space, $code_null, $code_normal, $code_translate );
    our( $code_translate_null, $code_translate_normal );

    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;      # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $byte = hex(substr($chars,$pos,2));
        push( @v, $byte );
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ",  %02xh"), $byte );
    }
    my $count = scalar(@v);           # number of real bytes, not counting the guard
    push( @v, 0xff );                 # guard byte to avoid thinking going past the end of
                                      # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # Every real byte is visited (the guard is not): a string whose last byte is not
    # absorbed by a null or space optimization must still have that byte encoded.
    #
    for( my $g = 0; $g < $count; $g++ )
    {
        my $byte = $v[$g];
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $byte == 10 && $v[$g+1] == 13 )
        {
            defined( $format{"nl"} ) || die "LF,CR found but no \"nl\" format is defined\n";

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional single digit, an optional '-', then one character
        #
        elsif( $byte == 0x25 )    # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters.  Space is tested for explicitly because its translate
        # code may be 0, which would fail the truth test on the table entry.
        #
        elsif( $byte == 32 || $translate{$byte} )
        {
            $post = $code_translate;
            $code = $translate{$byte} || 0;
            $used{$byte}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $byte >= $normal_base && $byte < ($normal_base+0x40) )
        {
            $used{$byte}++;

            $post = $code_normal;
            $code = $byte - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $byte.": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ",  %02xh"), $code );
    }

    #
    # Both byte lists start with a ", " separator, which substr(...,2) removes.  The
    # compressed line is padded to the width of the uncompressed one.
    #
    my $padding = length($orig) - length($output);
    $padding = 0 if $padding < 0;

    print $label."; ".$db.substr($orig,2)."    ; uncompressed\n";
    print $label."  ".$db.substr($output,2).(" " x $padding)."    ; compressed\n\n";
}

Note: See TracBrowser for help on using the repository browser.