source: xtideuniversalbios/tags/Tools_for_v2.0.0_beta1/StringsCompress.pl @ 625

Last change on this file since 625 was 334, checked in by gregli@…, 12 years ago

Serial server DPT flag optimization, remove Serial/IDE specific header on drive scan results, added GNU GPL v2 banner at boot.

File size: 10.6 KB
Line 
#======================================================================================
#
# Project name  :   XTIDE Universal BIOS
#
# Authors       :   Greg Lindhorst
#                   gregli@hotmail.com
#
# Description   :   Script for compiling and compressing strings for
#                   use by DisplayFormatCompressed.asm.  See the header of that file
#                   for a description of the compression scheme.
#
# Usage         :   stdin:  Listing of strings.asm,
#                           assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
#                           We use the listing so that the assembler can take care of
#                           resolving %define and EQU symbol definitions.
#
#                   stdout: StringsCompressed.asm,
#                           plug-in replacement for Strings.asm (included by Main.asm)
#
#                   Also see the XTIDE makefile for building StringsCompressed.asm
#

#----------------------------------------------------------------------
#
# Translated, Format, and "Normal" characters
#
# DisplayFormatCompressed can only deal with characters in one of the following categories:
#  1. Those in the Translate associative array
#  2. Those in the Format associative array
#  3. Characters between $normal_base and $normal_base+0x40
#     (typically covers upper and lower case alphabets)
#  4. Null characters (marking the end of strings)
#  5. The special string LF,CR
#
# If a character or format read at the input cannot be found in one of the above categories,
# it must be added here before this script will accept it (and DisplayFormatCompressed can
# display it).
#
# Tables for the above categories are expected in the input stream, before the strings to be
# compressed are provided.  Note that these tables are not present in DisplayFormatCompressed,
# and do not need to be updated there.  The needed information is put in the compression
# output that it reads.
#

#
# Class bits OR'ed into every compressed byte.  They tell whether the byte is a
# translated/format code or a "normal" character, and whether a space or a null
# follows a normal character.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# Extra bit for a translated/format byte: clear when a null follows it, set otherwise.
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );

#
# Banner of the generated file, followed by the opening of the strings section.
# The pieces are joined with newlines; the empty entries produce the blank lines.
#
print join( "\n",
    ";;;======================================================================",
    ";;;",
    ";;; This file is generated by StringsCompress.pl from source in Strings.asm",
    ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.",
    ";;; This file only needs to be rebuilt if Strings.asm is changed.",
    ";;;",
    ";;;======================================================================",
    "",
    "%ifdef STRINGSCOMPRESSED_STRINGS",
    "",
    "" );

#
# On a first pass, look for our table directives.  $translate{...}, $format{...}, etc.
# are expected in the input stream.
#

#
# parseTableDirective examines one listing line and, if it holds one of the table
# directives below, stores the value in the matching global:
#
#   ;$translate{ord('c')} = N      ->  $translate{ord('c')}
#   ;$translate{N} = M             ->  $translate{N}
#   ;$format_begin = N             ->  $format_begin
#   ;$format{"name"} = N           ->  $format{"name"}
#   ;$normal_base = 0xHH  or  = N  ->  $normal_base
#
# Argument: the listing line (leading listing line number still attached).
# Returns 1 if the line was a directive, 0 otherwise.
#
sub parseTableDirective
{
    my( $line ) = @_;

    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{ord($2)} = int($3);
        return 1;
    }
    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{int($2)} = int($3);
        return 1;
    }
    if( $line =~ /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
        return 1;
    }
    if( $line =~ /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
        return 1;
    }

    # The hexadecimal form must be tried first: the decimal pattern would
    # otherwise accept the leading "0" of "0x40".
    if( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        $normal_base = hex($2);
        return 1;
    }
    if( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
        return 1;
    }

    return 0;
}

#
# Read the whole listing into @lines.  Lines that were recognized as table
# directives get the $processed marker appended.
#
$processed = "    [StringsCompress Processed]";
while(<>)
{
    # chomp only strips a trailing newline; chop would eat a real character
    # from a final line that has no line terminator.
    chomp;

    my $o = $_;
    $o .= $processed if parseTableDirective( $_ );

    push( @lines, $o );
}

#
# On the second pass, loop through lines of the listing, looking for 'db' lines
# (and dealing with continuations) and compressing each line as it is encountered.
#
# The loop runs up to and including $#lines.  $#lines is the index of the last
# element, so comparing with "<" would silently drop the final listing line.
#
for( my $lineIndex = 0; $lineIndex <= $#lines; $lineIndex++ )
{
    my $line = $lines[$lineIndex];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );

        # keep the original source statement in the output, as a comment
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' after the hex bytes means the bytes continue on the
        # next listing line; gather them until a line without the marker.
        #
        while( $continuation eq "-" )
        {
            $line = $lines[++$lineIndex];
            $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$line."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( $line =~ /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";

#--------------------------------------------------------------------------------
#
# Close the strings section and emit the tables section: the two constants,
# then the TranslatesAndFormats table (translates first, formats after them).
#

print "%endif ; STRINGSCOMPRESSED_STRINGS\n\n",
      "%ifdef STRINGSCOMPRESSED_TABLES\n\n",
      "StringsCompressed_NormalBase     equ   ".$normal_base."\n\n",
      "StringsCompressed_FormatsBegin   equ   ".$format_begin."\n\n",
      "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate (character -> code) into @translate_index (code -> character),
# rejecting entries that no string used and codes that do not fit in five bits.
#
foreach my $char (keys(%translate))
{
    my $slot = $translate{$char};
    $translate_index[$slot] = $char;
    die "translate $char unused\n" unless $used{$char};
    die $slot.": translate codes must be below 32" unless $slot <= 31;
}

# One db per translate code, starting at code 0 and stopping at the first false slot.
my $entry = 0;
while( $translate_index[$entry] )
{
    print "        db     ".$translate_index[$entry]."  ; ".$entry."\n";
    $entry++;
}

#
# Same inversion for the formats.  The table holds the handler label of each
# format; '-' in a format name becomes '_' in the label.
#
foreach my $name (keys(%format))
{
    ( my $suffix = $name ) =~ s/\-/_/g;
    $format_index[$format{$name}] = "DisplayFormatCompressed_Format_".$suffix;
    die "format $name unused\n" unless $used{$name};
    die $format{$name}.": format codes must be below 32" unless $format{$name} <= 31;
}

# Format codes in table order: from $format_begin up to the first false slot.
my @slots;
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    push( @slots, $slot );
}

foreach my $slot (@slots)
{
    print "        db     (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$slot].")    ; ".$slot."\n";
}

print "\n";

#
# Ensure that branch targets are within reach
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
foreach my $slot (@slots)
{
    my $target = $format_index[$slot];
    print "%if DisplayFormatCompressed_BaseFormatOffset < $target || DisplayFormatCompressed_BaseFormatOffset - $target > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";

#--------------------------------------------------------------------------------
#
# Output usage statistics, as comments at the end of the tables section.
#
# Every counter is reset before it is used, so that an empty table reports a
# total of "0" instead of an empty string.
#

print "\n;; translated usage stats\n";
$translate_count = 0;
foreach my $char (keys(%translate))
{
    print ";; ".$char.":".$used{$char}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach my $name (keys(%format))
{
    print ";; ".$name.":".$used{$name}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

#
# One line per character of the "normal" range, plus the number of characters
# in that range that were used at least once.
#
$used_count = 0;
for( my $char = $normal_base; $char < $normal_base + 0x40; $char++ )
{
    print ";; ".$char.",".chr($char).":".$used{$char}."\n";
    if( $used{$char} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

print "%endif ; STRINGSCOMPRESSED_TABLES\n\n";

#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]  hexadecimal byte pairs of one 'db' statement, as taken from the listing
#   $_[1]  label and spacing that preceded 'db'; only its width is kept, for indentation
#   $_[2]  the 'db' directive text, including the whitespace that follows it
#
# Reads the globals %translate, %format, $normal_base and the $code_* constants,
# and counts every character and format it encodes in the global %used.
#
# Prints two lines to the currently selected output handle: the original bytes
# as a comment ("; uncompressed") and the encoded bytes ("; compressed").
#
# Dies when a byte has no translation or format and lies outside the normal
# range, or when a '%' sequence names a format that is not in %format.
#
# All working variables are lexical, so the caller's globals are left alone.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;      # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v = ();
    my $orig = "";
    for( my $i = 0; $i * 2 < length($chars); $i++ )
    {
        $v[$i] = hex(substr($chars,$i*2,2));

        # values above 9fh get a leading zero so that the hex literal starts with a digit
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ",  %02xh"), $v[$i] );
    }
    $v[length($chars)/2] = 0xff;      # guard byte to avoid thinking going past the end of
                                      # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # $#v is the index of the guard byte, so "$g < $#v" visits every real byte and
    # stops before the guard.  A further "-1" would drop the last byte of any
    # string that does not end in a null.
    #
    for( my $g = 0; $g < $#v; $g++ )
    {
        my $post;
        my $code;

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional digit, an optional '-', then the format character
        #
        elsif( $v[$g] == 0x25 )    # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (a space always takes this path)
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ",  %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " of the first entry
    print $label."; ".$db.substr($orig,2)."    ; uncompressed\n";
    print $label."  ".$db.substr($output,2);

    # pad the compressed line so that both trailing comments line up
    if( length($orig) > length($output) )
    {
        print " " x ( length($orig) - length($output) );
    }
    print "    ; compressed\n\n";
}

Note: See TracBrowser for help on using the repository browser.