source: xtideuniversalbios/trunk/Tools/StringsCompress.pl @ 492

Last change on this file since 492 was 492, checked in by gregli@…, 11 years ago

Removed the dependency between MODULE_BOOT_MENU and MODULE_HOTKEYS. With these changes, 0, 1, or 2 of them can be included in a build. This change also means that the hotkeys don't work while the menu is up. But the most important hotkey there was for Rom Boot, and that has been added to the menu as a choice proper. Lots of changes across the board in the hotkeys code - even if we eventually back this change out (because, for example, we want hotkeys to work in the menu) we should probably start from this base and add that functionality back in, as these changes result in approximately 120 bytes of savings and include new functionality, such as the Rom Boot menu item and the Com Detect hotkey.

File size: 11.2 KB
Line 
1#======================================================================================
2#
3# Project name  :   XTIDE Universal BIOS
4#
5# Authors       :   Greg Lindhorst
6#                   gregli@hotmail.com
7#
8# Description   :   Script for compiling and compressing strings for
9#                   use by DisplayFormatCompressed.asm.  See the header of that file
10#                   for a description of the compression scheme.
11#
12# XTIDE Universal BIOS and Associated Tools
13# Copyright (C) 2009-2010 by Tomi Tilli, 2011-2012 by XTIDE Universal BIOS Team.
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23# GNU General Public License for more details.     
24# Visit http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
25#
26
27#
28# Usage         :   stdin:  Listing of strings.asm,
29#                           assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
30#                           We used the listing so that the assembler can take care of
31#                           resolving %define and EQU symbol definitions.
32#
33#                   stdout: StringsCompressed.asm,
34#                           plug replacement for Strings.asm (included by Main.asm)
35#
36#                   Also see the XTIDE makefile for building StringsCompressed.asm
37#
38
39#----------------------------------------------------------------------
40#
41# Translated, Format, and "Normal" characters
42#
43# DisplayFormatCompressed can only deal with characters in one of the following categories:
44#  1. Those in the Translate associative array
45#  2. Those in the Format associative array
46#  3. Characters between $normal_base and $normal_base+0x40
47#     (typically covers upper and lower case alphabets)
48#  4. Null characters (marking the end of strings)
49#  5. The special string LF,CR
50#
51# If a character or format read at the input cannot be found in one of the above categories,
52# it must be added here before this script will accept it (and DisplayFormatCompressed can
53# display it).
54#
55# Tables for the above categories are expected in the input stream, before the strings to be
56# compressed are provided.  Note that these tables are not present in DisplayFormatCompressed,
57# and do not need to be updated there.  Needed information is put in the compression output
58# that it reads.
59#
60
#
# Top two bits of every compressed byte.  They tell whether the low bits hold a
# "normal" character offset or a translate/format table index and, for normal
# characters, whether a space or a null follows the character.
#
$code_translate = 0x00;
$code_normal    = 0x40;
$code_null      = 0x80;
$code_space     = 0xc0;

#
# For a translate/format byte, this bit distinguishes an entry that is followed
# by a null from one that is followed by more data.
#
$code_translate_null   = 0x00;
$code_translate_normal = 0x20;

#
# Banner at the top of the generated file, followed by the opening of the
# conditional section that holds the compressed strings.  The heredoc is
# single-quoted so nothing inside it is interpolated.
#
print <<'END_OF_BANNER';
;;;======================================================================
;;;
;;; This file is generated by StringsCompress.pl from source in Strings.asm
;;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.
;;; This file only needs to be rebuilt if Strings.asm is changed.
;;;
;;;======================================================================

%ifdef STRINGSCOMPRESSED_STRINGS

END_OF_BANNER
85
#
# First pass: read the whole listing from the input, keeping every line in
# @lines for the second pass.  While reading, pick up the table directives that
# the listing carries in comments:
#
#   ;$translate{ord('c')} = N     character 'c' gets translate code N
#   ;$translate{M} = N            character code M gets translate code N
#   ;$format_begin = N            first code used for formats
#   ;$format{"name"} = N          format operator "name" gets code N
#   ;$normal_base = 0xHH | N      first character code of the "normal" range
#
# Every recognized directive line is tagged with $processed so that the
# generated file shows that the directive was consumed.
#
$processed = "    [StringsCompress Processed]";
while( defined( my $line = <> ) )
{
    # chomp rather than chop: chop removes the last character unconditionally,
    # which eats a real character when the final line has no trailing newline.
    chomp( $line );
    my $o = $line;

    #
    # Table entries for this script.  The hexadecimal $normal_base form must be
    # tested before the decimal form, which would otherwise match the leading "0".
    #
    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{ord($2)} = int($3);
        $o .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{int($2)} = int($3);
        $o .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
        $o .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
        $o .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        $normal_base = hex($2);
        $o .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
        $o .= $processed;
    }

    push( @lines, $o );
}
132
#
# Second pass: walk the stored listing lines, looking for 'db' lines (and their
# continuation lines) and compressing each string as it is encountered.  All
# other lines are copied to the output with the listing's line number removed.
#
# The loop runs through the last element of @lines ($#lines is the index of the
# last element, so the bound must be inclusive); stopping one short would drop
# the final line of the listing from the output.
#
for( my $index = 0; $index <= $#lines; $index++ )
{
    my $line = $lines[$index];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d\>/ )
    {
        next;
    }

    #
    # a 'db' line, with or without a label
    #
    if( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined $label;      # the label is optional

        # keep the original source line in the output, commented out
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' after the hex bytes means the data continues on the
        # next listing line; gather bytes until a line without the marker.
        #
        while( $continuation eq "-" )
        {
            $index++;
            my $more = ( $index <= $#lines ) ? $lines[$index] : "";
            $more =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i
                || die "parse error on continuation: '".$more."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
        next;
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    if( $line =~ /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
        next;
    }

    #
    # everything else, copy to the output as is
    #
    if( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";
195
#--------------------------------------------------------------------------------
#
# Close the strings section, then emit the constants and the
# StringsCompressed_TranslatesAndFormats table inside the tables section.
#

print "%endif ; STRINGSCOMPRESSED_STRINGS\n\n",
      "%ifdef STRINGSCOMPRESSED_TABLES\n\n",
      "StringsCompressed_NormalBase     equ   ".$normal_base."\n\n",
      "StringsCompressed_FormatsBegin   equ   ".$format_begin."\n\n",
      "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate (character code -> table slot) into @translate_index
# (table slot -> character code).  Every entry must have been used by at least
# one string and must have a code no larger than 31.
#
for my $char_code (keys %translate)
{
    my $slot = $translate{$char_code};
    $translate_index[$slot] = $char_code;
    die "translate $char_code unused\n" unless $used{$char_code};
    die $slot.": translate codes must be below 32" unless $slot <= 31;
}

# one table byte per slot, from slot 0 up to the first empty (false) slot
my $translate_slot = 0;
while( $translate_index[$translate_slot] )
{
    print "        db     ".$translate_index[$translate_slot]."  ; ".$translate_slot."\n";
    $translate_slot++;
}

#
# Same inversion for the formats; the table stores the name of the handler
# label, with '-' in the operator name turned into '_'.
#
for my $operator (keys %format)
{
    ( my $suffix = $operator ) =~ s/\-/_/g;
    my $slot = $format{$operator};
    $format_index[$slot] = "DisplayFormatCompressed_Format_".$suffix;
    die "format $operator unused\n" unless $used{$operator};
    die $slot.": format codes must be below 32" unless $slot <= 31;
}

# collect the format slots once: from $format_begin up to the first empty slot
my @format_slots;
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    push( @format_slots, $slot );
}

for my $slot (@format_slots)
{
    print "        db     (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$slot].")    ; ".$slot."\n";
}

print "\n";

#
# Ensure that branch targets are within reach: each table byte holds the
# distance from DisplayFormatCompressed_BaseFormatOffset back to the handler,
# so the generated file checks at assembly time that it fits in 0..255.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for my $slot (@format_slots)
{
    my $target = $format_index[$slot];
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$target." || DisplayFormatCompressed_BaseFormatOffset - ".$target." > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
249
#--------------------------------------------------------------------------------
#
# Output usage statistics (as comments in the generated file), then close the
# tables section.
#
# Keys are sorted before printing: Perl does not guarantee any hash order, so
# without the sort the comment lines could come out in a different order on
# every run and make the generated file differ for no reason.  The totals are
# taken from the hash sizes, so an empty table reports 0 rather than an
# empty string.
#

print "\n;; translated usage stats\n";
foreach my $char_code (sort { $a <=> $b } keys(%translate))
{
    print ";; ".$char_code.":".$used{$char_code}."\n";
}
print ";; total translated: ".scalar(keys(%translate))."\n";

print "\n;; format usage stats\n";
foreach my $operator (sort keys(%format))
{
    print ";; ".$operator.":".$used{$operator}."\n";
}
print ";; total format: ".scalar(keys(%format))."\n";

print "\n;; alphabet usage stats\n";

# one line per character of the 0x40-wide "normal" range, used or not
my $used_count = 0;
for( my $char_code = $normal_base; $char_code < $normal_base + 0x40; $char_code++ )
{
    print ";; ".$char_code.",".chr($char_code).":".$used{$char_code}."\n";
    if( $used{$char_code} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

print "%endif ; STRINGSCOMPRESSED_TABLES\n\n";
286
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]  hexadecimal byte pairs of one string, as found in the listing
#   $_[1]  label and spacing that preceded the 'db' (used only for indentation)
#   $_[2]  the 'db' keyword with its trailing whitespace
#
# Prints two lines: the uncompressed bytes as a comment, and the compressed
# bytes as a 'db' statement.  Reads the file-level tables %translate, %format,
# $normal_base and the $code_* constants, and counts every character and format
# it encounters in %used.
#
# Dies if a byte has no translation or format and is outside the normal range,
# or if a format operator (including the implicit "nl") is not in %format.
#

sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;      # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $byte = hex(substr($chars,$pos,2));
        push( @v, $byte );
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ",  %02xh"), $byte );
    }
    push( @v, 0xff );                 # guard byte to avoid thinking going past the end of
                                      # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    for( my $g = 0; $g < $#v; $g++ )
    {
        my $code;
        my $post;

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            # without this check a missing "nl" entry would silently emit code 0
            die "unknown format operator: 'nl'\n" unless exists $format{"nl"};

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional single digit, an optional '-',
        # then the operator character
        #
        elsif( $v[$g] == 0x25 )    # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            # exists, not truth: a format may legitimately be assigned code 0
            die "unknown format operator: '".$fo."'\n" unless exists $format{$fo};

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters.  The lookup uses exists so that an entry with
        # code 0 is honoured for any character.  A space is always routed here,
        # as before; it is presumably expected to have a translate entry
        # (NOTE(review): confirm against the tables in Strings.asm).
        #
        elsif( exists $translate{$v[$g]} || $v[$g] == 32 )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ",  %02xh"), $code );
    }

    print $label."; ".$db.substr($orig,2)."    ; uncompressed\n";
    print $label."  ".$db.substr($output,2);

    # pad the compressed line so both trailing comments start in the same column
    my $padding = length($orig) - length($output);
    print " " x $padding if $padding > 0;

    print "    ; compressed\n\n";
}
438
Note: See TracBrowser for help on using the repository browser.