source: xtideuniversalbios/trunk/Tools/StringsCompress.pl @ 526

Last change on this file since 526 was 526, checked in by krille_n_@…, 11 years ago

Changes:

  • Update of the copyright notices to include the year 2013.
File size: 11.2 KB
Line 
1#======================================================================================
2#
3# Project name  :   XTIDE Universal BIOS
4#
5# Authors       :   Greg Lindhorst
6#                   gregli@hotmail.com
7#
8# Description   :   Script for compiling and compressing strings for
9#                   use by DisplayFormatCompressed.asm.  See the header of that file
10#                   for a description of the compression scheme.
11#
12# XTIDE Universal BIOS and Associated Tools
13# Copyright (C) 2009-2010 by Tomi Tilli, 2011-2013 by XTIDE Universal BIOS Team.
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23# GNU General Public License for more details.
24# Visit http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
25#
26
27#
28# Usage         :   stdin:  Listing of strings.asm,
29#                           assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
30#                           We use the listing so that the assembler can take care of
31#                           resolving %define and EQU symbol definitions.
32#
33#                   stdout: StringsCompressed.asm,
34#                           plug replacement for Strings.asm (included by Main.asm)
35#
36#                   Also see the XTIDE makefile for building StringsCompressed.asm
37#
38
39#----------------------------------------------------------------------
40#
41# Translated, Format, and "Normal" characters
42#
43# DisplayFormatCompressed can only deal with characters in one of the following categories:
44#  1. Those in the Translate associative array
45#  2. Those in the Format associative array
46#  3. Characters between $normal_base and $normal_base+0x40
47#     (typically covers upper and lower case alphabets)
48#  4. Null characters (marking the end of strings)
49#  5. The special string LF,CR
50#
51# If a character or format read at the input cannot be found in one of the above categories,
52# it must be added here before this script will accept it (and DisplayFormatCompressed can
53# display it).
54#
55# Tables for the above categories are expected in the input stream, before the strings to be
56# compressed are provided.  Note that these tables are not present in DisplayFormatCompressed,
57# and do not need to be updated there.  Needed information is put in the compression output
58# that it reads.
59#
60
#
# Encoding constants.
#
# The high-order bits of each compressed byte say which kind of character it
# carries (translated/format or "normal") and whether a space or a null
# follows it.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# For a translated/format byte, this bit distinguishes "followed by a null"
# from "not followed by a null".
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );

#
# Banner of the generated file, followed by the opening of the strings section.
# The heredoc is non-interpolating, so the text is emitted exactly as written.
#
print <<'END_OF_BANNER';
;;;======================================================================
;;;
;;; This file is generated by StringsCompress.pl from source in Strings.asm
;;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.
;;; This file only needs to be rebuilt if Strings.asm is changed.
;;;
;;;======================================================================

%ifdef STRINGSCOMPRESSED_STRINGS

END_OF_BANNER
85
#
# First pass: read the whole listing and pick up our table directives.
#
# Directives are assembler comments in the listing, each preceded by the
# listing's own line number:
#
#   ;$translate{ord('c')} = N     map character 'c' to translate code N
#   ;$translate{M} = N            map character code M to translate code N
#   ;$format_begin = N            first table index used by formats
#   ;$format{"name"} = N          map format operator "name" to code N
#   ;$normal_base = 0xHH | N      first character code of the "normal" range
#
# Every input line is kept in @lines for the second pass.  Lines recognized
# as directives get the $processed marker appended.
#
$processed = "    [StringsCompress Processed]";
while(<>)
{
    # chomp, not chop: only strip a line terminator, so a final line without
    # a trailing newline does not lose its last real character.
    chomp;
    $o = $_;

    #
    # Table entries for this script
    #
    if( /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{ord($2)} = int($3);
        $o .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{int($2)} = int($3);
        $o .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
        $o .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
        $o .= $processed;
    }
    # The hexadecimal form must be tried before the decimal form, otherwise
    # "0x40" would be accepted by the decimal pattern as the value 0.
    elsif( /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        $normal_base = hex($2);
        $o .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
        $o .= $processed;
    }

    push( @lines, $o );
}
132
#
# Second pass: walk the stored listing lines, compress every 'db' line
# (joining its continuation lines first) and copy everything else through.
#
# The index runs up to and including $#lines; stopping one short would
# silently drop the final line of the listing.  The index is also advanced
# inside the body when continuation lines are consumed.
#
for( my $idx = 0; $idx <= $#lines; $idx++ )
{
    $_ = $lines[$idx];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( /^\s*\d+\s*\<\d\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        # Copy the captures right away; the continuation match below
        # overwrites $1 and $2.
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );

        # The original source line is kept in the output as a comment.
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' after the hex bytes means the data continues on the
        # next listing line; keep appending bytes until a line without it.
        #
        if( $continuation eq "-" )
        {
            do
            {
                $_ = $lines[++$idx];
                /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$_."'";
                $bytes .= $1;
                $continuation = $2;
            }
            while( $continuation eq "-" );
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";
195
#--------------------------------------------------------------------------------
#
# Close the strings section and emit the tables section: the NormalBase and
# FormatsBegin constants, the TranslatesAndFormats table, and assemble-time
# checks that every format handler is within reach of the base offset.
#

print "%endif ; STRINGSCOMPRESSED_STRINGS\n\n",
      "%ifdef STRINGSCOMPRESSED_TABLES\n\n",
      "StringsCompressed_NormalBase     equ   ".$normal_base."\n\n",
      "StringsCompressed_FormatsBegin   equ   ".$format_begin."\n\n",
      "StringsCompressed_TranslatesAndFormats:\n";

#
# Invert %translate into a table indexed by translate code, refusing
# translations that were never used or whose code does not fit in 5 bits.
#
for my $char ( keys %translate )
{
    my $slot = $translate{$char};
    $translate_index[$slot] = $char;
    die "translate $char unused\n" unless $used{$char};
    die $slot.": translate codes must be below 32" if $slot > 31;
}

# Emit translate entries from index 0 up to the first empty (false) slot.
my $row = 0;
while( $translate_index[$row] )
{
    print "        db     ".$translate_index[$row]."  ; ".$row."\n";
    $row++;
}

#
# Same inversion for formats; the table holds the handler label for each
# code, with '-' in the operator name replaced by '_'.
#
for my $name ( keys %format )
{
    ( my $suffix = $name ) =~ tr/-/_/;
    my $slot = $format{$name};
    $format_index[$slot] = "DisplayFormatCompressed_Format_".$suffix;
    die "format $name unused\n" unless $used{$name};
    die $slot.": format codes must be below 32" if $slot > 31;
}

# Emit format entries from $format_begin up to the first empty slot.
my $entry = $format_begin;
while( $format_index[$entry] )
{
    print "        db     (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$entry].")    ; ".$entry."\n";
    $entry++;
}

print "\n";

#
# Ensure that branch targets are within reach: each offset emitted above is
# a single byte, so it must lie in 0..255.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
my $check = $format_begin;
while( $format_index[$check] )
{
    my $target = $format_index[$check];
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$target." || DisplayFormatCompressed_BaseFormatOffset - ".$target." > 255\n",
          "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n",
          "%endif\n";
    $check++;
}
print "%endif\n";
249
#--------------------------------------------------------------------------------
#
# Output usage statistics (as assembler comments) and close the tables section.
#
# Keys are sorted so that the generated file is identical from run to run;
# Perl does not guarantee any particular hash iteration order.
#

print "\n;; translated usage stats\n";
$translate_count = 0;       # start at 0 so an empty table still prints a number
foreach $f (sort { $a <=> $b } keys(%translate))    # keys are character codes
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach $f (sort(keys(%format)))                    # keys are operator names
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

#
# One line per character of the 0x40-wide "normal" range, and a count of how
# many of those characters appeared in at least one string.
#
$used_count = 0;
for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

print "%endif ; STRINGSCOMPRESSED_TABLES\n\n";
286
#--------------------------------------------------------------------------------
#
# processString( $chars, $label, $db ) does the real compression work.
#
#   $chars  - the bytes of one string, as a run of hexadecimal digit pairs
#             (continuation lines already joined by the caller)
#   $label  - the label and spacing that preceded 'db' on the source line;
#             only its width is used, to line the output up under the source
#   $db     - the 'db' directive with its trailing spacing
#
# Prints two lines: the uncompressed bytes as a comment and the compressed
# bytes as a real 'db' line.  Counts every character and format it encodes
# in %used.  Dies on a byte that has no translation, is not a known format
# operator and is outside the normal range.
#
# Reads the tables built by the first pass (%translate, %format,
# $normal_base) and the $code_* constants.  All working variables are
# lexical, so the caller's variables of the same name are left alone.
#

sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;      # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v = ();
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $i = $pos/2;
        $v[$i] = hex(substr($chars,$pos,2));
        # values above 9fh start with a letter digit and need a leading zero
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ",  %02xh"), $v[$i] );
    }
    $v[length($chars)/2] = 0xff;      # guard byte to avoid thinking going past the end of
                                      # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    for( my $g = 0; $g < $#v; $g++ )
    {
        my( $post, $code );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            exists $format{"nl"} or die "unknown format operator: 'nl'\n";

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional digit, an optional '-', then
        # the operator character
        #
        elsif( $v[$g] == 0x25 )    # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            # test for presence, not truth, so that a format code of 0 is accepted
            exists $format{$fo} or die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (space is always treated as translated;
        # presence test so that a translate code of 0 is accepted)
        #
        elsif( $v[$g] == 32 || exists $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ",  %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " of the first entry
    print $label."; ".$db.substr($orig,2)."    ; uncompressed\n";
    print $label."  ".$db.substr($output,2);
    # pad so that the trailing comments of both lines start in the same column
    print " " x ( length($orig) - length($output) );
    print "    ; compressed\n\n";
}
438
Note: See TracBrowser for help on using the repository browser.