source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 198

Last change on this file since 198 was 197, checked in by gregli@…, 13 years ago

Some maintenance; no changes to the actual source. Moved the compression tables out of the compression script and into the source file, making the compression script source agnostic. And thus moved the compression script to the Tools directory.

File size: 10.2 KB
RevLine 
[189]1#======================================================================================
2#
3# Project name : XTIDE Universal BIOS
4#
5# Authors : Greg Lindhorst
6# gregli@hotmail.com
7#
8# Description : Script for compiling and compressing strings for
9# use by DisplayFormatCompressed.asm. See the header of that file
10# for a description of the compression scheme.
11#
12# Usage : stdin: Listing of strings.asm,
13# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14# We used the listing so that the assembler can take care of
15# resolving %define and EQU symbol definitions.
16#
17# stdout: StringsCompressed.asm,
18# plug replacement for Strings.asm (included by Main.asm)
19#
20# Also see the XTIDE makefile for building StringsCompressed.asm
21#
22
23#----------------------------------------------------------------------
24#
[197]25# Translated, Format, and "Normal" characters
[189]26#
27# DisplayFormatCompressed can only deal with characters in one of the following categories:
28# 1. Those in the Translate associative array
29# 2. Those in the Format associative array
[197]30# 3. Characters between $normal_base and $normal_base+0x40
31# (typically covers upper and lower case alphabets)
[189]32# 4. Null characters (marking the end of strings)
33# 5. The special string LF,CR
34#
35# If a character or format read at the input cannot be found in one of the above categories,
36# it must be added here before this script will accept it (and DisplayFormatCompressed can
37# display it).
38#
[197]39# Tables for the above categories are expected in the input stream, before the strings to be
40# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
41# and do not need to be updated there. Needed information is put in the compression output
42# that it reads.
[189]43#
44
#
# High order code bits, determining which type of character we have (translated or not) and
# if a space or null should come after this character.  processString ORs one of these
# into every byte it emits:
#   $code_space     - "normal" character followed by a space
#   $code_null      - "normal" character followed by a null
#   $code_normal    - "normal" character with nothing folded in after it
#   $code_translate - translated character or format operator
#
$code_space     = 0xc0;
$code_null      = 0x80;
$code_normal    = 0x40;
$code_translate = 0x00;

#
# Bit used if it is a translated byte: clear when the byte is followed by a null,
# set when it is not.
#
$code_translate_null   = 0x00;
$code_translate_normal = 0x20;

#
# Banner at the top of the generated file.
#
print ";;;======================================================================\n";
print ";;;\n";
print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
print ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n";
print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
print ";;;\n";
print ";;;======================================================================\n\n";
67
[197]68
#
# On a first pass, look for our table directives.  $translate{...}, $format{...}, etc.
# are expected in the input stream, written as ';' comments in the assembler listing.
#
# Every input line is kept in @lines for the second pass.  Lines that were recognised
# as a directive get the $processed marker appended so that they can be identified
# in the generated output.
#
$processed = " [StringsCompress Processed]";
while(<>)
{
    # chomp, not chop: only strip a trailing newline.  chop would also eat the last
    # real character of a final line that has no newline.
    chomp;
    $o = $_;

    #
    # Table entries for this script
    #

    # ;$translate{ ord('c') } = N   - character given literally
    if( /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{ord($2)} = int($3);
        $o .= $processed;
    }
    # ;$translate{ NN } = N         - character given as a decimal code
    elsif( /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{int($2)} = int($3);
        $o .= $processed;
    }
    # ;$format_begin = N            - first table index used by format operators
    elsif( /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
        $o .= $processed;
    }
    # ;$format{"name"} = N
    elsif( /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
        $o .= $processed;
    }
    # ;$normal_base = 0xNN          - hex form; must be tried before the decimal form,
    #                                 which would otherwise match just the leading "0"
    elsif( /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        $normal_base = hex($2);
        $o .= $processed;
    }
    # ;$normal_base = NN            - decimal form
    elsif( /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
        $o .= $processed;
    }

    push( @lines, $o );
}
115
#
# On the second pass, loop through lines of the listing, looking for 'db' lines
# (and dealing with continuations) and compressing each line as it is encountered.
#
# The bound is "<= $#lines" because $#lines is the index of the last element;
# "< $#lines" would silently skip the final line of the listing.
#
for( my $index = 0; $index <= $#lines; $index++ )
{
    $_ = $lines[$index];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( /^\s*\d+\s*\<\d\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my $bytes = $1;             # assembled bytes, as hex digit pairs
        my $continuation = $2;      # "-" when the bytes continue on the next listing line
        my $label = $3;
        my $spacing = $4;
        my $db = $5;
        my $string = $6;

        # Echo the original source line as a comment
        print $label.$spacing."; ".$db.$string."\n";

        if( $continuation eq "-" )
        {
            # Gather the remaining bytes from the following listing lines
            do
            {
                $_ = $lines[++$index];
                /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$_."'";
                $bytes .= $1;
                $continuation = $2;
            }
            while( $continuation eq "-" );
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";
[189]170
#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#

print "StringsCompressed_NormalBase equ ", $normal_base, "\n\n";

print "StringsCompressed_FormatsBegin equ ", $format_begin, "\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert the translate table (code -> character), insisting that every entry
# was actually used by some string and that its code is below 32.
#
foreach my $char (keys %translate)
{
    my $slot = $translate{$char};
    $translate_index[$slot] = $char;
    die "translate $char unused\n" unless $used{$char};
    die $slot.": translate codes must be below 32" unless $slot <= 31;
}

# Emit translate entries from code 0 upward, stopping at the first empty slot
for( my $slot = 0; $translate_index[$slot]; $slot++ )
{
    print " db ", $translate_index[$slot], " ; ", $slot, "\n";
}

#
# Same inversion for the format operators; the table holds the name of each
# handler label, with '-' replaced by '_'.
#
foreach my $name (keys %format)
{
    (my $symbol = $name) =~ tr/-/_/;
    $format_index[$format{$name}] = "DisplayFormatCompressed_Format_".$symbol;
    die "format $name unused\n" unless $used{$name};
    die $format{$name}.": format codes must be below 32" unless $format{$name} <= 31;
}

# Emit format entries from $format_begin upward, as offsets back from the base label
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - ", $format_index[$slot], ") ; ", $slot, "\n";
}

print "\n";

#
# Ensure that branch targets are within reach: emit an assemble-time check per entry
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    my $target = $format_index[$slot];
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$target." || DisplayFormatCompressed_BaseFormatOffset - ".$target." > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
[189]221
#--------------------------------------------------------------------------------
#
# Output usage statistics (as ';;' comments at the end of the generated file)
#

#
# Translated characters: one line per entry of the translate table, keyed by
# character code, with the number of times processString used it.
#
print "\n;; translated usage stats\n";
$translate_count = 0;
foreach $f (keys(%translate))
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

#
# Format operators
#
print "\n;; format usage stats\n";
$format_count = 0;
foreach $f (keys(%format))
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

#
# "Normal" range: every code from $normal_base up to $normal_base+0x3f, used or not
#
print "\n;; alphabet usage stats\n";

$used_count = 0;
for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";
256
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]  assembled bytes of one 'db' statement, as a string of hex digit pairs
#   $_[1]  label and spacing that preceded 'db' on the source line (only its width
#          is used, to line up the output)
#   $_[2]  the 'db' keyword with its trailing whitespace
#
# Prints two lines: the uncompressed bytes as a comment, then the compressed 'db'.
#
# Reads the globals %translate, %format, $normal_base and the $code_* constants,
# and counts every character/format it emits in the global %used.
#
# Dies if a byte has no translation, is not a known format operator and is outside
# the normal range, or if LF,CR is seen and no "nl" format has been defined.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;            # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $i = $pos/2;
        $v[$i] = hex(substr($chars,$pos,2));
        # values above 9fh get a leading zero so the assembler sees a number
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
    }
    $v[length($chars)/2] = 0xff;            # guard byte to avoid thinking going past the end of
                                            # the string is a null

    my $output = "";
    my( $post, $code );

    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # NOTE(review): @v holds the data bytes plus the guard byte, so this bound stops
    # before the last data byte as well as the guard.  That appears to rely on every
    # string ending in a null that the look-ahead below consumes - confirm before
    # changing it.
    #
    for( my $g = 0; $g < $#v-1; $g++ )
    {
        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            defined( $format{"nl"} ) || die "LF,CR found but no 'nl' format has been defined\n";

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', optional digit, optional '-', then the operator character
        #
        elsif( $v[$g] == 0x25 )             # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            # defined, not truth: a format code of 0 is still a valid entry
            defined( $format{$fo} ) || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters
        # (space is tested explicitly, presumably because its translate code can be 0,
        # which would fail the truth test)
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else                                # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " of each list
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad the compressed line so its trailing comment lines up with the one above
    for( my $t = length($output); $t < length($orig); $t++ )
    {
        print " ";
    }
    print " ; compressed\n\n";
}
408
Note: See TracBrowser for help on using the repository browser.