source: xtideuniversalbios/trunk/XTIDE_Universal_BIOS/Src/StringsCompress.pl@ 195

Last change on this file since 195 was 194, checked in by gregli@…, 13 years ago

ifdef'd out more unused code. Also added a tool for looking through the listing and the output of the precompiler to aid in finding dead code. Some changes in the files are to add annotations for the tool to avoid false positives.

File size: 10.5 KB
Line 
1#======================================================================================
2#
3# Project name : XTIDE Universal BIOS
4#
5# Authors : Greg Lindhorst
6# gregli@hotmail.com
7#
8# Description : Script for compiling and compressing strings for
9# use by DisplayFormatCompressed.asm. See the header of that file
10# for a description of the compression scheme.
11#
12# Usage : stdin: Listing of strings.asm,
13# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14# We used the listing so that the assembler can take care of
15# resolving %define and EQU symbol definitions.
16#
17# stdout: StringsCompressed.asm,
18# plug replacement for Strings.asm (included by Main.asm)
19#
20# Also see the XTIDE makefile for building StringsCompressed.asm
21#
22
23#----------------------------------------------------------------------
24#
25# Translated and Format characters
26#
27# DisplayFormatCompressed can only deal with characters in one of the following categories:
28# 1. Those in the Translate associative array
29# 2. Those in the Format associative array
30# 3. Characters between $normal_base and $normal_base+0x40
31# 4. Null characters (marking the end of strings)
32# 5. The special string LF,CR
33#
34# If a character or format read at the input cannot be found in one of the above categories,
35# it must be added here before this script will accept it (and DisplayFormatCompressed can
36# display it).
37#
38# Note that these tables are not present in DisplayFormatCompressed, and do not need to
39# be updated there. Needed information is put in the compression output that it reads.
40#
41$translate{ord(' ')} = 0;
42$translate{172} = 1; # ONE_QUARTER
43$translate{171} = 2; # ONE_HALF
44$translate{179} = 3; # SINGLE_VERTICAL
45$translate{175} = 4; # ANGLE_QUOTE_RIGHT
46$translate{ord('!')} = 5;
47$translate{ord('"')} = 6;
48$translate{ord(',')} = 7;
49$translate{ord('-')} = 8;
50$translate{ord('.')} = 9;
51$translate{ord('/')} = 10;
52$translate{ord('1')} = 11;
53$translate{ord('2')} = 12;
54$translate{ord('3')} = 13;
55$translate{ord('4')} = 14;
56$translate{ord('5')} = 15;
57$translate{ord('6')} = 16;
58$translate{ord('8')} = 17;
59$translate{200} = 18; # DOUBLE_BOTTOM_LEFT_CORNER
60$translate{181} = 19; # DOUBLE_LEFT_HORIZONTAL_TO_SINGLE_VERTICAL
61
62#
63# Formats begin immediately after the last Translated character (they are in the same table)
64#
65$format_begin = 20;
66
67$format{"s"} = 20; # n/a
68$format{"c"} = 21; # n/a
69$format{"2-I"} = 22; # must be even
70$format{"u"} = 23; # must be odd
71$format{"5-u"} = 24; # must be even
72$format{"x"} = 25; # must be odd
73$format{"5-x"} = 26; # must be even
74$format{"nl"} = 27; # n/a
75$format{"2-u"} = 28; # must be even
76$format{"A"} = 29; # n/a
77
78# NOTE: The last $format cannot exceed 31 (stored in a 5-bit quantity).
79
80#
81# Starting point for the "normal" range, typically around 0x40 to cover upper and lower case
82# letters. If lower case 'z' is not used, 0x3a can be a good choice as it adds ':' to the
83# front end.
84#
85$normal_base = 0x3a;
86
87#
88# High order code bits, determining which type of character we have (translated or not) and
89# if a space or null should come after this character.
90#
91$code_space = 0xc0;
92$code_null = 0x80;
93$code_normal = 0x40;
94$code_translate = 0x00;
95
96#
97# Bit used if it is a translated byte
98#
99$code_translate_null = 0x00;
100$code_translate_normal = 0x20;
101
102print ";;;======================================================================\n";
103print ";;;\n";
104print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
105print ";;; DO NOT EDIT DIRECTLY - See the maekfile for how to rebuild this file.\n";
106print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
107print ";;;\n";
108print ";;;======================================================================\n\n";
109
110#
111# Loop through lines of the listing, looking for 'db' lines (and dealing with continuations)
112# and compressing each line as it is encountered.
113#
114while(<>)
115{
116 #
117 # The <number> indicates a line from an include file, do not include in the output
118 #
119 if( /^\s*\d+\s*\<\d\>/ )
120 {
121 }
122
123 #
124 # a 'db' line, with or without a label
125 #
126 elsif( /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
127 {
128 $bytes = $1;
129 $continuation = $2;
130 $label = $3;
131 $spacing = $4;
132 $db = $5;
133 $string = $6;
134
135 print $label.$spacing."; ".$db.$string."\n";
136
137 if( $continuation eq "-" )
138 {
139 do
140 {
141 $_ = <>;
142 /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation";
143 $bytes .= $1;
144 $continuation = $2;
145 }
146 while( $continuation eq "-" );
147 }
148
149 &processString( $bytes, $label.$spacing, $db );
150 }
151
152 #
153 # everything else, copy to the output as is
154 #
155 elsif( /^\s*\d+\s*(.*)$/ )
156 {
157 print $1."\n";
158 }
159}
160
161print ";;; end of strings.asm\n\n";
162
163#--------------------------------------------------------------------------------
164#
165# Output constants and the TranslatesAndFormats table
166#
167
168print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";
169
170print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";
171
172print "StringsCompressed_TranslatesAndFormats: \n";
173
174foreach $f (keys(%translate))
175{
176 $translate_index[$translate{$f}] = $f;
177 $used{$f} || print "translate $f unused\n";
178 $translate{$f} <= 31 || die $translate{$f}.": translate codes must be below 32";
179}
180
181for( $g = 0; $translate_index[$g]; $g++ )
182{
183 print " db ".$translate_index[$g]." ; ".$g."\n";
184}
185
186foreach $f (keys(%format))
187{
188 $n = $f;
189 $n =~ s/\-/_/g;
190 $format_index[$format{$f}] = "DisplayFormatCompressed_Format_".$n;
191 $used{$f} || die "format $f unused\n";
192 $format{$f} <= 31 || die $format{$f}.": format codes must be below 32";
193}
194
195for( $t = $format_begin; $format_index[$t]; $t++ )
196{
197 print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$t].") ; ".$t."\n";
198}
199
200print "\n";
201
202#
203# Ensure that branch targets are within reach
204#
205print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
206for( $t = $format_begin; $format_index[$t]; $t++ )
207{
208 print "%if DisplayFormatCompressed_BaseFormatOffset < $format_index[$t] || DisplayFormatCompressed_BaseFormatOffset - $format_index[$t] > 255\n";
209 print "%error \"".$format_index[$t]." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
210 print "%endif\n";
211}
212print "%endif\n";
213
214#--------------------------------------------------------------------------------
215#
216# Output usage statistics
217#
218
219print "\n;; translated usage stats\n";
220foreach $f (keys(%special))
221{
222 print ";; ".$f.":".$used{$f}."\n";
223 $translate_count++;
224}
225print ";; total translated: ".$translate_count."\n";
226
227print "\n;; format usage stats\n";
228$format_count = 0;
229foreach $f (keys(%format))
230{
231 print ";; ".$f.":".$used{$f}."\n";
232 $format_count++;
233}
234print ";; total format: ".$format_count."\n";
235
236print "\n;; alphabet usage stats\n";
237
238$used_count = 0;
239for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
240{
241 print ";; ".$t.",".chr($t).":".$used{$t}."\n";
242 if( $used{$t} )
243 {
244 $used_count++;
245 }
246}
247print ";; alphabet used count: ".$used_count."\n";
248
249#--------------------------------------------------------------------------------
250#
251# processString does the real compression work...
252#
253
254sub processString
255{
256 $chars = $_[0];
257 $label = $_[1];
258 $db = $_[2];
259
260 $label =~ s/[a-z0-9_:]/ /ig; # replace with spaces for proper output spacing
261
262 #
263 # Copy numeric bytes out of hexadecimal pairs in the listing
264 #
265 $#v = 0;
266
267 $orig = "";
268 for( $g = 0; $g < length($chars); $g += 2 )
269 {
270 $i = $g/2;
271 $v[$i] = hex(substr($chars,$g,2));
272 $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
273 }
274 $v[length($chars)/2] = 0xff; # guard byte to avoid thinking going past the end of
275 # the string is a null
276
277 $output = "";
278 #
279 # Loop through bytes...
280 # looking ahead as needed for possible space and null optimizations, compiling formats
281 #
282 for( $g = 0; $g < $#v-1; $g++ ) # -1 for the guard byte
283 {
284 #
285 # Special translation of LF,CR to a format
286 #
287 if( $v[$g] == 10 && $v[$g+1] == 13 )
288 {
289 $g++;
290 $post = $code_translate;
291 $code = $format{"nl"};
292 $used{"nl"}++;
293 }
294
295 #
296 # Format operators
297 #
298 elsif( $v[$g] == 0x25 ) # "%"
299 {
300 $fo = "";
301 $g++;
302 if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
303 {
304 $fo = $fo.chr($v[$g]);
305 $g++;
306 }
307 if( $v[$g] == ord("-") )
308 {
309 $fo = $fo.chr($v[$g]);
310 $g++;
311 }
312 $fo = $fo.chr($v[$g]);
313
314 $format{$fo} || die "unknown format operator: '".$fo."'\n";
315
316 $code = $format{$fo};
317 $post = $code_translate;
318 $used{$fo}++;
319 }
320
321 #
322 # Translated characters
323 #
324 elsif( $v[$g] == 32 || $translate{$v[$g]} )
325 {
326 $post = $code_translate;
327 $code = $translate{$v[$g]};
328 $used{$v[$g]}++;
329 }
330
331 #
332 # "normal" characters (alphabet, and ASCII characters around the alphabet)
333 #
334 elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
335 {
336 $used{$v[$g]}++;
337
338 $post = $code_normal;
339 $code = $v[$g] - $normal_base;
340 }
341
342 #
343 # Not found
344 #
345 else
346 {
347 die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
348 }
349
350 if( $post == $code_translate )
351 {
352 #
353 # NULL optimization (space optimization not possible on translate/format)
354 #
355 if( $v[$g+1] == 0 )
356 {
357 $g++;
358 $post = $post | $code_translate_null;
359 }
360 else
361 {
362 $post = $post | $code_translate_normal;
363 }
364 }
365 else # $post == $code_normal
366 {
367 #
368 # Space optimization
369 #
370 if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
371 {
372 # can't take this optimization if the next byte is a null,
373 # since we can't have both a postfix space and null
374 $g++;
375 $post = $code_space;
376 }
377
378 #
379 # NULL optimization
380 #
381 elsif( $v[$g+1] == 0 )
382 {
383 $g++;
384 $post = $code_null;
385 }
386 }
387
388 $code = $code | $post;
389 $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
390 }
391
392 print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
393 print $label." ".$db.substr($output,2);
394 for( $t = length($output); $t < length($orig); $t++ )
395 {
396 print " ";
397 }
398 print " ; compressed\n\n";
399}
400
Note: See TracBrowser for help on using the repository browser.