source: xtideuniversalbios/trunk/XTIDE_Universal_BIOS/Src/StringsCompress.pl@ 190

Last change on this file since 190 was 189, checked in by gregli@…, 13 years ago

Additional space optimizations, including making IdleProcessing an option in MENUEVENT. Note the fall-through from one file to another, but that there are assembler checks to ensure the proper linkage is maintained. First version of StringsCompress.pl, a perl script to make StringsCompressed.asm from Strings.asm.

File size: 10.4 KB
RevLine 
#======================================================================================
#
# Project name :  XTIDE Universal BIOS
#
# Authors      :  Greg Lindhorst
#                 gregli@hotmail.com
#
# Description  :  Script for compiling and compressing strings for
#                 use by DisplayFormatCompressed.asm.  See the header of that file
#                 for a description of the compression scheme.
#
# Usage        :  stdin:  Listing of strings.asm,
#                         assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
#                         We use the listing so that the assembler can take care of
#                         resolving %define and EQU symbol definitions.
#
#                 stdout: StringsCompressed.asm,
#                         plug replacement for Strings.asm (included by Main.asm)
#
#                 Also see the XTIDE makefile for building StringsCompressed.asm
#

#----------------------------------------------------------------------
#
# Translated and Format characters
#
# DisplayFormatCompressed can only deal with characters in one of the following categories:
#   1. Those in the Translate associative array
#   2. Those in the Format associative array
#   3. Characters between $normal_base and $normal_base+0x40
#   4. Null characters (marking the end of strings)
#   5. The special string LF,CR
#
# If a character or format read at the input cannot be found in one of the above categories,
# it must be added here before this script will accept it (and DisplayFormatCompressed can
# display it).
#
# Note that these tables are not present in DisplayFormatCompressed, and do not need to
# be updated there.  Needed information is put in the compression output that it reads.
#
# Everything in this section is a package global on purpose: the rest of this script,
# including processString, reads these tables and constants by name.
#
# %translate maps a character code (as read from the listing) to its table index.
#
%translate = (
    ord(' ') => 0,
    172      => 1,      # ONE_QUARTER
    171      => 2,      # ONE_HALF
    179      => 3,      # SINGLE_VERTICAL
    175      => 4,      # ANGLE_QUOTE_RIGHT
    ord('!') => 5,
    ord('"') => 6,
    ord(',') => 7,
    ord('-') => 8,
    ord('.') => 9,
    ord('/') => 10,
    ord('1') => 11,
    ord('2') => 12,
    ord('3') => 13,
    ord('4') => 14,
    ord('5') => 15,
    ord('6') => 16,
    ord('8') => 17,
    200      => 18,     # DOUBLE_BOTTOM_LEFT_CORNER
    181      => 19,     # DOUBLE_LEFT_HORIZONTAL_TO_SINGLE_VERTICAL
);

#
# Formats begin immediately after the last Translated character (they are in the same table)
#
$format_begin = 20;

#
# %format maps the text that follows a '%' in a string (plus the pseudo-format "nl",
# used for the LF,CR pair) to its table index.
#
%format = (
    "s"   => 20,        # n/a
    "c"   => 21,        # n/a
    "2-I" => 22,        # must be even
    "u"   => 23,        # must be odd
    "5-u" => 24,        # must be even
    "x"   => 25,        # must be odd
    "5-x" => 26,        # must be even
    "nl"  => 27,        # n/a
    "2-u" => 28,        # must be even
    "A"   => 29,        # n/a
);

# NOTE: The last $format cannot exceed 31 (stored in a 5-bit quantity).

#
# Starting point for the "normal" range, typically around 0x40 to cover upper and lower case
# letters.  If lower case 'z' is not used, 0x3a can be a good choice as it adds ':' to the
# front end.
#
$normal_base = 0x3a;

#
# High order code bits, determining which type of character we have (translated or not) and
# if a space or null should come after this character.
#
$code_space     = 0xc0;
$code_null      = 0x80;
$code_normal    = 0x40;
$code_translate = 0x00;

#
# Bit used if it is a translated byte
#
$code_translate_null   = 0x00;
$code_translate_normal = 0x20;
101
#
# Banner placed at the top of the generated file (written to stdout), warning readers
# that it is generated and should not be edited by hand.
#
print ";;;======================================================================\n";
print ";;;\n";
print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
print ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n";
print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
print ";;;\n";
print ";;;======================================================================\n\n";
109
#
# Loop through lines of the listing, looking for 'db' lines (and dealing with continuations)
# and compressing each line as it is encountered.
#
# Each input line is handled by the first rule that matches:
#   - lines carrying a <digit> marker are dropped,
#   - 'db' lines are echoed as a comment and handed to processString,
#   - any other line starting with a listing line number is copied with that number removed.
# Lines matching none of these produce no output.
#
while(<>)
{
    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    next if( /^\s*\d+\s*\<\d\>/ );

    #
    # a 'db' line, with or without a label
    #
    if( /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        # $label is undefined when the line has no label; it then concatenates as "".
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );

        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' after the hex bytes means the bytes continue on the next
        # listing line; keep appending until a line without the '-' is seen.
        #
        while( $continuation eq "-" )
        {
            $_ = <>;
            /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # everything else, copy to the output as is
    #
    elsif( /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of strings.asm\n\n";
162
#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#
# The table holds one 'db' per translate code (starting at 0) followed by one 'db' per
# format code (starting at $format_begin).  Both sets of codes must therefore be
# contiguous, and the number of translate codes must equal $format_begin; a gap would
# shift every following entry, so it is reported as an error instead of being emitted.
#

print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate (character => code) into @translate_index (code => character).
# Keys are sorted so that any diagnostics come out in the same order on every run.
#
foreach my $char (sort { $a <=> $b } keys(%translate))
{
    $translate_index[$translate{$char}] = $char;
    # NOTE(review): this message goes to stdout, i.e. into the generated file, and is
    # not written as an assembler comment; an unused format is fatal below, an unused
    # translate is not.  Left exactly as it was - confirm whether that is intended.
    $used{$char} || print "translate $char unused\n";
    $translate{$char} <= 31 || die $translate{$char}.": translate codes must be below 32";
}

foreach my $code (0 .. $#translate_index)
{
    defined( $translate_index[$code] )
        || die "translate code ".$code." is not assigned: translate codes must be contiguous from 0\n";
    print " db ".$translate_index[$code]." ; ".$code."\n";
}

scalar(@translate_index) == $format_begin
    || die scalar(@translate_index)." translate codes defined, but formats begin at ".$format_begin."\n";

#
# Invert %format (format text => code) into @format_index (code => handler label).
# A '-' is not usable in the label name, so it is replaced with '_'.
#
foreach my $fmt (sort(keys(%format)))
{
    my $suffix = $fmt;
    $suffix =~ s/\-/_/g;
    $format{$fmt} >= $format_begin
        || die $format{$fmt}.": format codes must not be below ".$format_begin."\n";
    $format_index[$format{$fmt}] = "DisplayFormatCompressed_Format_".$suffix;
    $used{$fmt} || die "format $fmt unused\n";
    $format{$fmt} <= 31 || die $format{$fmt}.": format codes must be below 32";
}

foreach my $code ($format_begin .. $#format_index)
{
    defined( $format_index[$code] )
        || die "format code ".$code." is not assigned: format codes must be contiguous from ".$format_begin."\n";
    print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$code].") ; ".$code."\n";
}

print "\n";

#
# Ensure that branch targets are within reach
#
# Each table entry above is a single byte holding (BaseFormatOffset - handler), so emit an
# assembler-time check that the difference is in the range 0..255 for every handler.
#
foreach my $code ($format_begin .. $#format_index)
{
    my $handler = $format_index[$code];
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$handler." || DisplayFormatCompressed_BaseFormatOffset - ".$handler." > 255\n";
    print "%error \"".$handler." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
211
#--------------------------------------------------------------------------------
#
# Output usage statistics
#
# Written as ';;' comment lines at the end of the generated file.  For every entry the
# count is whatever processString accumulated in %used (printed as empty when the entry
# was never used).  Keys are sorted so the generated file is identical from run to run.
#

print "\n;; translated usage stats\n";
$translate_count = 0;
foreach my $char (sort { $a <=> $b } keys(%translate))
{
    print ";; ".$char.":".$used{$char}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach my $fmt (sort(keys(%format)))
{
    print ";; ".$fmt.":".$used{$fmt}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

#
# Walk the whole "normal" range and count how many of its characters appeared at least once.
#
$used_count = 0;
foreach my $char ($normal_base .. $normal_base + 0x40 - 1)
{
    print ";; ".$char.",".chr($char).":".$used{$char}."\n";
    $used_count++ if( $used{$char} );
}
print ";; alphabet used count: ".$used_count."\n";
246
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]   the bytes of one 'db' statement, as a string of hexadecimal digit pairs
#   $_[1]   the label and spacing that preceded the 'db' (used only for output alignment)
#   $_[2]   the 'db' keyword together with its trailing whitespace
#
# Prints two lines to stdout: the original bytes as a comment, then the compressed 'db'.
# Increments %used for every translate, format and normal character encountered.
#
# Reads the package globals %translate, %format, $normal_base and the $code_* constants.
#
# Dies on an unknown format operator, or on a byte that is neither translated, a format,
# nor inside the normal range.  A NULL byte is only representable as a suffix of the
# preceding character, so a NULL with nothing before it to attach to is also rejected.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;            # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v = ();
    my $orig = "";
    for( my $p = 0; $p < length($chars); $p += 2 )
    {
        my $byte = hex(substr($chars,$p,2));
        push( @v, $byte );
        # values above 0x9f are padded to three digits, which gives them a leading zero
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ", %02xh"), $byte );
    }
    my $count = scalar(@v);                 # number of real bytes, guard not included
    push( @v, 0xff );                       # guard byte to avoid thinking going past the end of
                                            # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # Every real byte is visited (the guard is not), so a string that does not end in a
    # NULL keeps its final character.
    #
    for( my $g = 0; $g < $count; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators
        #
        elsif( $v[$g] == 0x25 )             # "%"
        {
            # collect an optional single digit, an optional '-', then the format letter
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            exists( $format{$fo} ) || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters
        #
        elsif( exists( $translate{$v[$g]} ) )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else    # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " that every appended element carries
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad the compressed line so its trailing comment lines up with the one above
    my $pad = length($orig) - length($output);
    print " " x $pad if( $pad > 0 );
    print " ; compressed\n\n";
}
398
Note: See TracBrowser for help on using the repository browser.