source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 374

Last change on this file since 374 was 334, checked in by gregli@…, 13 years ago

Serial server DPT flag optimization, remove Serial/IDE specific header on drive scan results, added GNU GPL v2 banner at boot.

File size: 10.6 KB
RevLine 
[189]1#======================================================================================
2#
3# Project name : XTIDE Universal BIOS
4#
5# Authors : Greg Lindhorst
6# gregli@hotmail.com
7#
[242]8# Description : Script for compiling and compressing strings for
[189]9# use by DisplayFormatCompressed.asm. See the header of that file
10# for a description of the compression scheme.
11#
[242]12# Usage : stdin: Listing of strings.asm,
[189]13# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14# We used the listing so that the assembler can take care of
15# resolving %define and EQU symbol definitions.
16#
17# stdout: StringsCompressed.asm,
18# plug replacement for Strings.asm (included by Main.asm)
19#
20# Also see the XTIDE makefile for building StringsCompressed.asm
21#
22
23#----------------------------------------------------------------------
24#
[197]25# Translated, Format, and "Normal" characters
[189]26#
27# DisplayFormatCompressed can only deal with characters in one of the following categories:
28# 1. Those in the Translate associative array
29# 2. Those in the Format associative array
[242]30# 3. Characters between $normal_base and $normal_base+0x40
31# (typically covers upper and lower case alphabets)
[189]32# 4. Null characters (marking the end of strings)
33# 5. The special string LF,CR
34#
[242]35# If a character or format read at the input cannot be found in one of the above categories,
36# it must be added here before this script will accept it (and DisplayFormatCompressed can
[189]37# display it).
38#
# Tables for the above categories are expected in the input stream, before the strings
# to be compressed are provided. Note that these tables are not present in
# DisplayFormatCompressed, and do not need to be updated there. The information it needs
# is put in the compression output that it reads.
[189]43#
44
#
# Post-character codes, kept in the two high-order bits of every compressed
# byte.  They tell whether the byte is a "normal" character or a
# translated/format entry, and whether a space or a null follows it.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# For a translated/format byte, this bit separates "a null follows"
# from "more of the string follows".
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
59
#
# Banner for the generated file, followed by the opening of the
# STRINGSCOMPRESSED_STRINGS conditional section.
#
print <<'BANNER';
;;;======================================================================
;;;
;;; This file is generated by StringsCompress.pl from source in Strings.asm
;;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.
;;; This file only needs to be rebuilt if Strings.asm is changed.
;;;
;;;======================================================================

%ifdef STRINGSCOMPRESSED_STRINGS

BANNER
[197]69
#
# First pass: read the whole listing and pick up our table directives.
# $translate{...}, $format{...}, $format_begin and $normal_base are expected
# in the input stream as assembler comments.  Every input line is kept in
# @lines for the second pass; directive lines get the $processed marker
# appended so that they can be recognized in the generated output.
#
$processed = " [StringsCompress Processed]";
while( defined( my $line = <> ) )
{
    # Remove only the line terminator.  (chop would eat a real character
    # from a final line that has no newline, and would leave the CR of a
    # CRLF listing in place.)
    $line =~ s/\r?\n\z//;

    my $is_directive = 1;

    #
    # Table entries for this script
    #
    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        # translate entry keyed by a quoted character
        $translate{ord($2)} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        # translate entry keyed by a decimal character code
        $translate{int($2)} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        # hexadecimal form; must be tested before the decimal form below,
        # which would otherwise match the leading "0" of "0x..."
        $normal_base = hex($2);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
    }
    else
    {
        $is_directive = 0;
    }

    $line .= $processed if $is_directive;
    push( @lines, $line );
}
116
#
# Second pass: walk the stored listing lines, looking for 'db' lines
# (and dealing with continuations) and compressing each string as it is
# encountered.  Everything else is copied to the output.
#
# The loop runs through the last index ($#lines) inclusive, so the final
# line of the listing is processed as well.
#
for( my $index = 0; $index <= $#lines; $index++ )
{
    my $line = $lines[$index];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined $label;      # the label is optional

        # the original source line is kept in the output as a comment
        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing "-" after the hex bytes means that the bytes continue
        # on the next listing line; gather them until there is no "-".
        #
        while( $continuation eq "-" )
        {
            my $next = $lines[++$index];
            $next = "" unless defined $next;    # listing ended in the middle of a continuation
            $next =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$next."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( $line =~ /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";
[189]179
#--------------------------------------------------------------------------------
#
# Close the strings section, then output the constants and the
# TranslatesAndFormats table
#

print "%endif ; STRINGSCOMPRESSED_STRINGS\n\n";
print "%ifdef STRINGSCOMPRESSED_TABLES\n\n";

print "StringsCompressed_NormalBase equ $normal_base\n\n";

print "StringsCompressed_FormatsBegin equ $format_begin\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate (character => code) into a list indexed by code,
# checking on the way that every entry was used and fits in five bits.
#
my @char_for_code;
for my $char (keys(%translate))
{
    my $code = $translate{$char};
    $char_for_code[$code] = $char;
    die "translate $char unused\n" unless $used{$char};
    die $code.": translate codes must be below 32" unless $code <= 31;
}

# one table byte per translate code, counting up from 0 until the first gap
my $slot = 0;
while( $char_for_code[$slot] )
{
    print " db ".$char_for_code[$slot]." ; ".$slot."\n";
    $slot++;
}

#
# Same inversion for %format; the table stores the handler label of each
# format ("-" is not valid in a label, so it becomes "_").
#
my @label_for_code;
for my $name (keys(%format))
{
    ( my $suffix = $name ) =~ s/\-/_/g;
    my $code = $format{$name};
    $label_for_code[$code] = "DisplayFormatCompressed_Format_".$suffix;
    die "format $name unused\n" unless $used{$name};
    die $code.": format codes must be below 32" unless $code <= 31;
}

# format codes in use, counting up from $format_begin until the first gap
my @format_slots;
for( my $code = $format_begin; $label_for_code[$code]; $code++ )
{
    push( @format_slots, $code );
}

# each format entry is stored as a distance back from the base offset
for my $code (@format_slots)
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - ".$label_for_code[$code].") ; ".$code."\n";
}

print "\n";

#
# Ensure that branch targets are within reach:
# each distance above must fit in a single unsigned byte
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for my $code (@format_slots)
{
    my $target = $label_for_code[$code];
    print "%if DisplayFormatCompressed_BaseFormatOffset < $target || DisplayFormatCompressed_BaseFormatOffset - $target > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
[189]233
#--------------------------------------------------------------------------------
#
# Output usage statistics (as assembler comments)
#
# Keys are sorted so that the generated file is the same from run to run;
# the iteration order of a Perl hash is not stable.
#

print "\n;; translated usage stats\n";
$translate_count = 0;       # start at 0 so that the total reads "0" when there are no entries
foreach $f (sort { $a <=> $b } keys(%translate))    # keys are character codes
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach $f (sort keys(%format))                     # keys are format names
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

# one line per character of the 0x40 wide "normal" window, used or not
$used_count = 0;
for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

print "%endif ; STRINGSCOMPRESSED_TABLES\n\n";
270
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]  the bytes of one string, as hexadecimal pairs taken from the listing
#   $_[1]  label and spacing of the source line (label characters are blanked
#          out so that the output lines up under the original)
#   $_[2]  the "db" directive with its trailing spacing
#
# Prints two lines to stdout: the uncompressed bytes as a comment, and the
# compressed bytes as a db directive.
#
# Reads the globals %translate, %format, $normal_base and $code_*, and counts
# every character/format it meets in %used.  All working variables are
# lexical, so nothing in the caller is disturbed.
#
# Dies when a byte cannot be encoded (no translation, no format, outside of
# the normal range) or when a needed format is not defined.
#

sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;        # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $i = 0; $i * 2 < length($chars); $i++ )
    {
        $v[$i] = hex( substr( $chars, $i * 2, 2 ) );
        # a value that would start with a hex letter gets a leading zero
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
    }
    push( @v, 0xff );                   # guard byte to avoid thinking going past the end of
                                        # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # $#v is the index of the guard byte, so "$g < $#v" visits every real
    # byte.  A final byte that no lookahead consumed (a string without a
    # terminating null) is therefore still encoded instead of being dropped.
    #
    for( my $g = 0; $g < $#v; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            exists $format{"nl"} || die "LF,CR found, but no \"nl\" format is defined\n";

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: "%", an optional digit, an optional "-", then the format character
        #
        elsif( $v[$g] == 0x25 )         # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            exists $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters
        # (tested with exists, since a translate code of 0 is a valid code)
        #
        elsif( exists $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else    # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " of each list
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad so that the trailing comments of both lines start in the same column
    my $padding = length($orig) - length($output);
    print " " x $padding if $padding > 0;
    print " ; compressed\n\n";
}
422
Note: See TracBrowser for help on using the repository browser.