source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 256

Last change on this file since 256 was 242, checked in by krille_n_@…, 13 years ago

Changes:

  • Optimizations (both for size and speed) in IdeTransfer.asm and MemIdeTransfer.asm
  • Fixed a bug where the SingleByteRead/Write functions in IdeTransfer.asm would fail on 128 sector transfers.
  • Fixed some typos and errors in general, comments etc.
File size: 10.2 KB
RevLine 
#======================================================================================
#
# Project name : XTIDE Universal BIOS
#
# Authors : Greg Lindhorst
# gregli@hotmail.com
#
# Description : Script for compiling and compressing strings for
# use by DisplayFormatCompressed.asm. See the header of that file
# for a description of the compression scheme.
#
# Usage : stdin: Listing of strings.asm,
# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
# The listing is used so that the assembler can take care of
# resolving %define and EQU symbol definitions.
#
# stdout: StringsCompressed.asm,
# plug-in replacement for Strings.asm (included by Main.asm)
#
# Also see the XTIDE makefile for building StringsCompressed.asm
#

#----------------------------------------------------------------------
#
# Translated, Format, and "Normal" characters
#
# DisplayFormatCompressed can only deal with characters in one of the following categories:
# 1. Those in the Translate associative array
# 2. Those in the Format associative array
# 3. Characters from $normal_base up to, but not including, $normal_base+0x40
# (typically covers upper and lower case alphabets)
# 4. Null characters (marking the end of strings)
# 5. The special string LF,CR
#
# If a character or format read at the input cannot be found in one of the above categories,
# it must be added to the appropriate table before this script will accept it (and
# DisplayFormatCompressed can display it).
#
# Tables for the above categories are expected in the input stream, before the strings to be
# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
# and do not need to be updated there. The needed information is put in the compression output
# that it reads.
#

#
# Two high-order bits of every compressed byte.  They tell whether the low
# bits hold a "normal" character (an offset from $normal_base) or an index
# into the translate/format table, and whether a space or a null is implied
# after a normal character.
#
$code_space     = 0xc0;     # normal character, followed by a space
$code_null      = 0x80;     # normal character, followed by a null (end of string)
$code_normal    = 0x40;     # normal character, nothing implied after it
$code_translate = 0x00;     # translate/format table index

#
# Extra bit OR-ed in when the byte is a translate/format table index:
# it is set when the string continues and clear when a null follows.
#
$code_translate_null   = 0x00;
$code_translate_normal = 0x20;

#
# Banner written at the top of the generated file.
#
print ";;;======================================================================\n",
      ";;;\n",
      ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n",
      ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n",
      ";;; This file only needs to be rebuilt if Strings.asm is changed.\n",
      ";;;\n",
      ";;;======================================================================\n\n";


#
# First pass: read the whole input into @lines and pick up the table
# directives on the way.  The directives are assembler comments that follow
# the listing's own line number:
#
#     ;$translate{ord('c')} = N      ;$translate{N} = N
#     ;$format{"name"} = N           ;$format_begin = N
#     ;$normal_base = 0xN            ;$normal_base = N
#
# Every recognized directive gets $processed appended before it is stored,
# so it is marked as such when the line is copied to the output.
#
$processed = " [StringsCompress Processed]";
while( my $line = <> )
{
    # Strip the line terminator only.  chop() would eat a real character when
    # the last line has no newline, and would leave the CR of a CRLF listing.
    $line =~ s/\r?\n\z//;

    #
    # Table entries for this script
    #
    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{ord($2)} = int($3);
        $line .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{int($2)} = int($3);
        $line .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
        $line .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
        $line .= $processed;
    }
    # The hexadecimal form must be tried first: the decimal pattern below
    # would otherwise read "0x40" as 0.
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        $normal_base = hex($2);
        $line .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
        $line .= $processed;
    }

    push( @lines, $line );
}

#
# Second pass: walk the stored listing lines.  'db' lines (together with
# their continuation lines) are handed to processString for compression,
# lines coming from include files are dropped, and everything else is
# copied to the output without its listing line number.
#
# The bound is "<= $#lines" because $#lines is the index of the last line;
# with "<" the final line of the listing would never be looked at.
#
for( my $index = 0; $index <= $#lines; $index++ )
{
    my $line = $lines[$index];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d+\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my $bytes        = $1;
        my $continuation = $2;
        my $label        = defined($3) ? $3 : "";
        my $spacing      = $4;
        my $db           = $5;
        my $string       = $6;

        # Keep the original source statement in the output, as a comment.
        print $label.$spacing."; ".$db.$string."\n";

        # A trailing "-" on the byte column means more bytes of the same
        # statement follow on the next listing line.
        while( $continuation eq "-" )
        {
            $index++;
            $index <= $#lines || die "parse error on continuation: listing ended inside a db statement\n";

            $lines[$index] =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$lines[$index]."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # everything else, copy to the output as is
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#

# Without these two directives the "equ" lines below would be emitted with no
# value; fail here with a clear message rather than in the assembler.
defined( $normal_base )  || die "no \$normal_base directive found in the input\n";
defined( $format_begin ) || die "no \$format_begin directive found in the input\n";

print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Translated characters: one db per translate code, holding the character value.
# Every entry must have been used by some string and must fit in five bits.
#
foreach my $char (keys(%translate))
{
    $translate_index[$translate{$char}] = $char;
    $used{$char} || die "translate $char unused\n";
    $translate{$char} <= 31 || die $translate{$char}.": translate codes must be below 32";
}

# Codes are emitted from 0 upwards until the first code with no entry.
for( my $code = 0; defined( $translate_index[$code] ); $code++ )
{
    print " db ".$translate_index[$code]." ; ".$code."\n";
}

#
# Formats: one db per format code, holding the distance from
# DisplayFormatCompressed_BaseFormatOffset to the handler label.  The label
# is the format name with "-" replaced by "_".
#
foreach my $name (keys(%format))
{
    my $suffix = $name;
    $suffix =~ s/\-/_/g;
    $format_index[$format{$name}] = "DisplayFormatCompressed_Format_".$suffix;
    $used{$name} || die "format $name unused\n";
    $format{$name} <= 31 || die $format{$name}.": format codes must be below 32";
}

# Codes are emitted from $format_begin upwards until the first code with no entry.
for( my $code = $format_begin; defined( $format_index[$code] ); $code++ )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$code].") ; ".$code."\n";
}

print "\n";

#
# Ensure that branch targets are within reach: emit an assembler-time check
# for every format handler, verifying that its distance from
# DisplayFormatCompressed_BaseFormatOffset is in the range 0..255.  The
# checks are skipped when CHECK_FOR_UNUSED_ENTRYPOINTS is defined.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for( my $code = $format_begin; $format_index[$code]; $code++ )
{
    my $target = $format_index[$code];

    print "%if DisplayFormatCompressed_BaseFormatOffset < $target || DisplayFormatCompressed_BaseFormatOffset - $target > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";

#--------------------------------------------------------------------------------
#
# Output usage statistics (as assembler comments)
#
# Keys are sorted so that the generated file is identical from run to run;
# the order returned by keys() is not stable.
#

print "\n;; translated usage stats\n";
my $translate_count = 0;    # start at 0 so the total prints as "0" when there are no translates
foreach my $char (sort { $a <=> $b } keys(%translate))
{
    print ";; ".$char.":".$used{$char}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
my $format_count = 0;
foreach my $name (sort(keys(%format)))
{
    print ";; ".$name.":".$used{$name}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

# One line per character value of the "normal" range, plus a count of how
# many of those values were used at least once.
my $used_count = 0;
for( my $value = $normal_base; $value < $normal_base + 0x40; $value++ )
{
    print ";; ".$value.",".chr($value).":".$used{$value}."\n";
    if( $used{$value} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]   the assembled bytes of one db statement, as the string of
#           hexadecimal digit pairs taken from the listing
#   $_[1]   the label and spacing that preceded the db; only its width is
#           used, to line up the two output lines
#   $_[2]   the "db" keyword with its trailing spacing
#
# Prints the uncompressed bytes as a comment line followed by the compressed
# db line, and counts every character, translate and format used in %used.
# Reads the tables %translate and %format, $normal_base and the $code_*
# constants.  Dies on a byte that fits no category or on an unknown format.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;        # replace with spaces for proper output spacing

    length($chars) % 2 == 0 || die "odd number of hex digits in listing bytes: '".$chars."'\n";

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v    = ();
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $byte = hex(substr($chars,$pos,2));
        push( @v, $byte );
        # three digits when the value starts with a letter, so that the
        # assembler sees a leading 0
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ", %02xh"), $byte );
    }
    push( @v, 0xff );                   # guard byte to avoid thinking going past the end of
                                        # the string is a null

    my $output = "";
    my $g;

    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # $#v is the index of the guard byte, so this bound stops one byte short
    # of the last data byte.  That byte is normally the terminating null,
    # which the lookahead of the preceding character has already consumed.
    #
    for( $g = 0; $g < $#v-1; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            defined( $format{"nl"} ) || die "LF,CR found in a string, but no \"nl\" format is defined\n";

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: "%", an optional single digit, an optional "-",
        # and the operator character
        #
        elsif( $v[$g] == 0x25 )         # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            defined( $format{$fo} ) || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters.  A space is always taken here, with code 0
        # when the translate table has no entry for it.
        #
        elsif( $v[$g] == 32 || exists( $translate{$v[$g]} ) )
        {
            $post = $code_translate;
            $code = exists( $translate{$v[$g]} ) ? $translate{$v[$g]} : 0;
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else    # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # The loop ends here only when no lookahead consumed the last data byte;
    # that byte is then absent from the compressed output.  Say so on stderr
    # instead of dropping it silently.
    if( $g == $#v-1 )
    {
        warn sprintf( "warning: last byte (%02xh) of '%s' was not compressed\n", $v[$g], $chars );
    }

    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad the compressed line so that both trailing comments line up
    for( my $column = length($output); $column < length($orig); $column++ )
    {
        print " ";
    }
    print " ; compressed\n\n";
}

Note: See TracBrowser for help on using the repository browser.