source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 508

Last change on this file since 508 was 492, checked in by gregli@…, 12 years ago

Removed the dependency between MODULE_BOOT_MENU and MODULE_HOTKEYS. With these changes, 0, 1, or 2 of them can be included in a build. This change also means that the hotkeys don't work while the menu is up. But the most important hotkey there was for Rom Boot, and that has been added to the menu as a choice proper. Lots of changes across the board in the hotkeys code - even if we eventually back this change out (because, for example, we want hotkeys to work in the menu) we should probably start from this base and add that functionality back in, as these changes result in approximately 120 bytes of savings and include new functionality, such as the Rom Boot menu item and the Com Detect hotkey.

File size: 11.2 KB
Line 
1#======================================================================================
2#
3# Project name : XTIDE Universal BIOS
4#
5# Authors : Greg Lindhorst
6# gregli@hotmail.com
7#
8# Description : Script for compiling and compressing strings for
9# use by DisplayFormatCompressed.asm. See the header of that file
10# for a description of the compression scheme.
11#
12# XTIDE Universal BIOS and Associated Tools
13# Copyright (C) 2009-2010 by Tomi Tilli, 2011-2012 by XTIDE Universal BIOS Team.
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24# Visit http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
25#
26
27#
28# Usage : stdin: Listing of strings.asm,
29# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
30# We used the listing so that the assembler can take care of
31# resolving %define and EQU symbol definitions.
32#
33# stdout: StringsCompressed.asm,
34# plug replacement for Strings.asm (included by Main.asm)
35#
36# Also see the XTIDE makefile for building StringsCompressed.asm
37#
38
39#----------------------------------------------------------------------
40#
41# Translated, Format, and "Normal" characters
42#
43# DisplayFormatCompressed can only deal with characters in one of the following categories:
44# 1. Those in the Translate associative array
45# 2. Those in the Format associative array
46# 3. Characters between $normal_base and $normal_base+0x40
47# (typically covers upper and lower case alphabets)
48# 4. Null characters (marking the end of strings)
49# 5. The special string LF,CR
50#
51# If a character or format read at the input cannot be found in one of the above categories,
52# it must be added here before this script will accept it (and DisplayFormatCompressed can
53# display it).
54#
55# Tables for the above categories are expected in the input stream, before the strings to be
56# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
57# and do not need to be updated there. Needed information is put in the compression output
58# that it reads.
59#
60
#
# Top two bits of every compressed byte.  They select the kind of byte
# (translated/format versus "normal") and, for a normal character, whether
# a space or a terminating null follows it.
#
( $code_translate, $code_normal, $code_null, $code_space ) =
    ( 0x00, 0x40, 0x80, 0xc0 );

#
# For a translated/format byte, this extra bit tells whether the byte is
# followed by a null (bit clear) or by more string data (bit set).
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
75
#
# Banner for the generated file, followed by the opening of the
# STRINGSCOMPRESSED_STRINGS section (closed again once the input stream
# has been processed).
#
print <<'END_OF_BANNER';
;;;======================================================================
;;;
;;; This file is generated by StringsCompress.pl from source in Strings.asm
;;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.
;;; This file only needs to be rebuilt if Strings.asm is changed.
;;;
;;;======================================================================

%ifdef STRINGSCOMPRESSED_STRINGS

END_OF_BANNER
85
#
# First pass: read the whole listing into @lines and pick up our table
# directives on the way.  $translate{...}, $format{...}, $format_begin and
# $normal_base are expected in the input stream as assembler comments.
# Every recognized directive line gets $processed appended, so it is marked
# as such when the second pass copies it to the output.
#
my $processed = " [StringsCompress Processed]";
while( my $line = <> )
{
    # Strip the line ending only.  (chop would also eat the last real
    # character of a final line that has no trailing newline, and would
    # leave the CR behind on CRLF input.)
    $line =~ s/[\r\n]+\z//;

    my $recognized = 1;

    #
    # Table entries for this script
    #
    if( $line =~ /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        # translate entry keyed by a quoted character
        $translate{ord($2)} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        # translate entry keyed by a numeric character code
        $translate{int($2)} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        # The hexadecimal form must be tried before the decimal one below,
        # which would otherwise match just the leading "0" of "0x..".
        $normal_base = hex($2);
    }
    elsif( $line =~ /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
    }
    else
    {
        $recognized = 0;
    }

    $line .= $processed if $recognized;
    push( @lines, $line );
}
132
#
# Second pass: walk the stored listing lines, looking for 'db' lines (and
# their continuation lines) and compressing each string as it is found.
# All other lines are copied to the output with the listing's line number
# stripped.
#
# The loop runs through index $#lines inclusive; stopping at "< $#lines"
# would silently drop the last line of the listing.
#
for( my $idx = 0; $idx <= $#lines; $idx++ )
{
    my $line = $lines[$idx];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d\>/ )
    {
    }

    #
    # a 'db' line, with or without a label
    #
    elsif( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) =
            ( $1, $2, $3, $4, $5, $6 );

        # Echo the source form of the string as a comment
        print $label.$spacing."; ".$db.$string."\n";

        # A trailing '-' after the hex bytes means the bytes continue on
        # the next listing line; keep appending until it is gone.
        while( $continuation eq "-" )
        {
            my $next = $lines[++$idx];
            $next =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$next."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( $line =~ /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";
195
#--------------------------------------------------------------------------------
#
# Close the strings section, then emit the constants and the
# TranslatesAndFormats table: one db per translate code counting up from 0,
# followed by one db per format code counting up from $format_begin.
#

print "%endif ; STRINGSCOMPRESSED_STRINGS\n\n";
print "%ifdef STRINGSCOMPRESSED_TABLES\n\n";

print "StringsCompressed_NormalBase equ $normal_base\n\n";

print "StringsCompressed_FormatsBegin equ $format_begin\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate (character code -> translate code) into a list indexed
# by translate code.  Every entry must have been used by some string and
# must fit below 32.
#
my @translate_index;
foreach my $char_code ( keys %translate )
{
    my $slot = $translate{$char_code};
    $translate_index[$slot] = $char_code;
    die "translate $char_code unused\n" unless $used{$char_code};
    die $slot.": translate codes must be below 32" unless $slot <= 31;
}

# Emission stops at the first empty (false) slot
my $code = 0;
while( $translate_index[$code] )
{
    print " db $translate_index[$code] ; $code\n";
    $code++;
}

#
# Same inversion for the formats; the table stores the name of each
# handler label ('-' in a format name becomes '_' in the label).
#
my @format_index;
foreach my $name ( keys %format )
{
    ( my $suffix = $name ) =~ tr/-/_/;
    my $slot = $format{$name};
    $format_index[$slot] = "DisplayFormatCompressed_Format_".$suffix;
    die "format $name unused\n" unless $used{$name};
    die $slot.": format codes must be below 32" unless $slot <= 31;
}

# Format codes that get a table entry: from $format_begin up to the first
# empty slot.  Both loops below walk this same range.
my @format_slots;
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    push( @format_slots, $slot );
}

foreach my $slot ( @format_slots )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - $format_index[$slot]) ; $slot\n";
}

print "\n";

#
# Ensure that branch targets are within reach
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
foreach my $slot ( @format_slots )
{
    my $target = $format_index[$slot];
    print "%if DisplayFormatCompressed_BaseFormatOffset < $target || DisplayFormatCompressed_BaseFormatOffset - $target > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
249
#--------------------------------------------------------------------------------
#
# Output usage statistics (as comments in the generated file), then close
# the STRINGSCOMPRESSED_TABLES section.
#
# Hash keys are visited in sorted order: Perl's own key order is unspecified
# and can change from run to run, which would make the generated file differ
# between otherwise identical builds.
#

print "\n;; translated usage stats\n";
my $translate_count = 0;
foreach my $char_code ( sort { $a <=> $b } keys(%translate) )
{
    print ";; ".$char_code.":".$used{$char_code}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
my $format_count = 0;
foreach my $name ( sort keys(%format) )
{
    print ";; ".$name.":".$used{$name}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

# One line per character code in the "normal" window of 0x40 codes that
# starts at $normal_base; a code no string used shows an empty count.
my $used_count = 0;
for( my $char_code = $normal_base; $char_code < $normal_base + 0x40; $char_code++ )
{
    print ";; ".$char_code.",".chr($char_code).":".$used{$char_code}."\n";
    if( $used{$char_code} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

print "%endif ; STRINGSCOMPRESSED_TABLES\n\n";
286
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
#   processString( $chars, $label, $db )
#
#   $chars : the string's bytes as hexadecimal digit pairs from the listing
#   $label : label and spacing in front of the 'db'; only its width is used
#   $db    : the 'db' keyword together with its trailing spacing
#
# Prints two lines to the currently selected output handle: the uncompressed
# bytes as a comment, then the compressed 'db' line.  Counts every translate,
# format and normal character it uses in %used.  Dies on a byte that has no
# translation or format and is outside the normal range, and on an unknown
# format operator.
#
# Reads the globals %translate, %format, $normal_base and the $code_*
# constants.  All working state is lexical, so the caller's variables
# ($label, $db, loop counters, ...) are left untouched.
#

sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;    # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing.  A value
    # above 0x9f is printed with three digits, so that its hex form starts
    # with a 0 rather than a letter.
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $byte = hex( substr( $chars, $pos, 2 ) );
        push( @v, $byte );
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ", %02xh"), $byte );
    }
    push( @v, 0xff );               # guard byte to avoid thinking going past the end of
                                    # the string is a null

    my $output = "";
    #
    # Loop through bytes (all but the guard)...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    for( my $g = 0; $g < $#v; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional single digit, an optional '-',
        # then the operator character
        #
        elsif( $v[$g] == 0x25 )     # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters.  Space is accepted explicitly, whatever its
        # table entry (presumably because its translate code is 0, which
        # would test false here - confirm against the tables in Strings.asm).
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else    # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # substr(..,2) drops the leading ", " of each list
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # Pad the compressed line so both trailing comments start in the same column
    for( my $t = length($output); $t < length($orig); $t++ )
    {
        print " ";
    }
    print " ; compressed\n\n";
}
438
Note: See TracBrowser for help on using the repository browser.