source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 467

Last change on this file since 467 was 376, checked in by gregli@…, 13 years ago

WIDE checkin... Added copyright and license information to source files, as per the GPL instructions for usage.

File size: 11.2 KB
Line 
1#======================================================================================
2#
3# Project name : XTIDE Universal BIOS
4#
5# Authors : Greg Lindhorst
6# gregli@hotmail.com
7#
8# Description : Script for compiling and compressing strings for
9# use by DisplayFormatCompressed.asm. See the header of that file
10# for a description of the compression scheme.
11#
12# XTIDE Universal BIOS and Associated Tools
13# Copyright (C) 2009-2010 by Tomi Tilli, 2011-2012 by XTIDE Universal BIOS Team.
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24# Visit http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
25#
26
27#
28# Usage : stdin: Listing of strings.asm,
29# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
30# We used the listing so that the assembler can take care of
31# resolving %define and EQU symbol definitions.
32#
33# stdout: StringsCompressed.asm,
34# plug-in replacement for Strings.asm (included by Main.asm)
35#
36# Also see the XTIDE makefile for building StringsCompressed.asm
37#
38
39#----------------------------------------------------------------------
40#
41# Translated, Format, and "Normal" characters
42#
43# DisplayFormatCompressed can only deal with characters in one of the following categories:
44# 1. Those in the Translate associative array
45# 2. Those in the Format associative array
46# 3. Characters between $normal_base and $normal_base+0x40
47# (typically covers upper and lower case alphabets)
48# 4. Null characters (marking the end of strings)
49# 5. The special string LF,CR
50#
51# If a character or format read at the input cannot be found in one of the above categories,
52# it must be added here before this script will accept it (and DisplayFormatCompressed can
53# display it).
54#
55# Tables for the above categories are expected in the input stream, before the strings to be
56# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
57# and do not need to be updated there. The information it needs is put in the compression
58# output that it reads.
59#
60
#
# Flag values OR-ed into the high bits of every compressed byte.  They record
# whether the low bits hold a "normal" character offset or a translate/format
# code, and whether a space or a null follows the character.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# For translate/format bytes only: this bit is set when the byte is NOT
# followed by a null, and left clear when a null follows it.
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
75
#
# Banner for the generated file, followed by the opening of the conditional
# section that holds the compressed strings.
#
print <<'BANNER';
;;;======================================================================
;;;
;;; This file is generated by StringsCompress.pl from source in Strings.asm
;;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.
;;; This file only needs to be rebuilt if Strings.asm is changed.
;;;
;;;======================================================================

%ifdef STRINGSCOMPRESSED_STRINGS

BANNER
85
#
# First pass: read the whole listing, picking up the table directives that
# configure this script.  They appear in the listing as assembler comments:
#
#   ;$translate{ord('c')} = N      character 'c' is stored as translate code N
#   ;$translate{C} = N             character code C is stored as translate code N
#   ;$format_begin = N             first table index used by format codes
#   ;$format{"name"} = N           format operator "name" is stored as code N
#   ;$normal_base = N  (or 0xN)    first character code of the "normal" range
#
# Every line is kept in @lines for the second pass; directive lines get the
# $processed marker appended so they can be recognised in the output.
#
$processed = " [StringsCompress Processed]";
while(<>)
{
    # chomp, not chop: chop would eat a real character from a final line
    # that has no trailing newline.
    chomp;
    my $out = $_;

    if( /^\s*\d+\s*;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+)/ )
    {
        $translate{ord($1)} = int($2);
        $out .= $processed;
    }
    elsif( /^\s*\d+\s*;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+)/ )
    {
        $translate{int($1)} = int($2);
        $out .= $processed;
    }
    elsif( /^\s*\d+\s*;\$format_begin\s*=\s*([0-9]+)/ )
    {
        $format_begin = int($1);
        $out .= $processed;
    }
    elsif( /^\s*\d+\s*;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+)/ )
    {
        $format{$1} = int($2);
        $out .= $processed;
    }
    # The hexadecimal form must be tried first, otherwise the decimal pattern
    # would accept just the leading "0" of "0x..".
    elsif( /^\s*\d+\s*;\$normal_base\s*=\s*0x([0-9a-fA-F]+)/ )
    {
        $normal_base = hex($1);
        $out .= $processed;
    }
    elsif( /^\s*\d+\s*;\$normal_base\s*=\s*([0-9]+)/ )
    {
        $normal_base = int($1);
        $out .= $processed;
    }

    push( @lines, $out );
}
132
#
# Second pass: walk the saved listing lines.  Each 'db' line (together with any
# continuation lines holding the rest of its bytes) is echoed as a comment and
# then compressed by processString; other lines are copied to the output with
# the listing's line-number column removed.
#
# The loop runs through the last index ($#lines) so that the final listing
# line is not skipped.
#
for( my $idx = 0; $idx <= $#lines; $idx++ )
{
    $_ = $lines[$idx];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    next if /^\s*\d+\s*\<\d\>/;

    #
    # a 'db' line, with or without a label
    #
    if( /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined $label;      # the label is optional

        print $label.$spacing."; ".$db.$string."\n";

        # A trailing '-' after the hex bytes means that the bytes continue on
        # the next listing line; gather them all before compressing.
        while( $continuation eq "-" )
        {
            ++$idx <= $#lines || die "listing ended in the middle of a continued 'db' line\n";
            $_ = $lines[$idx];
            /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i || die "parse error on continuation: '".$_."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is
    #
    elsif( /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}
193
print ";;; end of input stream\n\n";

#--------------------------------------------------------------------------------
#
# Emit the assembler constants and the combined translate/format lookup table
#

print "%endif ; STRINGSCOMPRESSED_STRINGS\n\n",
      "%ifdef STRINGSCOMPRESSED_TABLES\n\n",
      "StringsCompressed_NormalBase equ $normal_base\n\n",
      "StringsCompressed_FormatsBegin equ $format_begin\n\n",
      "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate (character => code) into @translate_index (code => character),
# insisting that every entry was used by some string and that its code is at most 31.
#
foreach $f (keys(%translate))
{
    $translate_index[$translate{$f}] = $f;
    die "translate $f unused\n" unless $used{$f};
    die $translate{$f}.": translate codes must be below 32" if $translate{$f} > 31;
}

# One table byte per translate code, in code order, stopping at the first empty slot.
$g = 0;
while( $translate_index[$g] )
{
    print " db $translate_index[$g] ; $g\n";
    $g++;
}

#
# Same inversion for the formats.  Each table entry names the handler label
# for the format, with '-' in the format name turned into '_'.
#
foreach $f (keys(%format))
{
    ( $n = $f ) =~ tr/-/_/;
    $format_index[$format{$f}] = "DisplayFormatCompressed_Format_$n";
    die "format $f unused\n" unless $used{$f};
    die $format{$f}.": format codes must be below 32" if $format{$f} > 31;
}

# Format entries start at $format_begin and run until the first empty slot.
$t = $format_begin;
while( $format_index[$t] )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - $format_index[$t]) ; $t\n";
    $t++;
}

print "\n";
237
#
# For every format handler, emit an assembly-time check that its label lies at
# or below DisplayFormatCompressed_BaseFormatOffset and no more than 255 bytes
# away, which is what the one-byte table entries can express.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
$t = $format_begin;
while( $format_index[$t] )
{
    my $handler = $format_index[$t];
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$handler
        ." || DisplayFormatCompressed_BaseFormatOffset - ".$handler." > 255\n";
    print "%error \"$handler is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
    $t++;
}
print "%endif\n";
249
#--------------------------------------------------------------------------------
#
# Output usage statistics (as assembler comments) and close the tables section.
#
# Keys are sorted so that the generated file is identical from run to run;
# Perl does not guarantee any particular order for hash keys.
#

print "\n;; translated usage stats\n";
$translate_count = 0;       # start from 0 so the total prints even with no entries
foreach $f (sort { $a <=> $b } keys(%translate))    # keys are character codes
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach $f (sort keys(%format))                     # keys are format names
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

# Every code in the 0x40-wide "normal" range is listed, used or not;
# only the used ones are counted.
$used_count = 0;
for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    $used_count++ if $used{$t};
}
print ";; alphabet used count: ".$used_count."\n";

print "%endif ; STRINGSCOMPRESSED_TABLES\n\n";
286
#--------------------------------------------------------------------------------
#
# processString( $chars, $label, $db ) does the real compression work...
#
# Compresses the bytes of one 'db' statement and prints two lines to stdout:
# the original bytes as a comment ("; uncompressed") followed by the
# replacement 'db' ("; compressed").
#
#   $chars - hexadecimal byte pairs taken from the listing (e.g. "414200")
#   $label - label and spacing that preceded 'db'; the label text is blanked
#            out so that only its width is reproduced
#   $db    - the 'db' keyword including its trailing whitespace
#
# Reads %translate, %format, $normal_base and the $code_* constants, and counts
# every translate, format and normal character it emits in %used.  Dies when a
# byte has no translation or format and lies outside the normal range.
#
# All working variables are lexical, so the caller's globals are left alone.
#

sub processString
{
    my( $chars, $label, $db ) = @_;
    $label = "" unless defined $label;

    $label =~ s/[a-z0-9_:]/ /ig;    # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v = ();
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $i = int( $pos / 2 );
        $v[$i] = hex( substr( $chars, $pos, 2 ) );
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
    }

    # Guard byte, so that looking ahead past the end of the string is never
    # mistaken for a null.
    my $guard = int( length($chars) / 2 );
    $v[$guard] = 0xff;

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats.
    # Every byte before the guard is visited, so a final character that is not
    # followed by a null is still encoded.
    #
    for( my $g = 0; $g < $guard; $g++ )
    {
        my( $code, $post );

        #
        # A null in the very last position that was not absorbed as the postfix
        # of the preceding character has no encoding of its own; leave it out.
        #
        last if $v[$g] == 0 && $g == $guard - 1;

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $format{"nl"} || die "unknown format operator: 'nl'\n";

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional digit, an optional '-', then one character
        #
        elsif( $v[$g] == 0x25 )     # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (a space always takes this path)
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post |= $code_translate_null;
            }
            else
            {
                $post |= $code_translate_normal;
            }
        }
        else    # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code |= $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # Both strings start with ", ", which substr(...,2) strips.  The compressed
    # line is padded to the width of the uncompressed one so the trailing
    # comments line up.
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);
    print " " x ( length($orig) - length($output) );
    print " ; compressed\n\n";
}
438
Note: See TracBrowser for help on using the repository browser.