1 | #======================================================================================
2 | #
3 | # Project name : XTIDE Universal BIOS
4 | #
5 | # Authors : Greg Lindhorst
6 | # gregli@hotmail.com
7 | #
8 | # Description : Script for compiling and compressing strings for
9 | # use by DisplayFormatCompressed.asm. See the header of that file
10 | # for a description of the compression scheme.
11 | #
12 | # Usage : stdin: Listing of strings.asm,
14 | # We used the listing so that the assembler can take care of
15 | # resolving %define and EQU symbol definitions.
16 | #
17 | # stdout: StringsCompressed.asm,
18 | # plug replacement for Strings.asm (included by Main.asm)
19 | #
20 | # Also see the XTIDE makefile for building StringsCompressed.asm
21 | #
22 |
23 | #----------------------------------------------------------------------
24 | #
25 | # Translated and Format characters
26 | #
27 | # DisplayFormatCompressed can only deal with characters in one of the following categories:
28 | # 1. Those in the Translate associative array
29 | # 2. Those in the Format associative array
30 | # 3. Characters between $normal_base and $normal_base+0x40
31 | # 4. Null characters (marking the end of strings)
32 | # 5. The special string LF,CR
33 | #
34 | # If a character or format read at the input cannot be found in one of the above categories,
35 | # it must be added here before this script will accept it (and DisplayFormatCompressed can
36 | # display it).
37 | #
# Note that these tables are not present in DisplayFormatCompressed, and do not need to
# be updated there. Needed information is put in the compression output that it reads.
40 | #
#
# Translated characters: character code => index in the translate/format table.
#
# NOTE(review): no entry is assigned code 19, although $format_begin below is 20
# and formats are described as following the last translated character
# immediately - confirm whether a translate entry is missing here.
#
%translate = (
    ord(' ') => 0,
    172      => 1,      # ONE_QUARTER
    171      => 2,      # ONE_HALF
    179      => 3,      # SINGLE_VERTICAL
    175      => 4,      # ANGLE_QUOTE_RIGHT
    ord('!') => 5,
    ord('"') => 6,
    ord(',') => 7,
    ord('-') => 8,
    ord('.') => 9,
    ord('/') => 10,
    ord('1') => 11,
    ord('2') => 12,
    ord('3') => 13,
    ord('4') => 14,
    ord('5') => 15,
    ord('6') => 16,
    ord('8') => 17,
    200      => 18,     # DOUBLE_BOTTOM_LEFT_CORNER
);

#
# Formats begin immediately after the last Translated character (they are in the same table)
#
$format_begin = 20;

%format = (
    "s"   => 20,        # n/a
    "c"   => 21,        # n/a
    "2-I" => 22,        # must be even
    "u"   => 23,        # must be odd
    "5-u" => 24,        # must be even
    "x"   => 25,        # must be odd
    "5-x" => 26,        # must be even
    "nl"  => 27,        # n/a
    "2-u" => 28,        # must be even
    "A"   => 29,        # n/a
);

# NOTE: The last $format cannot exceed 31 (stored in a 5-bit quantity).

#
# Starting point for the "normal" range, typically around 0x40 to cover upper and lower case
# letters. If lower case 'z' is not used, 0x3a can be a good choice as it adds ':' to the
# front end.
#
$normal_base = 0x3a;

#
# High order code bits, determining which type of character we have (translated or not) and
# if a space or null should come after this character.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# Bit used if it is a translated byte
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
101 |
#
# Banner written at the top of the generated file, warning readers that it is
# produced by this script and must not be edited by hand.
#
print
    ";;;======================================================================\n",
    ";;;\n",
    ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n",
    ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n",
    ";;; This file only needs to be rebuilt if Strings.asm is changed.\n",
    ";;;\n",
    ";;;======================================================================\n\n";
109 |
#
# Walk the listing one line at a time. 'db' lines (together with any
# continuation lines carrying the rest of their bytes) are compressed through
# processString; lines from include files are dropped; every other numbered
# line is echoed to the output without its line-number prefix.
#
while( my $line = <> )
{
    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    next if $line =~ /^\s*\d+\s*\<\d\>/;

    #
    # a 'db' line, with or without a label
    #
    if( my( $bytes, $more, $label, $spacing, $db, $string ) =
            $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        # keep the original source line in the output, as a comment
        print $label.$spacing."; ".$db.$string."\n";

        # a trailing '-' on the byte column means more bytes follow on the next line
        while( $more eq "-" )
        {
            my $next_line = <>;
            my( $extra, $flag ) = $next_line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i
                or die "parse error on continuation";
            $bytes .= $extra;
            $more = $flag;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # everything else, copy to the output as is
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}
160 |
print ";;; end of strings.asm\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#
# Entries are written at the table position equal to their code. Codes that
# have no assignment get a filler byte, so a gap in %translate or %format can
# neither shift nor drop the entries that follow it.
# NOTE(review): this assumes the consumer indexes the table directly by code -
# confirm against DisplayFormatCompressed.asm.
#

print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

foreach my $f (keys(%translate))
{
    $translate_index[$translate{$f}] = $f;
    $used{$f} || die "translate $f unused\n";
    $translate{$f} <= 31 || die $translate{$f}.": translate codes must be below 32";

    # translate codes occupy the table positions in front of the formats
    $translate{$f} < $format_begin
        || die $translate{$f}.": translate codes must be below \$format_begin (".$format_begin.")";
}

for my $code ( 0 .. $format_begin - 1 )
{
    if( defined $translate_index[$code] )
    {
        print " db ".$translate_index[$code]." ; ".$code."\n";
    }
    else
    {
        # unassigned code: keep the following entries at their own positions
        print " db 0 ; ".$code." (unused)\n";
    }
}

foreach my $f (keys(%format))
{
    my $n = $f;
    $n =~ s/\-/_/g;             # '-' is not valid in the handler's label name
    $format_index[$format{$f}] = "DisplayFormatCompressed_Format_".$n;
    $used{$f} || die "format $f unused\n";
    $format{$f} <= 31 || die $format{$f}.": format codes must be below 32";
}

for my $code ( $format_begin .. $#format_index )
{
    if( defined $format_index[$code] )
    {
        print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$code].") ; ".$code."\n";
    }
    else
    {
        print " db 0 ; ".$code." (unused)\n";
    }
}

print "\n";

#
# Ensure that branch targets are within reach
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for my $code ( $format_begin .. $#format_index )
{
    my $target = $format_index[$code];
    next unless defined $target;

    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$target." || DisplayFormatCompressed_BaseFormatOffset - ".$target." > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
213 |
#--------------------------------------------------------------------------------
#
# Output usage statistics
#
# Written as assembler comments at the end of the generated file. Keys are
# sorted so that the output does not change from run to run with Perl's hash
# ordering.
#

print "\n;; translated usage stats\n";
$translate_count = 0;
foreach my $f (sort { $a <=> $b } keys(%translate))
{
    # keys of %translate are character codes; %used is counted by the same key
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach my $f (sort keys(%format))
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

$used_count = 0;
for( my $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";
248 |
#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]  bytes of one 'db' statement as hexadecimal pairs from the listing
#   $_[1]  label and spacing of the source line; label characters are replaced
#          by spaces so the emitted lines keep the same column layout
#   $_[2]  the 'db' directive text to put in front of the emitted bytes
#
# Prints the uncompressed bytes (as a comment) and the compressed 'db' line to
# stdout, and counts every character and format it encodes in %used.
#
# Dies if a byte is neither a translated character, a known format operator,
# nor inside the normal range starting at $normal_base.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;    # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $i = $pos/2;
        $v[$i] = hex(substr($chars,$pos,2));
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
    }
    $v[length($chars)/2] = 0xff;    # guard byte to avoid thinking going past the end of
                                    # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # $#v is the index of the guard byte, so "$g < $#v" visits every data byte
    # and stops right before the guard.
    #
    for( my $g = 0; $g < $#v; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators
        #
        elsif( $v[$g] == 0x25 )     # "%"
        {
            # collect an optional single digit, an optional '-', then the
            # operator character: together they form the key into %format
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters
        # (space is tested explicitly because its translate code is 0, which is false)
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else    # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad the compressed line so both trailing comments start in the same column
    my $pad = length($orig) - length($output);
    print " " x $pad if $pad > 0;
    print " ; compressed\n\n";
}
400 |