Context Navigation

source: xtideuniversalbios/trunk/Tools/StringsCompress.pl @ 277

Last change on this file since 277 was 277, checked in by gregli@…, 12 years ago
Moved the bulk of the serial code to the assembly library, for inclusion in other utilities. Fixed a bug in int13h.asm when floppy support was not enabled that was preventing foreign drives from working properly.
File size: 10.4 KB

Line
1	#======================================================================================
2	#
3	# Project name : XTIDE Universal BIOS
4	#
5	# Authors : Greg Lindhorst
6	# gregli@hotmail.com
7	#
8	# Description : Script for compiling and compressing strings for
9	# use by DisplayFormatCompressed.asm. See the header of that file
10	# for a description of the compression scheme.
11	#
12	# Usage : stdin: Listing of strings.asm,
13	# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14	# We used the listing so that the assembler can take care of
15	# resolving %define and EQU symbol definitions.
16	#
17	# stdout: StringsCompressed.asm,
18	# plug replacement for Strings.asm (included by Main.asm)
19	#
20	# Also see the XTIDE makefile for building StringsCompressed.asm
21	#
22
23	#----------------------------------------------------------------------
24	#
25	# Translated, Format, and "Normal" characters
26	#
27	# DisplayFormatCompressed can only deal with characters in one of the following categories:
28	# 1. Those in the Translate associative array
29	# 2. Those in the Format associative array
30	# 3. Characters between $normal_base and $normal_base+0x40
31	# (typically covers upper and lower case alphabets)
32	# 4. Null characters (marking the end of strings)
33	# 5. The special string LF,CR
34	#
35	# If a character or format read at the input cannot be found in one of the above categories,
36	# it must be added here before this script will accept it (and DisplayFormatCompressed can
37	# display it).
38	#
39	# Tables for the above categories are expected in the input stream, before the strings to be
40	# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
41	# and do not need to be updated there. Needed information is put in the compression output
42	# that it reads.
43	#
44
45	#
46	# High order code bits, determining which type of character we have (translated or not) and
47	# if a space or null should come after this character.
48	#
# Values OR-ed into the top two bits of each emitted byte.
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# Bit OR-ed into a translated byte: clear when a null follows it, set otherwise
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
59
# Banner placed at the top of the generated StringsCompressed.asm.
{
    my $rule = ";;;======================================================================\n";

    print $rule,
          ";;;\n",
          ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n",
          ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n",
          ";;; This file only needs to be rebuilt if Strings.asm is changed.\n",
          ";;;\n",
          $rule,
          "\n";
}
67
68
69	#
70	# On a first pass, look for our table directives. $translate{...}, $format{...}, etc.
71	# are expected in the input stream.
72	#
# Marker appended to each listing line that was consumed as a table directive.
# The marked line is still stored in @lines like every other line.
$processed = " [StringsCompress Processed]";

# First pass: read the whole listing, pick up the table directives, and keep
# every line (annotated where applicable) in @lines for the second pass.
while( my $line = <> )
{
    # chomp, not chop: only a trailing newline is removed, so a final line
    # without one does not lose its last character.
    chomp $line;
    my $annotated = $line;

    #
    # Table entries for this script.  Listing lines start with a padded line
    # number, so whitespace around every token is optional (\s*).
    #
    if( $line =~ /^\s*\d+\s*;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+)/ )
    {
        # ;$translate{ ord('c') } = N
        $translate{ord($1)} = int($2);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+)/ )
    {
        # ;$translate{ NNN } = N
        $translate{int($1)} = int($2);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$format_begin\s*=\s*([0-9]+)/ )
    {
        # ;$format_begin = N
        $format_begin = int($1);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+)/ )
    {
        # ;$format{ "name" } = N
        $format{$1} = int($2);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$normal_base\s*=\s*0x([0-9a-fA-F]+)/ )
    {
        # ;$normal_base = 0xNN   (must be tried before the decimal form,
        # which would otherwise match the leading "0" of "0x..")
        $normal_base = hex($1);
        $annotated .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$normal_base\s*=\s*([0-9]+)/ )
    {
        # ;$normal_base = NN
        $normal_base = int($1);
        $annotated .= $processed;
    }

    push( @lines, $annotated );
}
115
116	#
117	# On the second pass, loop through lines of the listing, looking for 'db' lines
118	# (and dealing with continuations) and compressing each line as it is encountered.
119	#
# Second pass over the stored listing.  A C-style loop is used because a 'db'
# line with continuations consumes extra entries of @lines.  The bound is
# <= $#lines so that the final listing line is processed as well.
for( my $idx = 0; $idx <= $#lines; $idx++ )
{
    my $line = $lines[$idx];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( $line =~ /^\s*\d+\s*\<\d\>/ )
    {
        next;
    }

    #
    # a 'db' line, with or without a label
    #
    if( $line =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined $label;      # the label group is optional

        # Echo the original source text as a comment
        print $label.$spacing."; ".$db.$string."\n";

        # A trailing '-' after the hex bytes means the bytes continue on the
        # next listing line; keep appending until a line without the '-'.
        while( $continuation eq "-" )
        {
            my $more = $lines[++$idx];
            $more = "" unless defined $more;    # ran off the end of the listing
            $more =~ /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i
                or die "parse error on continuation: '".$more."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( $line =~ /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( $line =~ /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}
176
177	print ";;; end of input stream\n\n";
178
179	#--------------------------------------------------------------------------------
180	#
181	# Output constants and the TranslatesAndFormats table
182	#
183
print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

# Invert %translate (character code -> translate code) into @translate_index
# (translate code -> character code).  Every entry must have been used by at
# least one string and must have a code no greater than 31.  Keys are sorted
# so that the first reported error is the same on every run.
foreach my $char ( sort { $a <=> $b } keys %translate )
{
    my $slot = $translate{$char};
    $translate_index[$slot] = $char;
    $used{$char} or die "translate $char unused\n";
    $slot <= 31 or die $slot.": translate codes must be below 32";
}

# Emit translate entries from slot 0 upward, stopping at the first empty slot.
for( my $slot = 0; $translate_index[$slot]; $slot++ )
{
    print " db ".$translate_index[$slot]." ; ".$slot."\n";
}

# Map each format code to the label of its handler.  '-' in a format name
# becomes '_' in the label.
foreach my $name ( sort keys %format )
{
    my $suffix = $name;
    $suffix =~ s/\-/_/g;
    $format_index[$format{$name}] = "DisplayFormatCompressed_Format_".$suffix;
    $used{$name} or die "format $name unused\n";
    $format{$name} <= 31 or die $format{$name}.": format codes must be below 32";
}

# Emit format entries as byte offsets back from DisplayFormatCompressed_BaseFormatOffset,
# starting at $format_begin and stopping at the first empty slot.
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$slot].") ; ".$slot."\n";
}

print "\n";

#
# Ensure that branch targets are within reach: each handler must sit at or
# before the base offset, and no more than 255 bytes away from it.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    my $target = $format_index[$slot];
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$target." || DisplayFormatCompressed_BaseFormatOffset - ".$target." > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
229
230	#--------------------------------------------------------------------------------
231	#
232	# Output usage statistics
233	#
234
# Usage statistics, written as ';;' comments at the end of the generated file.
# Keys are sorted so the generated file is identical from run to run.

print "\n;; translated usage stats\n";
foreach my $char ( sort { $a <=> $b } keys %translate )
{
    print ";; ".$char.":".$used{$char}."\n";
}
# Taken from the table size so the total prints as 0 (not empty) when no
# translate entries were defined.
$translate_count = scalar( keys %translate );
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
foreach my $name ( sort keys %format )
{
    print ";; ".$name.":".$used{$name}."\n";
}
$format_count = scalar( keys %format );
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

# One line per code in the 0x40-wide "normal" window, plus a count of how
# many of those codes were used at least once.
$used_count = 0;
foreach my $code ( $normal_base .. $normal_base + 0x40 - 1 )
{
    print ";; ".$code.",".chr($code).":".$used{$code}."\n";
    $used_count++ if $used{$code};
}
print ";; alphabet used count: ".$used_count."\n";
264
265	#--------------------------------------------------------------------------------
266	#
267	# processString does the real compression work...
268	#
269
#
# processString( $chars, $label, $db )
#
#   $chars - hexadecimal byte pairs of one 'db' statement, as copied from the listing
#   $label - label plus spacing that preceded the 'db'; only used for indentation
#   $db    - the 'db' keyword with its trailing whitespace
#
# Prints two lines: the uncompressed bytes as a comment, then the compressed
# 'db' statement.  Reads the globals %translate, %format, $normal_base and the
# $code_* constants, and counts every character/format it encodes in %used.
# Dies on a byte that fits none of the supported categories.
#
sub processString
{
    my( $chars, $label, $db ) = @_;

    $label = "" unless defined $label;
    $label =~ s/[a-z0-9_:]/ /ig;    # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $byte = hex( substr( $chars, $pos, 2 ) );
        push( @v, $byte );
        # values above 0x9f get a leading zero ("0a9h")
        $orig .= sprintf( ($byte > 0x9f ? ", %03xh" : ", %02xh"), $byte );
    }
    push( @v, 0xff );   # guard byte to avoid thinking going past the end of
                        # the string is a null

    my $output = "";

    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    # NOTE(review): with this bound the last byte before the guard is never
    # examined on its own; it is only ever consumed by a look-ahead
    # (presumably it is always the terminating null - confirm).
    #
    for( my $g = 0; $g < $#v - 1; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            exists $format{"nl"} or die "LF,CR found but no \"nl\" format is defined\n";
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', optional single digit, optional '-', operator character
        #
        elsif( $v[$g] == 0x25 ) # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo .= chr($v[$g]);
                $g++;
            }
            $fo .= chr($v[$g]);

            # exists, not truth: a format whose code is 0 is still a known format
            exists $format{$fo} or die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (space is always treated as translated)
        #
        elsif( $v[$g] == 32 || exists $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " of each list
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad the compressed line so both trailing comments line up
    my $pad = length($orig) - length($output);
    print " " x $pad if $pad > 0;
    print " ; compressed\n\n";

    return;
}
416

Note: See TracBrowser for help on using the repository browser.

Download in other formats: