Context Navigation

source: xtideuniversalbios/trunk/XTIDE_Universal_BIOS/Src/StringsCompress.pl @ 194

Last change on this file since 194 was 194, checked in by gregli@…, 12 years ago
ifdef'd out more unused code. Also added a tool for looking through the listing and the output of the precompiler to aid in finding dead code. Some changes in the files are to add annotations for the tool to avoid false positives.
File size: 10.5 KB

Line
1	#======================================================================================
2	#
3	# Project name : XTIDE Universal BIOS
4	#
5	# Authors : Greg Lindhorst
6	# gregli@hotmail.com
7	#
8	# Description : Script for compiling and compressing strings for
9	# use by DisplayFormatCompressed.asm. See the header of that file
10	# for a description of the compression scheme.
11	#
12	# Usage : stdin: Listing of strings.asm,
13	# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
14	# We used the listing so that the assembler can take care of
15	# resolving %define and EQU symbol definitions.
16	#
17	# stdout: StringsCompressed.asm,
18	# plug replacement for Strings.asm (included by Main.asm)
19	#
20	# Also see the XTIDE makefile for building StringsCompressed.asm
21	#
22
23	#----------------------------------------------------------------------
24	#
25	# Translated and Format characters
26	#
27	# DisplayFormatCompressed can only deal with characters in one of the following categories:
28	# 1. Those in the Translate associative array
29	# 2. Those in the Format associative array
30	# 3. Characters between $normal_base and $normal_base+0x40
31	# 4. Null characters (marking the end of strings)
32	# 5. The special string LF,CR
33	#
34	# If a character or format read at the input cannot be found in one of the above categories,
35	# it must be added here before this script will accept it (and DisplayFormatCompressed can
36	# display it).
37	#
38	# Note that these tables are not present in DisplayFormatCompressed, and do not need to be
39	# updated there. Needed information is put in the compression output that it reads.
40	#
#
# Translated characters: maps a character code to its index in the
# TranslatesAndFormats table that is emitted at the end of the output.
#
%translate = (
    ord(' ') =>  0,
    172      =>  1,     # ONE_QUARTER
    171      =>  2,     # ONE_HALF
    179      =>  3,     # SINGLE_VERTICAL
    175      =>  4,     # ANGLE_QUOTE_RIGHT
    ord('!') =>  5,
    ord('"') =>  6,
    ord(',') =>  7,
    ord('-') =>  8,
    ord('.') =>  9,
    ord('/') => 10,
    ord('1') => 11,
    ord('2') => 12,
    ord('3') => 13,
    ord('4') => 14,
    ord('5') => 15,
    ord('6') => 16,
    ord('8') => 17,
    200      => 18,     # DOUBLE_BOTTOM_LEFT_CORNER
    181      => 19,     # DOUBLE_LEFT_HORIZONTAL_TO_SINGLE_VERTICAL
);

#
# Formats share the table with the translated characters and start at the
# index right after the last translated character.
#
$format_begin = 20;

#
# Format operators (the text following '%' in a string) and their table index.
# The even/odd remarks are constraints carried over from the original table.
#
%format = (
    "s"   => 20,        # n/a
    "c"   => 21,        # n/a
    "2-I" => 22,        # must be even
    "u"   => 23,        # must be odd
    "5-u" => 24,        # must be even
    "x"   => 25,        # must be odd
    "5-x" => 26,        # must be even
    "nl"  => 27,        # n/a
    "2-u" => 28,        # must be even
    "A"   => 29,        # n/a
);

# NOTE: The last %format value cannot exceed 31 (stored in a 5-bit quantity).

#
# First character code of the "normal" range, which spans 0x40 codes.
# A value around 0x40 covers upper and lower case letters; if lower case 'z'
# is not used, 0x3a is a good choice since it adds ':' at the front end.
#
$normal_base = 0x3a;

#
# High order code bits: they select the kind of character (translated or
# normal) and whether a space or a null follows the character.
#
( $code_space, $code_null, $code_normal, $code_translate ) = ( 0xc0, 0x80, 0x40, 0x00 );

#
# Extra bit used only for translated bytes: set when the string continues,
# clear when a null follows.
#
( $code_translate_null, $code_translate_normal ) = ( 0x00, 0x20 );
101
#
# Banner placed at the top of the generated file.  It warns the reader that
# the file is machine generated (fixes the "maekfile" typo in the banner).
#
print join( "\n",
    ";;;======================================================================",
    ";;;",
    ";;; This file is generated by StringsCompress.pl from source in Strings.asm",
    ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.",
    ";;; This file only needs to be rebuilt if Strings.asm is changed.",
    ";;;",
    ";;;======================================================================",
), "\n\n";
109
110	#
111	# Loop through lines of the listing, looking for 'db' lines (and dealing with continuations)
112	# and compressing each line as it is encountered.
113	#
#
# Walk the listing line by line:
#   - lines that came from an include file are dropped,
#   - 'db' lines are echoed as a comment and then handed to processString,
#   - every other listing line is copied through without its line-number column.
#
while( <> )
{
    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    next if /^\s\d+\s\<\d\>/;

    #
    # a 'db' line, with or without a label.  The final group captures the whole
    # remainder of the line so the original source text is echoed in full
    # (capturing a single character truncated the echoed comment).
    #
    if( /^\s\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my ( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );

        print $label.$spacing."; ".$db.$string."\n";

        #
        # A trailing '-' on the hex column means the bytes continue on the
        # next listing line; keep appending until a line without it is seen.
        #
        while( $continuation eq "-" )
        {
            $_ = <>;
            unless( defined($_) && /^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i )
            {
                # covers both a malformed line and running out of input
                die "parse error on continuation";
            }
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # everything else, copy to the output as is
    #
    elsif( /^\s\d+\s(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of strings.asm\n\n";
162
163	#--------------------------------------------------------------------------------
164	#
165	# Output constants and the TranslatesAndFormats table
166	#
167
#
# Constants consumed by the display code, followed by the combined table of
# translated characters and format handler offsets.
#
print "StringsCompressed_NormalBase equ ", $normal_base, "\n\n";

print "StringsCompressed_FormatsBegin equ ", $format_begin, "\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

#
# Invert %translate into an index-ordered array, reporting characters that were
# never used by any string and rejecting codes that do not fit in 5 bits.
#
for my $char_code ( keys(%translate) )
{
    my $slot = $translate{$char_code};

    $translate_index[$slot] = $char_code;
    print "translate $char_code unused\n" unless $used{$char_code};
    die $slot.": translate codes must be below 32" unless $slot <= 31;
}

# Emit translated characters in index order, stopping at the first empty slot.
my $translate_slot = 0;
while( $translate_index[$translate_slot] )
{
    print " db ", $translate_index[$translate_slot], " ; ", $translate_slot, "\n";
    $translate_slot++;
}

#
# Same inversion for the formats.  Each entry names the handler label, with
# '-' in the operator replaced by '_'.  An unused format is a fatal error.
#
for my $operator ( keys(%format) )
{
    ( my $suffix = $operator ) =~ tr/-/_/;
    my $slot = $format{$operator};

    $format_index[$slot] = "DisplayFormatCompressed_Format_".$suffix;
    die "format $operator unused\n" unless $used{$operator};
    die $slot.": format codes must be below 32" unless $slot <= 31;
}

# Collect the populated format slots once; both loops below walk the same run.
my @format_slots;
for( my $slot = $format_begin; $format_index[$slot]; $slot++ )
{
    push @format_slots, $slot;
}

# Each format entry is stored as a byte offset back from the base label.
for my $slot ( @format_slots )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - ", $format_index[$slot], ") ; ", $slot, "\n";
}

print "\n";

#
# Ensure that branch targets are within reach: the offset must be
# non-negative and fit in a single byte.
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for my $slot ( @format_slots )
{
    my $target = $format_index[$slot];

    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$target." || DisplayFormatCompressed_BaseFormatOffset - ".$target." > 255\n";
    print "%error \"".$target." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";
213
214	#--------------------------------------------------------------------------------
215	#
216	# Output usage statistics
217	#
218
#
# Usage statistics, written as assembler comments at the end of the output.
#

#
# Translated characters.  The loop walks %translate (it previously walked
# %special, a hash that is never populated, so nothing was ever listed) and the
# counter starts at zero so the total is printed even when the table is empty.
#
print "\n;; translated usage stats\n";
$translate_count = 0;
foreach $f (keys(%translate))
{
    print ";; ".$f.":".$used{$f}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

#
# Format operators.
#
print "\n;; format usage stats\n";
$format_count = 0;
foreach $f (keys(%format))
{
    print ";; ".$f.":".$used{$f}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

#
# Every code of the "normal" range, with a count of how many are actually used.
#
print "\n;; alphabet usage stats\n";

$used_count = 0;
for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
{
    print ";; ".$t.",".chr($t).":".$used{$t}."\n";
    if( $used{$t} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";
248
249	#--------------------------------------------------------------------------------
250	#
251	# processString does the real compression work...
252	#
253
#
# processString( $chars, $label, $db )
#
#   $chars - the string's bytes as consecutive hexadecimal pairs, taken from
#            the listing
#   $label - label and spacing that preceded the 'db'; used only to indent the
#            output (label characters are blanked out)
#   $db    - the 'db' keyword with its trailing whitespace, echoed verbatim
#
# Prints two lines: the uncompressed bytes as a comment, and the compressed
# bytes as a real 'db'.  Updates %used with the characters and formats seen.
# Dies on a byte or format operator that has no encoding.
#
# Each output byte is a code OR'ed with high bits:
#   translated/format: table index, plus $code_translate_normal when the
#                      string continues (nothing extra when a null follows)
#   normal:            (byte - $normal_base) with $code_normal, or $code_space
#                      when a space follows, or $code_null when a null follows
#
# All working variables are lexical so the caller's variables of the same name
# ($label, $db, loop counters) are not overwritten.
#
sub processString
{
    my ( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;       # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $i = $pos/2;
        $v[$i] = hex(substr($chars,$pos,2));
        # values above 9fh start with a letter in hex and need a leading zero
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
    }
    $v[length($chars)/2] = 0xff;        # guard byte, so that reading one past the end
                                        # of the string is not mistaken for a null

    my $output = "";

    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    for( my $g = 0; $g < $#v-1; $g++ )  # -1 for the guard byte
    {
        my ( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', optional single digit, optional '-', then one character
        #
        elsif( $v[$g] == 0x25 )         # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            $format{$fo} || die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (space is tested explicitly because its table index is 0)
        #
        elsif( $v[$g] == 32 || $translate{$v[$g]} )
        {
            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else                            # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the byte after the space is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # substr(...,2) drops the leading ", " that every element was prefixed with
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad so the trailing comment lines up with the one on the uncompressed line
    print " " x ( length($orig) - length($output) );
    print " ; compressed\n\n";
}
400

Note: See TracBrowser for help on using the repository browser.

Download in other formats: