Context Navigation

source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 421

Visit:

Last change on this file since 421 was 376, checked in by gregli@…, 13 years ago
WIDE checkin... Added copyright and license information to source files, as per the GPL instructions for usage.
File size: 11.2 KB

Line
1	#======================================================================================
2	#
3	# Project name : XTIDE Universal BIOS
4	#
5	# Authors : Greg Lindhorst
6	# gregli@hotmail.com
7	#
8	# Description : Script for compiling and compressing strings for
9	# use by DisplayFormatCompressed.asm. See the header of that file
10	# for a description of the compression scheme.
11	#
12	# XTIDE Universal BIOS and Associated Tools
13	# Copyright (C) 2009-2010 by Tomi Tilli, 2011-2012 by XTIDE Universal BIOS Team.
14	#
15	# This program is free software; you can redistribute it and/or modify
16	# it under the terms of the GNU General Public License as published by
17	# the Free Software Foundation; either version 2 of the License, or
18	# (at your option) any later version.
19	#
20	# This program is distributed in the hope that it will be useful,
21	# but WITHOUT ANY WARRANTY; without even the implied warranty of
22	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23	# GNU General Public License for more details.
24	# Visit http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
25	#
26
27	#
28	# Usage : stdin: Listing of strings.asm,
29	# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
30	# We used the listing so that the assembler can take care of
31	# resolving %define and EQU symbol definitions.
32	#
33	# stdout: StringsCompressed.asm,
34	# plug replacement for Strings.asm (included by Main.asm)
35	#
36	# Also see the XTIDE makefile for building StringsCompressed.asm
37	#
38
39	#----------------------------------------------------------------------
40	#
41	# Translated, Format, and "Normal" characters
42	#
43	# DisplayFormatCompressed can only deal with characters in one of the following categories:
44	# 1. Those in the Translate associative array
45	# 2. Those in the Format associative array
46	# 3. Characters between $normal_base and $normal_base+0x40
47	# (typically covers upper and lower case alphabets)
48	# 4. Null characters (marking the end of strings)
49	# 5. The special string LF,CR
50	#
51	# If a character or format read at the input cannot be found in one of the above categories,
52	# it must be added here before this script will accept it (and DisplayFormatCompressed can
53	# display it).
54	#
55	# Tables for the above categories are expected in the input stream, before the strings to be
56	# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
57	# and do not need to be updated there. The needed information is put in the compression
58	# output that it reads.
59	#
60
#
# High order code bits (the top two bits of every compressed byte), determining
# which type of character we have (translated or not) and if a space or null
# should come after this character.
#
$code_space     = 0xc0;     # "normal" character with a space implied after it
$code_null      = 0x80;     # "normal" character with a null implied after it
$code_normal    = 0x40;     # "normal" character, nothing implied after it
$code_translate = 0x00;     # translated character or format operator

#
# Bit used if it is a translated byte (translate/format codes must stay below 32,
# so this bit never collides with the code itself)
#
$code_translate_null   = 0x00;  # a null follows the translated byte
$code_translate_normal = 0x20;  # no null follows the translated byte

#
# Banner at the top of the generated file, then open the conditional section
# that holds the compressed strings.
#
print ";;;======================================================================\n";
print ";;;\n";
print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
print ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n";
print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
print ";;;\n";
print ";;;======================================================================\n\n";

print "%ifdef STRINGSCOMPRESSED_STRINGS\n\n";

#
# On a first pass, look for our table directives. $translate{...}, $format{...}, etc.
# are expected in the input stream, written as assembler comments. Every listing
# line starts with its listing line number, which the patterns skip over.
#
# Populates the globals %translate, %format, $format_begin and $normal_base, and
# stores every input line (directive lines get a marker appended) in @lines for
# the second pass.
#
$processed = " [StringsCompress Processed]";
while( my $line = <> )
{
    # chomp, not chop: a final line without a newline must not lose a character
    chomp( $line );
    my $marked = $line;

    #
    # Table entries for this script
    #
    if( $line =~ /^\s*\d+\s*;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+)/ )
    {
        # ;$translate{ ord('x') } = N  -- character given literally
        $translate{ord($1)} = int($2);
        $marked .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+)/ )
    {
        # ;$translate{ NNN } = N  -- character given by decimal code
        $translate{int($1)} = int($2);
        $marked .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$format_begin\s*=\s*([0-9]+)/ )
    {
        $format_begin = int($1);
        $marked .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$format\{\s*"([^"]+)"\s*\}\s*=\s*([0-9]+)/ )
    {
        $format{$1} = int($2);
        $marked .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$normal_base\s*=\s*0x([0-9a-fA-F]+)/ )
    {
        # the hexadecimal form must be tried before the decimal form, which
        # would otherwise match just the leading "0" of "0x.."
        $normal_base = hex($1);
        $marked .= $processed;
    }
    elsif( $line =~ /^\s*\d+\s*;\$normal_base\s*=\s*([0-9]+)/ )
    {
        $normal_base = int($1);
        $marked .= $processed;
    }

    push( @lines, $marked );
}

#
# On the second pass, loop through lines of the listing, looking for 'db' lines
# (and dealing with continuations) and compressing each line as it is encountered.
# Every line in @lines is visited, including the last one.
#
for( my $idx = 0; $idx <= $#lines; $idx++ )
{
    $_ = $lines[$idx];

    #
    # The <number> indicates a line from an include file, do not include in the output
    #
    if( /^\s*\d+\s*<\d+>/ )
    {
    }

    #
    # a 'db' line, with or without a label:
    #   <line number> <address> <hex bytes>[-] [label:] db <operands>
    # A trailing '-' on the hex bytes means they continue on the next listing line.
    #
    elsif( /^\s*\d+\s+[0-9A-F]+\s+([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.*)/i )
    {
        my( $bytes, $continuation, $label, $spacing, $db, $string ) = ( $1, $2, $3, $4, $5, $6 );
        $label = "" unless defined $label;      # unlabeled db line

        # keep the original source line in the output, commented out
        print $label.$spacing."; ".$db.$string."\n";

        # gather the hex bytes from every continuation line
        while( $continuation eq "-" )
        {
            $_ = $lines[++$idx];
            ( defined $_ && /^\s*\d+\s+[0-9A-F]+\s+([0-9A-F]+)(-?)/i )
                or die "parse error on continuation: '".( defined $_ ? $_ : "" )."'";
            $bytes .= $1;
            $continuation = $2;
        }

        processString( $bytes, $label.$spacing, $db );
    }

    #
    # a ';%%;' prefix line, copy to output without the prefix
    #
    elsif( /^\s*\d+\s*;%%;\s*(.*)$/ )
    {
        print $1."\n";
    }

    #
    # everything else, copy to the output as is (minus the listing line number)
    #
    elsif( /^\s*\d+\s*(.*)$/ )
    {
        print $1."\n";
    }
}

print ";;; end of input stream\n\n";

#--------------------------------------------------------------------------------
#
# Output constants and the TranslatesAndFormats table
#
# Keys are visited in sorted order so that the generated file (and which error is
# reported first) is the same on every run, independent of Perl's hash ordering.
#

print "%endif ; STRINGSCOMPRESSED_STRINGS\n\n";
print "%ifdef STRINGSCOMPRESSED_TABLES\n\n";

print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";

print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";

print "StringsCompressed_TranslatesAndFormats: \n";

my @translate_keys = sort { $a <=> $b } keys(%translate);
my @format_keys    = sort keys(%format);

#
# Invert %translate (character -> code) into a table indexed by code
#
my @translate_index;
foreach my $char (@translate_keys)
{
    $translate_index[$translate{$char}] = $char;
    $used{$char} or die "translate $char unused\n";
    $translate{$char} <= 31 or die $translate{$char}.": translate codes must be below 32";
}

# emitted from code 0 up to the first code with no entry
for( my $code = 0; defined $translate_index[$code]; $code++ )
{
    print " db ".$translate_index[$code]." ; ".$code."\n";
}

#
# Invert %format (operator -> code) into a table of handler labels indexed by code;
# '-' is not valid in a label, so it becomes '_'
#
my @format_index;
foreach my $op (@format_keys)
{
    ( my $suffix = $op ) =~ s/-/_/g;
    $format_index[$format{$op}] = "DisplayFormatCompressed_Format_".$suffix;
    $used{$op} or die "format $op unused\n";
    $format{$op} <= 31 or die $format{$op}.": format codes must be below 32";
}

# emitted from $format_begin up to the first code with no entry
for( my $code = $format_begin; defined $format_index[$code]; $code++ )
{
    print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$code].") ; ".$code."\n";
}

print "\n";

#
# Ensure that branch targets are within reach (each table entry is a single byte
# offset below DisplayFormatCompressed_BaseFormatOffset)
#
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
for( my $code = $format_begin; defined $format_index[$code]; $code++ )
{
    print "%if DisplayFormatCompressed_BaseFormatOffset < ".$format_index[$code]
        ." || DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$code]." > 255\n";
    print "%error \"".$format_index[$code]." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
    print "%endif\n";
}
print "%endif\n";

#--------------------------------------------------------------------------------
#
# Output usage statistics
#

print "\n;; translated usage stats\n";
$translate_count = 0;
foreach my $char (@translate_keys)
{
    print ";; ".$char.":".$used{$char}."\n";
    $translate_count++;
}
print ";; total translated: ".$translate_count."\n";

print "\n;; format usage stats\n";
$format_count = 0;
foreach my $op (@format_keys)
{
    print ";; ".$op.":".$used{$op}."\n";
    $format_count++;
}
print ";; total format: ".$format_count."\n";

print "\n;; alphabet usage stats\n";

$used_count = 0;
for( my $char = $normal_base; $char < $normal_base + 0x40; $char++ )
{
    # characters that never appeared have no %used entry and print an empty count
    print ";; ".$char.",".chr($char).":".$used{$char}."\n";
    if( $used{$char} )
    {
        $used_count++;
    }
}
print ";; alphabet used count: ".$used_count."\n";

print "%endif ; STRINGSCOMPRESSED_TABLES\n\n";

#--------------------------------------------------------------------------------
#
# processString does the real compression work...
#
# Arguments:
#   $_[0]  hexadecimal digits (two per byte) of the assembled string, as taken
#          from the listing
#   $_[1]  label and spacing that preceded the 'db'; only its width is used, to
#          line the output up with the original source line
#   $_[2]  the 'db' keyword with its trailing whitespace
#
# Reads the globals %translate, %format, $normal_base and the $code_* constants.
# Counts each translate/format/normal character it encodes in the global %used.
# Prints two lines: the uncompressed bytes as a comment, then the compressed 'db'.
#
# Dies if a byte has no translation, no format and is outside the normal range,
# or if a needed table entry is missing.
#
# Note that the final byte of the string is never used as the start of a code:
# it is only looked at as the "next byte" of the one before it (presumably it is
# always the terminating null, which is folded into the preceding code).
#

sub processString
{
    my( $chars, $label, $db ) = @_;

    $label =~ s/[a-z0-9_:]/ /ig;    # replace with spaces for proper output spacing

    #
    # Copy numeric bytes out of hexadecimal pairs in the listing
    #
    my @v;
    my $orig = "";
    for( my $pos = 0; $pos < length($chars); $pos += 2 )
    {
        my $i = $pos/2;
        $v[$i] = hex(substr($chars,$pos,2));
        # values starting with a hex letter get a leading zero for the assembler
        $orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
    }
    $v[length($chars)/2] = 0xff;    # guard byte to avoid thinking going past the end of
                                    # the string is a null

    my $output = "";
    #
    # Loop through bytes...
    # looking ahead as needed for possible space and null optimizations, compiling formats
    #
    for( my $g = 0; $g < $#v-1; $g++ )
    {
        my( $code, $post );

        #
        # Special translation of LF,CR to a format
        #
        if( $v[$g] == 10 && $v[$g+1] == 13 )
        {
            defined $format{"nl"} or die "LF,CR found but no \"nl\" format is defined\n";

            $g++;
            $post = $code_translate;
            $code = $format{"nl"};
            $used{"nl"}++;
        }

        #
        # Format operators: '%', an optional single digit, an optional '-',
        # then the operator character
        #
        elsif( $v[$g] == 0x25 )     # "%"
        {
            my $fo = "";
            $g++;
            if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            if( $v[$g] == ord("-") )
            {
                $fo = $fo.chr($v[$g]);
                $g++;
            }
            $fo = $fo.chr($v[$g]);

            # test for existence, not truth, so a format code of 0 is accepted
            defined $format{$fo} or die "unknown format operator: '".$fo."'\n";

            $code = $format{$fo};
            $post = $code_translate;
            $used{$fo}++;
        }

        #
        # Translated characters (space is always a translated character)
        #
        elsif( $v[$g] == 32 || defined $translate{$v[$g]} )
        {
            defined $translate{$v[$g]}
                or die $v[$g].": space has no entry in the translate table\n";

            $post = $code_translate;
            $code = $translate{$v[$g]};
            $used{$v[$g]}++;
        }

        #
        # "normal" characters (alphabet, and ASCII characters around the alphabet)
        #
        elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
        {
            $used{$v[$g]}++;

            $post = $code_normal;
            $code = $v[$g] - $normal_base;
        }

        #
        # Not found
        #
        else
        {
            die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
        }

        if( $post == $code_translate )
        {
            #
            # NULL optimization (space optimization not possible on translate/format)
            #
            if( $v[$g+1] == 0 )
            {
                $g++;
                $post = $post | $code_translate_null;
            }
            else
            {
                $post = $post | $code_translate_normal;
            }
        }
        else    # $post == $code_normal
        {
            #
            # Space optimization
            #
            if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
            {
                # can't take this optimization if the next byte is a null,
                # since we can't have both a postfix space and null
                $g++;
                $post = $code_space;
            }

            #
            # NULL optimization
            #
            elsif( $v[$g+1] == 0 )
            {
                $g++;
                $post = $code_null;
            }
        }

        $code = $code | $post;
        $output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
    }

    # both strings start with ", ", which substr() drops
    print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
    print $label." ".$db.substr($output,2);

    # pad so the trailing comment lines up with the one on the uncompressed line
    my $pad = length($orig) - length($output);
    print " " x $pad if $pad > 0;
    print " ; compressed\n\n";
}


Note: See TracBrowser for help on using the repository browser.

Download in other formats: