Context Navigation

source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 198

Visit:

Last change on this file since 198 was 197, checked in by gregli@…, 13 years ago
Some maintenance; no changes to the actual source. Moved the compression tables out of the compression script and into the source file, making the compression script source agnostic. And thus moved the compression script to the Tools directory.
File size: 10.2 KB

Rev	Line
[189]	1	#======================================================================================
	2	#
	3	# Project name : XTIDE Universal BIOS
	4	#
	5	# Authors : Greg Lindhorst
	6	# gregli@hotmail.com
	7	#
	8	# Description : Script for compiling and compressing strings for
	9	# use by DisplayFormatCompressed.asm. See the header of that file
	10	# for a description of the compression scheme.
	11	#
	12	# Usage : stdin: Listing of strings.asm,
	13	# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
	14	# We used the listing so that the assembler can take care of
	15	# resolving %define and EQU symbol definitions.
	16	#
	17	# stdout: StringsCompressed.asm,
	18	# plug replacement for Strings.asm (included by Main.asm)
	19	#
	20	# Also see the XTIDE makefile for building StringsCompressed.asm
	21	#
	22
	23	#----------------------------------------------------------------------
	24	#
[197]	25	# Translated, Format, and "Normal" characters
[189]	26	#
	27	# DisplayFormatCompressed can only deal with characters in one of the following categories:
	28	# 1. Those in the Translate associative array
	29	# 2. Those in the Format associative array
[197]	30	# 3. Characters between $normal_base and $normal_base+0x40
	31	# (typically covers upper and lower case alphabets)
[189]	32	# 4. Null characters (marking the end of strings)
	33	# 5. The special string LF,CR
	34	#
	35	# If a character or format read at the input cannot be found in one of the above categories,
	36	# it must be added here before this script will accept it (and DisplayFormatCompressed can
	37	# display it).
	38	#
[197]	39	# Tables for the above categories are expected in the input stream, before the strings to be
	40	# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
	41	# and do not need to be updated there. The needed information is put in the compression output
	42	# that it reads.
[189]	43	#
	44
	45	#
	46	# High order code bits, determining which type of character we have (translated or not) and
	47	# if a space or null should come after this character.
	48	#
	49	$code_space = 0xc0;
	50	$code_null = 0x80;
	51	$code_normal = 0x40;
	52	$code_translate = 0x00;
	53
	54	#
	55	# Bit used if it is a translated byte
	56	#
	57	$code_translate_null = 0x00;
	58	$code_translate_normal = 0x20;
	59
	60	print ";;;======================================================================\n";
	61	print ";;;\n";
	62	print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
	63	print ";;; DO NOT EDIT DIRECTLY - See the maekfile for how to rebuild this file.\n";
	64	print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
	65	print ";;;\n";
	66	print ";;;======================================================================\n\n";
	67
[197]	68
#
# First pass: read the whole listing from the input stream and pick up the
# table directives embedded in it as assembler comments:
#
#   ;$translate{ ord( 'c' ) } = <code>     character literal -> translate code
#   ;$translate{ <number> } = <code>       character number  -> translate code
#   ;$format_begin = <code>                first code used for formats
#   ;$format{ "<name>" } = <code>          format operator   -> format code
#   ;$normal_base = <number or 0xhex>      first character of the "normal" range
#
# Each recognized directive line is tagged with $processed.  Every line (tagged
# or not) is saved in @lines for the second pass.
#
# The listing pads its line-number column with a variable amount of whitespace,
# so the patterns accept any amount of whitespace around the line number and
# between the tokens of a directive.
#
$processed = " [StringsCompress Processed]";
while( <> )
{
    chomp;                  # strip only the line terminator, never a real character
    my $line = $_;

    if( /^\s*\d+\s*(\;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{ord($2)} = int($3);
        $line .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $translate{int($2)} = int($3);
        $line .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$format_begin\s*=\s*([0-9]+).*$)/ )
    {
        $format_begin = int($2);
        $line .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+).*$)/ )
    {
        $format{$2} = int($3);
        $line .= $processed;
    }
    # The hexadecimal form must be tried before the decimal form: the decimal
    # pattern would otherwise match just the leading "0" of "0x..".
    elsif( /^\s*\d+\s*(\;\$normal_base\s*=\s*0x([0-9a-fA-F]+).*$)/ )
    {
        $normal_base = hex($2);
        $line .= $processed;
    }
    elsif( /^\s*\d+\s*(\;\$normal_base\s*=\s*([0-9]+).*$)/ )
    {
        $normal_base = int($2);
        $line .= $processed;
    }

    push( @lines, $line );
}
	115
	116	#
	117	# On the second pass, loop through lines of the listing, looking for 'db' lines
	118	# (and dealing with continuations) and compressing each line as it is encountered.
	119	#
	120	for( $l = 0; $l < $#lines; $l++ )
	121	{
	122	$_ = $lines[$l];
	123
	124	#
[189]	125	# The <number> indicates a line from an include file, do not include in the output
	126	#
	127	if( /^\s\d+\s\<\d\>/ )
	128	{
	129	}
	130
	131	#
	132	# a 'db' line, with or without a label
	133	#
	134	elsif( /^\s\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.)/i )
	135	{
	136	$bytes = $1;
	137	$continuation = $2;
	138	$label = $3;
	139	$spacing = $4;
	140	$db = $5;
	141	$string = $6;
	142
	143	print $label.$spacing."; ".$db.$string."\n";
	144
	145	if( $continuation eq "-" )
	146	{
	147	do
	148	{
[197]	149	$_ = $lines[++$l];
	150	/^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i \|\| die "parse error on continuation: '".$_."'";
[189]	151	$bytes .= $1;
	152	$continuation = $2;
	153	}
	154	while( $continuation eq "-" );
	155	}
	156
	157	&processString( $bytes, $label.$spacing, $db );
	158	}
	159
	160	#
	161	# everything else, copy to the output as is
	162	#
	163	elsif( /^\s\d+\s(.*)$/ )
	164	{
	165	print $1."\n";
	166	}
	167	}
	168
[197]	169	print ";;; end of input stream\n\n";
[189]	170
	171	#--------------------------------------------------------------------------------
	172	#
	173	# Output constants and the TranslatesAndFormats table
	174	#
	175
	176	print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";
	177
	178	print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";
	179
	180	print "StringsCompressed_TranslatesAndFormats: \n";
	181
	182	foreach $f (keys(%translate))
	183	{
	184	$translate_index[$translate{$f}] = $f;
[196]	185	$used{$f} \|\| die "translate $f unused\n";
[189]	186	$translate{$f} <= 31 \|\| die $translate{$f}.": translate codes must be below 32";
	187	}
	188
	189	for( $g = 0; $translate_index[$g]; $g++ )
	190	{
	191	print " db ".$translate_index[$g]." ; ".$g."\n";
	192	}
	193
	194	foreach $f (keys(%format))
	195	{
	196	$n = $f;
	197	$n =~ s/\-/_/g;
	198	$format_index[$format{$f}] = "DisplayFormatCompressed_Format_".$n;
	199	$used{$f} \|\| die "format $f unused\n";
	200	$format{$f} <= 31 \|\| die $format{$f}.": format codes must be below 32";
	201	}
	202
	203	for( $t = $format_begin; $format_index[$t]; $t++ )
	204	{
	205	print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$t].") ; ".$t."\n";
	206	}
	207
	208	print "\n";
	209
	210	#
	211	# Ensure that branch targets are within reach
	212	#
[194]	213	print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
[189]	214	for( $t = $format_begin; $format_index[$t]; $t++ )
	215	{
	216	print "%if DisplayFormatCompressed_BaseFormatOffset < $format_index[$t] \|\| DisplayFormatCompressed_BaseFormatOffset - $format_index[$t] > 255\n";
	217	print "%error \"".$format_index[$t]." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
	218	print "%endif\n";
	219	}
[194]	220	print "%endif\n";
[189]	221
	222	#--------------------------------------------------------------------------------
	223	#
	224	# Output usage statistics
	225	#
	226
	227	print "\n;; translated usage stats\n";
	228	foreach $f (keys(%special))
	229	{
	230	print ";; ".$f.":".$used{$f}."\n";
	231	$translate_count++;
	232	}
	233	print ";; total translated: ".$translate_count."\n";
	234
	235	print "\n;; format usage stats\n";
	236	$format_count = 0;
	237	foreach $f (keys(%format))
	238	{
	239	print ";; ".$f.":".$used{$f}."\n";
	240	$format_count++;
	241	}
	242	print ";; total format: ".$format_count."\n";
	243
	244	print "\n;; alphabet usage stats\n";
	245
	246	$used_count = 0;
	247	for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
	248	{
	249	print ";; ".$t.",".chr($t).":".$used{$t}."\n";
	250	if( $used{$t} )
	251	{
	252	$used_count++;
	253	}
	254	}
	255	print ";; alphabet used count: ".$used_count."\n";
	256
	257	#--------------------------------------------------------------------------------
	258	#
	259	# processString does the real compression work...
	260	#
	261
	262	sub processString
	263	{
	264	$chars = $_[0];
	265	$label = $_[1];
	266	$db = $_[2];
	267
	268	$label =~ s/[a-z0-9_:]/ /ig; # replace with spaces for proper output spacing
	269
	270	#
	271	# Copy numeric bytes out of hexadecimal pairs in the listing
	272	#
	273	$#v = 0;
	274
	275	$orig = "";
	276	for( $g = 0; $g < length($chars); $g += 2 )
	277	{
	278	$i = $g/2;
	279	$v[$i] = hex(substr($chars,$g,2));
	280	$orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
	281	}
	282	$v[length($chars)/2] = 0xff; # guard byte to avoid thinking going past the end of
	283	# the string is a null
	284
	285	$output = "";
	286	#
	287	# Loop through bytes...
	288	# looking ahead as needed for possible space and null optimizations, compiling formats
	289	#
	290	for( $g = 0; $g < $#v-1; $g++ ) # -1 for the guard byte
	291	{
	292	#
	293	# Special translation of LF,CR to a format
	294	#
	295	if( $v[$g] == 10 && $v[$g+1] == 13 )
	296	{
	297	$g++;
	298	$post = $code_translate;
	299	$code = $format{"nl"};
	300	$used{"nl"}++;
	301	}
	302
	303	#
	304	# Format operators
	305	#
	306	elsif( $v[$g] == 0x25 ) # "%"
	307	{
	308	$fo = "";
	309	$g++;
	310	if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
	311	{
	312	$fo = $fo.chr($v[$g]);
	313	$g++;
	314	}
	315	if( $v[$g] == ord("-") )
	316	{
	317	$fo = $fo.chr($v[$g]);
	318	$g++;
	319	}
	320	$fo = $fo.chr($v[$g]);
	321
	322	$format{$fo} \|\| die "unknown format operator: '".$fo."'\n";
	323
	324	$code = $format{$fo};
	325	$post = $code_translate;
	326	$used{$fo}++;
	327	}
	328
	329	#
	330	# Translated characters
	331	#
	332	elsif( $v[$g] == 32 \|\| $translate{$v[$g]} )
	333	{
	334	$post = $code_translate;
	335	$code = $translate{$v[$g]};
	336	$used{$v[$g]}++;
	337	}
	338
	339	#
	340	# "normal" characters (alphabet, and ASCII characters around the alphabet)
	341	#
	342	elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
	343	{
	344	$used{$v[$g]}++;
	345
	346	$post = $code_normal;
	347	$code = $v[$g] - $normal_base;
	348	}
	349
	350	#
	351	# Not found
	352	#
	353	else
	354	{
	355	die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
	356	}
	357
	358	if( $post == $code_translate )
	359	{
	360	#
	361	# NULL optimization (space optimization not possible on translate/format)
	362	#
	363	if( $v[$g+1] == 0 )
	364	{
	365	$g++;
	366	$post = $post \| $code_translate_null;
	367	}
	368	else
	369	{
	370	$post = $post \| $code_translate_normal;
	371	}
	372	}
	373	else # $post == $code_normal
	374	{
	375	#
	376	# Space optimization
	377	#
	378	if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
	379	{
	380	# can't take this optimization if the next byte is a null,
	381	# since we can't have both a postfix space and null
	382	$g++;
	383	$post = $code_space;
	384	}
	385
	386	#
	387	# NULL optimization
	388	#
	389	elsif( $v[$g+1] == 0 )
	390	{
	391	$g++;
	392	$post = $code_null;
	393	}
	394	}
	395
	396	$code = $code \| $post;
	397	$output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
	398	}
	399
	400	print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
	401	print $label." ".$db.substr($output,2);
	402	for( $t = length($output); $t < length($orig); $t++ )
	403	{
	404	print " ";
	405	}
	406	print " ; compressed\n\n";
	407	}
	408

Note: See TracBrowser for help on using the repository browser.

Download in other formats: