Context Navigation

source: xtideuniversalbios/trunk/XTIDE_Universal_BIOS/Src/StringsCompress.pl@ 190

Visit:

Last change on this file since 190 was 189, checked in by gregli@…, 13 years ago
Additional space optimizations, including making IdleProcessing an option in MENUEVENT. Note the fall-through from one file to another, but that there are assembler checks to ensure the proper linkage is maintained. First version of StringsCompress.pl, a perl script to make StringsCompressed.asm from Strings.asm.
File size: 10.4 KB

Rev	Line
[189]	1	#======================================================================================
	2	#
	3	# Project name : XTIDE Universal BIOS
	4	#
	5	# Authors : Greg Lindhorst
	6	# gregli@hotmail.com
	7	#
	8	# Description : Script for compiling and compressing strings for
	9	# use by DisplayFormatCompressed.asm. See the header of that file
	10	# for a description of the compression scheme.
	11	#
	12	# Usage : stdin: Listing of strings.asm,
	13	# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
	14	# We used the listing so that the assembler can take care of
	15	# resolving %define and EQU symbol definitions.
	16	#
	17	# stdout: StringsCompressed.asm,
	18	# plug replacement for Strings.asm (included by Main.asm)
	19	#
	20	# Also see the XTIDE makefile for building StringsCompressed.asm
	21	#
	22
	23	#----------------------------------------------------------------------
	24	#
	25	# Translated and Format characters
	26	#
	27	# DisplayFormatCompressed can only deal with characters in one of the following categories:
	28	# 1. Those in the Translate associative array
	29	# 2. Those in the Format associative array
	30	# 3. Characters between $normal_base and $normal_base+0x40
	31	# 4. Null characters (marking the end of strings)
	32	# 5. The special string LF,CR
	33	#
	34	# If a character or format read at the input cannot be found in one of the above categories,
	35	# it must be added here before this script will accept it (and DisplayFormatCompressed can
	36	# display it).
	37	#
	38	# Note that these tables are not present in DisplayFormatCompressed, and do not need to
	39	# be updated there. Needed information is put in the compression output that it reads.
	40	#
	41	$translate{ord(' ')} = 0;
	42	$translate{172} = 1; # ONE_QUARTER
	43	$translate{171} = 2; # ONE_HALF
	44	$translate{179} = 3; # SINGLE_VERTICAL
	45	$translate{175} = 4; # ANGLE_QUOTE_RIGHT
	46	$translate{ord('!')} = 5;
	47	$translate{ord('"')} = 6;
	48	$translate{ord(',')} = 7;
	49	$translate{ord('-')} = 8;
	50	$translate{ord('.')} = 9;
	51	$translate{ord('/')} = 10;
	52	$translate{ord('1')} = 11;
	53	$translate{ord('2')} = 12;
	54	$translate{ord('3')} = 13;
	55	$translate{ord('4')} = 14;
	56	$translate{ord('5')} = 15;
	57	$translate{ord('6')} = 16;
	58	$translate{ord('8')} = 17;
	59	$translate{200} = 18; # DOUBLE_BOTTOM_LEFT_CORNER
	60	$translate{181} = 19; # DOUBLE_LEFT_HORIZONTAL_TO_SINGLE_VERTICAL
	61
	62	#
	63	# Formats begin immediately after the last Translated character (they are in the same table)
	64	#
	65	$format_begin = 20;
	66
	67	$format{"s"} = 20; # n/a
	68	$format{"c"} = 21; # n/a
	69	$format{"2-I"} = 22; # must be even
	70	$format{"u"} = 23; # must be odd
	71	$format{"5-u"} = 24; # must be even
	72	$format{"x"} = 25; # must be odd
	73	$format{"5-x"} = 26; # must be even
	74	$format{"nl"} = 27; # n/a
	75	$format{"2-u"} = 28; # must be even
	76	$format{"A"} = 29; # n/a
	77
	78	# NOTE: The last $format cannot exceed 31 (stored in a 5-bit quantity).
	79
	80	#
	81	# Starting point for the "normal" range, typically around 0x40 to cover upper and lower case
	82	# letters. If lower case 'z' is not used, 0x3a can be a good choice as it adds ':' to the
	83	# front end.
	84	#
	85	$normal_base = 0x3a;
	86
	87	#
	88	# High order code bits, determining which type of character we have (translated or not) and
	89	# if a space or null should come after this character.
	90	#
	91	$code_space = 0xc0;
	92	$code_null = 0x80;
	93	$code_normal = 0x40;
	94	$code_translate = 0x00;
	95
	96	#
	97	# Bit used if it is a translated byte
	98	#
	99	$code_translate_null = 0x00;
	100	$code_translate_normal = 0x20;
	101
	102	print ";;;======================================================================\n";
	103	print ";;;\n";
	104	print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
	105	print ";;; DO NOT EDIT DIRECTLY - See the maekfile for how to rebuild this file.\n";
	106	print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
	107	print ";;;\n";
	108	print ";;;======================================================================\n\n";
	109
	110	#
	111	# Loop through lines of the listing, looking for 'db' lines (and dealing with continuations)
	112	# and compressing each line as it is encountered.
	113	#
	114	while(<>)
	115	{
	116	#
	117	# The <number> indicates a line from an include file, do not include in the output
	118	#
	119	if( /^\s\d+\s\<\d\>/ )
	120	{
	121	}
	122
	123	#
	124	# a 'db' line, with or without a label
	125	#
	126	elsif( /^\s\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.)/i )
	127	{
	128	$bytes = $1;
	129	$continuation = $2;
	130	$label = $3;
	131	$spacing = $4;
	132	$db = $5;
	133	$string = $6;
	134
	135	print $label.$spacing."; ".$db.$string."\n";
	136
	137	if( $continuation eq "-" )
	138	{
	139	do
	140	{
	141	$_ = <>;
	142	/^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i \|\| die "parse error on continuation";
	143	$bytes .= $1;
	144	$continuation = $2;
	145	}
	146	while( $continuation eq "-" );
	147	}
	148
	149	&processString( $bytes, $label.$spacing, $db );
	150	}
	151
	152	#
	153	# everything else, copy to the output as is
	154	#
	155	elsif( /^\s\d+\s(.*)$/ )
	156	{
	157	print $1."\n";
	158	}
	159	}
	160
	161	print ";;; end of strings.asm\n\n";
	162
	163	#--------------------------------------------------------------------------------
	164	#
	165	# Output constants and the TranslatesAndFormats table
	166	#
	167
	168	print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";
	169
	170	print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";
	171
	172	print "StringsCompressed_TranslatesAndFormats: \n";
	173
	174	foreach $f (keys(%translate))
	175	{
	176	$translate_index[$translate{$f}] = $f;
	177	$used{$f} \|\| print "translate $f unused\n";
	178	$translate{$f} <= 31 \|\| die $translate{$f}.": translate codes must be below 32";
	179	}
	180
	181	for( $g = 0; $translate_index[$g]; $g++ )
	182	{
	183	print " db ".$translate_index[$g]." ; ".$g."\n";
	184	}
	185
	186	foreach $f (keys(%format))
	187	{
	188	$n = $f;
	189	$n =~ s/\-/_/g;
	190	$format_index[$format{$f}] = "DisplayFormatCompressed_Format_".$n;
	191	$used{$f} \|\| die "format $f unused\n";
	192	$format{$f} <= 31 \|\| die $format{$f}.": format codes must be below 32";
	193	}
	194
	195	for( $t = $format_begin; $format_index[$t]; $t++ )
	196	{
	197	print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$t].") ; ".$t."\n";
	198	}
	199
	200	print "\n";
	201
	202	#
	203	# Ensure that branch targets are within reach
	204	#
	205	for( $t = $format_begin; $format_index[$t]; $t++ )
	206	{
	207	print "%if DisplayFormatCompressed_BaseFormatOffset < $format_index[$t] \|\| DisplayFormatCompressed_BaseFormatOffset - $format_index[$t] > 255\n";
	208	print "%error \"".$format_index[$t]." is out of range of DisplayFormatCompressed_BaseFormatOffset\"\n";
	209	print "%endif\n";
	210	}
	211
	212	#--------------------------------------------------------------------------------
	213	#
	214	# Output usage statistics
	215	#
	216
	217	print "\n;; translated usage stats\n";
	218	foreach $f (keys(%special))
	219	{
	220	print ";; ".$f.":".$used{$f}."\n";
	221	$translate_count++;
	222	}
	223	print ";; total translated: ".$translate_count."\n";
	224
	225	print "\n;; format usage stats\n";
	226	$format_count = 0;
	227	foreach $f (keys(%format))
	228	{
	229	print ";; ".$f.":".$used{$f}."\n";
	230	$format_count++;
	231	}
	232	print ";; total format: ".$format_count."\n";
	233
	234	print "\n;; alphabet usage stats\n";
	235
	236	$used_count = 0;
	237	for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
	238	{
	239	print ";; ".$t.",".chr($t).":".$used{$t}."\n";
	240	if( $used{$t} )
	241	{
	242	$used_count++;
	243	}
	244	}
	245	print ";; alphabet used count: ".$used_count."\n";
	246
	247	#--------------------------------------------------------------------------------
	248	#
	249	# processString does the real compression work...
	250	#
	251
	252	sub processString
	253	{
	254	$chars = $_[0];
	255	$label = $_[1];
	256	$db = $_[2];
	257
	258	$label =~ s/[a-z0-9_:]/ /ig; # replace with spaces for proper output spacing
	259
	260	#
	261	# Copy numeric bytes out of hexadecimal pairs in the listing
	262	#
	263	$#v = 0;
	264
	265	$orig = "";
	266	for( $g = 0; $g < length($chars); $g += 2 )
	267	{
	268	$i = $g/2;
	269	$v[$i] = hex(substr($chars,$g,2));
	270	$orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
	271	}
	272	$v[length($chars)/2] = 0xff; # guard byte to avoid thinking going past the end of
	273	# the string is a null
	274
	275	$output = "";
	276	#
	277	# Loop through bytes...
	278	# looking ahead as needed for possible space and null optimizations, compiling formats
	279	#
	280	for( $g = 0; $g < $#v-1; $g++ ) # -1 for the guard byte
	281	{
	282	#
	283	# Special translation of LF,CR to a format
	284	#
	285	if( $v[$g] == 10 && $v[$g+1] == 13 )
	286	{
	287	$g++;
	288	$post = $code_translate;
	289	$code = $format{"nl"};
	290	$used{"nl"}++;
	291	}
	292
	293	#
	294	# Format operators
	295	#
	296	elsif( $v[$g] == 0x25 ) # "%"
	297	{
	298	$fo = "";
	299	$g++;
	300	if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
	301	{
	302	$fo = $fo.chr($v[$g]);
	303	$g++;
	304	}
	305	if( $v[$g] == ord("-") )
	306	{
	307	$fo = $fo.chr($v[$g]);
	308	$g++;
	309	}
	310	$fo = $fo.chr($v[$g]);
	311
	312	$format{$fo} \|\| die "unknown format operator: '".$fo."'\n";
	313
	314	$code = $format{$fo};
	315	$post = $code_translate;
	316	$used{$fo}++;
	317	}
	318
	319	#
	320	# Translated characters
	321	#
	322	elsif( $v[$g] == 32 \|\| $translate{$v[$g]} )
	323	{
	324	$post = $code_translate;
	325	$code = $translate{$v[$g]};
	326	$used{$v[$g]}++;
	327	}
	328
	329	#
	330	# "normal" characters (alphabet, and ASCII characters around the alphabet)
	331	#
	332	elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
	333	{
	334	$used{$v[$g]}++;
	335
	336	$post = $code_normal;
	337	$code = $v[$g] - $normal_base;
	338	}
	339
	340	#
	341	# Not found
	342	#
	343	else
	344	{
	345	die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
	346	}
	347
	348	if( $post == $code_translate )
	349	{
	350	#
	351	# NULL optimization (space optimization not possible on translate/format)
	352	#
	353	if( $v[$g+1] == 0 )
	354	{
	355	$g++;
	356	$post = $post \| $code_translate_null;
	357	}
	358	else
	359	{
	360	$post = $post \| $code_translate_normal;
	361	}
	362	}
	363	else # $post == $code_normal
	364	{
	365	#
	366	# Space optimization
	367	#
	368	if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
	369	{
	370	# can't take this optimization if the next byte is a null,
	371	# since we can't have both a postfix space and null
	372	$g++;
	373	$post = $code_space;
	374	}
	375
	376	#
	377	# NULL optimization
	378	#
	379	elsif( $v[$g+1] == 0 )
	380	{
	381	$g++;
	382	$post = $code_null;
	383	}
	384	}
	385
	386	$code = $code \| $post;
	387	$output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
	388	}
	389
	390	print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
	391	print $label." ".$db.substr($output,2);
	392	for( $t = length($output); $t < length($orig); $t++ )
	393	{
	394	print " ";
	395	}
	396	print " ; compressed\n\n";
	397	}
	398

Note: See TracBrowser for help on using the repository browser.

Download in other formats: