Context Navigation

source: xtideuniversalbios/trunk/Tools/StringsCompress.pl@ 280

Visit:

Last change on this file since 280 was 277, checked in by gregli@…, 13 years ago
Moved the bulk of the serial code to the assembly library, for inclusion in other utilities. Fixed a bug in int13h.asm when floppy support was not enabled that was preventing foreign drives from working properly.
File size: 10.4 KB

Rev	Line
[189]	1	#======================================================================================
	2	#
	3	# Project name : XTIDE Universal BIOS
	4	#
	5	# Authors : Greg Lindhorst
	6	# gregli@hotmail.com
	7	#
[242]	8	# Description : Script for compiling and compressing strings for
[189]	9	# use by DisplayFormatCompressed.asm. See the header of that file
	10	# for a description of the compression scheme.
	11	#
[242]	12	# Usage : stdin: Listing of strings.asm,
[189]	13	# assembled with MODULE_STRINGS_COMPRESSED_PRECOMPRESS.
	14	# We used the listing so that the assembler can take care of
	15	# resolving %define and EQU symbol definitions.
	16	#
	17	# stdout: StringsCompressed.asm,
	18	# drop-in replacement for Strings.asm (included by Main.asm)
	19	#
	20	# Also see the XTIDE makefile for building StringsCompressed.asm
	21	#
	22
	23	#----------------------------------------------------------------------
	24	#
[197]	25	# Translated, Format, and "Normal" characters
[189]	26	#
	27	# DisplayFormatCompressed can only deal with characters in one of the following categories:
	28	# 1. Those in the Translate associative array
	29	# 2. Those in the Format associative array
[242]	30	# 3. Characters between $normal_base and $normal_base+0x40
	31	# (typically covers upper and lower case alphabets)
[189]	32	# 4. Null characters (marking the end of strings)
	33	# 5. The special string LF,CR
	34	#
[242]	35	# If a character or format read at the input cannot be found in one of the above categories,
	36	# it must be added here before this script will accept it (and DisplayFormatCompressed can
[189]	37	# display it).
	38	#
[197]	39	# Tables for the above categories are expected in the input stream, before the strings to be
[242]	40	# compressed are provided. Note that these tables are not present in DisplayFormatCompressed,
	41	# and do not need to be updated there. Needed information is put in the compression output
[197]	42	# that it reads.
[189]	43	#
	44
	45	#
	46	# High order code bits, determining which type of character we have (translated or not) and
	47	# if a space or null should come after this character.
	48	#
	49	$code_space = 0xc0;
	50	$code_null = 0x80;
	51	$code_normal = 0x40;
	52	$code_translate = 0x00;
	53
	54	#
	55	# Bit used if it is a translated byte
	56	#
	57	$code_translate_null = 0x00;
	58	$code_translate_normal = 0x20;
	59
	60	print ";;;======================================================================\n";
	61	print ";;;\n";
	62	print ";;; This file is generated by StringsCompress.pl from source in Strings.asm\n";
[242]	63	print ";;; DO NOT EDIT DIRECTLY - See the makefile for how to rebuild this file.\n";
[189]	64	print ";;; This file only needs to be rebuilt if Strings.asm is changed.\n";
	65	print ";;;\n";
	66	print ";;;======================================================================\n\n";
	67
[197]	68
[189]	69	#
[242]	70	# On a first pass, look for our table directives. $translate{...}, $format{...}, etc.
	71	# are expected in the input stream.
[189]	72	#
# First pass: read the whole listing (stdin or files named on the command
# line) into @lines, picking up the table directives on the way.
#
# Directives are assembler comments of the form
#     ;$translate{ ord( 'c' ) } = N      ;$translate{ NN } = N
#     ;$format_begin = N                 ;$format{ "name" } = N
#     ;$normal_base = 0xNN               ;$normal_base = NN
# each preceded by the listing's line number. They fill %translate, %format,
# $format_begin and $normal_base. A recognised line is tagged with $processed
# before being stored, so the tag shows up when the line is copied to the output.
$processed = " [StringsCompress Processed]";
while( <> )
{
	# chomp, not chop: chop would eat a real character from a final line
	# that has no trailing newline.
	chomp;
	my $o = $_;

	if( /^\s*\d+\s*;\$translate\{\s*ord\(\s*'(.)'\s*\)\s*\}\s*=\s*([0-9]+)/ )
	{
		# character given literally
		$translate{ord($1)} = int($2);
		$o .= $processed;
	}
	elsif( /^\s*\d+\s*;\$translate\{\s*([0-9]+)\s*\}\s*=\s*([0-9]+)/ )
	{
		# character given as a decimal code
		$translate{int($1)} = int($2);
		$o .= $processed;
	}
	elsif( /^\s*\d+\s*;\$format_begin\s*=\s*([0-9]+)/ )
	{
		$format_begin = int($1);
		$o .= $processed;
	}
	elsif( /^\s*\d+\s*;\$format\{\s*\"([^\"]+)\"\s*\}\s*=\s*([0-9]+)/ )
	{
		$format{$1} = int($2);
		$o .= $processed;
	}
	elsif( /^\s*\d+\s*;\$normal_base\s*=\s*0x([0-9a-fA-F]+)/ )
	{
		# the hexadecimal form must be tried first: the decimal pattern
		# below would match the leading "0" of "0x..."
		$normal_base = hex($1);
		$o .= $processed;
	}
	elsif( /^\s*\d+\s*;\$normal_base\s*=\s*([0-9]+)/ )
	{
		$normal_base = int($1);
		$o .= $processed;
	}

	push( @lines, $o );
}
	115
	116	#
[242]	117	# On the second pass, loop through lines of the listing, looking for 'db' lines
[197]	118	# (and dealing with continuations) and compressing each line as it is encountered.
	119	#
	120	for( $l = 0; $l < $#lines; $l++ )
	121	{
	122	$_ = $lines[$l];
	123
	124	#
[189]	125	# The <number> indicates a line from an include file, do not include in the output
	126	#
	127	if( /^\s\d+\s\<\d\>/ )
	128	{
	129	}
	130
	131	#
	132	# a 'db' line, with or without a label
	133	#
	134	elsif( /^\s\d+\s[0-9A-F]+\s([0-9A-F]+)(-?)\s+([a-z0-9_]+:)?(\s+)(db\s+)(.)/i )
	135	{
	136	$bytes = $1;
	137	$continuation = $2;
	138	$label = $3;
	139	$spacing = $4;
	140	$db = $5;
	141	$string = $6;
	142
	143	print $label.$spacing."; ".$db.$string."\n";
	144
	145	if( $continuation eq "-" )
	146	{
	147	do
	148	{
[197]	149	$_ = $lines[++$l];
	150	/^\s*\d+\s[0-9A-F]+\s([0-9A-F]+)(\-?)/i \|\| die "parse error on continuation: '".$_."'";
[189]	151	$bytes .= $1;
	152	$continuation = $2;
	153	}
	154	while( $continuation eq "-" );
	155	}
	156
	157	&processString( $bytes, $label.$spacing, $db );
	158	}
[242]	159
	160	#
[277]	161	# a ';%%;' prefix line, copy to output without the prefix
	162	#
	163	elsif( /^\s\d+\s;%%;\s(.)$/ )
	164	{
	165	print $1."\n";
	166	}
	167
	168	#
[189]	169	# everything else, copy to the output as is
	170	#
	171	elsif( /^\s\d+\s(.*)$/ )
	172	{
	173	print $1."\n";
	174	}
	175	}
	176
[197]	177	print ";;; end of input stream\n\n";
[189]	178
	179	#--------------------------------------------------------------------------------
	180	#
	181	# Output constants and the TranslatesAndFormats table
	182	#
	183
	184	print "StringsCompressed_NormalBase equ ".$normal_base."\n\n";
	185
	186	print "StringsCompressed_FormatsBegin equ ".$format_begin."\n\n";
	187
	188	print "StringsCompressed_TranslatesAndFormats: \n";
	189
	190	foreach $f (keys(%translate))
	191	{
	192	$translate_index[$translate{$f}] = $f;
[196]	193	$used{$f} \|\| die "translate $f unused\n";
[189]	194	$translate{$f} <= 31 \|\| die $translate{$f}.": translate codes must be below 32";
	195	}
	196
	197	for( $g = 0; $translate_index[$g]; $g++ )
	198	{
	199	print " db ".$translate_index[$g]." ; ".$g."\n";
	200	}
	201
	202	foreach $f (keys(%format))
	203	{
	204	$n = $f;
	205	$n =~ s/\-/_/g;
	206	$format_index[$format{$f}] = "DisplayFormatCompressed_Format_".$n;
	207	$used{$f} \|\| die "format $f unused\n";
	208	$format{$f} <= 31 \|\| die $format{$f}.": format codes must be below 32";
	209	}
	210
	211	for( $t = $format_begin; $format_index[$t]; $t++ )
	212	{
	213	print " db (DisplayFormatCompressed_BaseFormatOffset - ".$format_index[$t].") ; ".$t."\n";
	214	}
	215
	216	print "\n";
	217
[242]	218	#
[189]	219	# Ensure that branch targets are within reach
	220	#
# For every format handler label, emit an assembler-time check that the label
# lies at or below DisplayFormatCompressed_BaseFormatOffset and no more than
# 255 bytes away from it. The checks are wrapped in
# %ifndef CHECK_FOR_UNUSED_ENTRYPOINTS.
print "%ifndef CHECK_FOR_UNUSED_ENTRYPOINTS\n";
$t = $format_begin;
while( $format_index[$t] )
{
	my $handler = $format_index[$t];
	my $base = "DisplayFormatCompressed_BaseFormatOffset";

	print "%if ".$base." < ".$handler." || ".$base." - ".$handler." > 255\n";
	print "%error \"".$handler." is out of range of ".$base."\"\n";
	print "%endif\n";

	$t++;
}
print "%endif\n";
[189]	229
	230	#--------------------------------------------------------------------------------
	231	#
	232	# Output usage statistics
	233	#
	234
	235	print "\n;; translated usage stats\n";
[241]	236	foreach $f (keys(%translate))
[189]	237	{
	238	print ";; ".$f.":".$used{$f}."\n";
	239	$translate_count++;
	240	}
	241	print ";; total translated: ".$translate_count."\n";
	242
	243	print "\n;; format usage stats\n";
	244	$format_count = 0;
	245	foreach $f (keys(%format))
	246	{
	247	print ";; ".$f.":".$used{$f}."\n";
	248	$format_count++;
	249	}
	250	print ";; total format: ".$format_count."\n";
	251
	252	print "\n;; alphabet usage stats\n";
	253
	254	$used_count = 0;
	255	for( $t = $normal_base; $t < $normal_base + 0x40; $t++ )
	256	{
	257	print ";; ".$t.",".chr($t).":".$used{$t}."\n";
	258	if( $used{$t} )
	259	{
	260	$used_count++;
	261	}
	262	}
	263	print ";; alphabet used count: ".$used_count."\n";
	264
	265	#--------------------------------------------------------------------------------
	266	#
	267	# processString does the real compression work...
	268	#
	269
	270	sub processString
	271	{
	272	$chars = $_[0];
	273	$label = $_[1];
	274	$db = $_[2];
	275
	276	$label =~ s/[a-z0-9_:]/ /ig; # replace with spaces for proper output spacing
	277
	278	#
	279	# Copy numeric bytes out of hexadecimal pairs in the listing
	280	#
	281	$#v = 0;
	282
	283	$orig = "";
	284	for( $g = 0; $g < length($chars); $g += 2 )
	285	{
	286	$i = $g/2;
	287	$v[$i] = hex(substr($chars,$g,2));
	288	$orig .= sprintf( ($v[$i] > 0x9f ? ", %03xh" : ", %02xh"), $v[$i] );
	289	}
[242]	290	$v[length($chars)/2] = 0xff; # guard byte to avoid thinking going past the end of
[189]	291	# the string is a null
	292
	293	$output = "";
	294	#
	295	# Loop through bytes...
	296	# looking ahead as needed for possible space and null optimizations, compiling formats
	297	#
	298	for( $g = 0; $g < $#v-1; $g++ ) # -1 for the guard byte
	299	{
	300	#
	301	# Special translation of LF,CR to a format
	302	#
	303	if( $v[$g] == 10 && $v[$g+1] == 13 )
	304	{
	305	$g++;
	306	$post = $code_translate;
	307	$code = $format{"nl"};
	308	$used{"nl"}++;
	309	}
	310
	311	#
	312	# Format operators
	313	#
	314	elsif( $v[$g] == 0x25 ) # "%"
	315	{
	316	$fo = "";
	317	$g++;
	318	if( $v[$g] >= ord("0") && $v[$g] <= ord("9") )
	319	{
	320	$fo = $fo.chr($v[$g]);
	321	$g++;
	322	}
	323	if( $v[$g] == ord("-") )
	324	{
	325	$fo = $fo.chr($v[$g]);
	326	$g++;
	327	}
	328	$fo = $fo.chr($v[$g]);
	329
	330	$format{$fo} \|\| die "unknown format operator: '".$fo."'\n";
	331
	332	$code = $format{$fo};
	333	$post = $code_translate;
	334	$used{$fo}++;
	335	}
	336
	337	#
	338	# Translated characters
	339	#
	340	elsif( $v[$g] == 32 \|\| $translate{$v[$g]} )
	341	{
	342	$post = $code_translate;
	343	$code = $translate{$v[$g]};
	344	$used{$v[$g]}++;
	345	}
[242]	346
[189]	347	#
	348	# "normal" characters (alphabet, and ASCII characters around the alphabet)
	349	#
	350	elsif( $v[$g] >= $normal_base && $v[$g] < ($normal_base+0x40) )
	351	{
	352	$used{$v[$g]}++;
	353
	354	$post = $code_normal;
	355	$code = $v[$g] - $normal_base;
	356	}
	357
	358	#
	359	# Not found
[242]	360	#
[189]	361	else
	362	{
	363	die $v[$g].": no translation or format, and out of normal range - may need to be added\n";
	364	}
	365
	366	if( $post == $code_translate )
	367	{
	368	#
	369	# NULL optimization (space optimization not possible on translate/format)
	370	#
	371	if( $v[$g+1] == 0 )
	372	{
	373	$g++;
	374	$post = $post \| $code_translate_null;
	375	}
	376	else
	377	{
	378	$post = $post \| $code_translate_normal;
	379	}
	380	}
	381	else # $post == $code_normal
	382	{
	383	#
	384	# Space optimization
	385	#
	386	if( $v[$g+1] == 0x20 && $v[$g+2] != 0 )
	387	{
[242]	388	# can't take this optimization if the next byte is a null,
[189]	389	# since we can't have both a postfix space and null
	390	$g++;
	391	$post = $code_space;
	392	}
	393
	394	#
	395	# NULL optimization
	396	#
	397	elsif( $v[$g+1] == 0 )
	398	{
	399	$g++;
	400	$post = $code_null;
	401	}
	402	}
	403
	404	$code = $code \| $post;
	405	$output .= sprintf( ($code > 0x9f ? ", %03xh" : ", %02xh"), $code );
	406	}
	407
	408	print $label."; ".$db.substr($orig,2)." ; uncompressed\n";
	409	print $label." ".$db.substr($output,2);
	410	for( $t = length($output); $t < length($orig); $t++ )
	411	{
	412	print " ";
	413	}
	414	print " ; compressed\n\n";
	415	}
	416

Note: See TracBrowser for help on using the repository browser.

Download in other formats: