/*
 * SPEEDUP	O B J E C T  C O D E  'O P T I M I S E R'
 *		
 *		(c) Chris Undery 1983
 *		11 Margaret st Newtown 2042
 *		Sydney Australia.

 Description:-

 This program was written to 'optimise' bds c object code to take 
advantage of the undocumented instructions of the Intel 8085 mpu.

see cpu.asm for macros to enable assembly of these instructions.
    
	hl <- (de)	
	hl -> (de)	
	16 bit shifts
        16 bit subtraction
	stack + offset references
	conditional branch with dcx inx using the x5 flag
	restart on overflow

These instructions 'live' in all Intel 8085 mpus. They were
not published when the mpu was first released for some weird
marketing reason. Taken in perspective they really shed new 
light on the Z80 8085 comparison stakes.

The file supplied .. CPU.ASM should be assembled using MAC
or M80 and loaded to binary format (use link, load or ddt/sid).
This program should be invoked as:-

    speedup infile_name outfile_name cpu.com

The result of this is that BDS C programs will run faster. Programs
which make use of much pointer work and string comparisons will be
most affected by this tomfoolery. There are many more code sequences
generated by the bds compiler which could be worked over..see who
can get the fastest sieve benchmark going!

Heavily disk io bound software will not see much improvement by the
use of speedup, but other software will. Don't expect too much for
nothing. I have not doctored the 16 bit rotates in c.ccc, they are
good candidates for optimising using the 16 bit rotate instructions
play around and have fun.

*/


#include bdscio.h

#define BUFSECS 20000 / 128	/* # of sectors which fit in buf*/

#define NO	0		/* booleans			*/
#define YES	1

#define TEXT	char
#define UCOUNT	unsigned
#define COUNT	int
#define BYTE	char

/* leading byte on rules file holds cpu type */
#define I8080	0
#define I8085	1
#define Z80	2
#define I8886	3
#define ASCII	4

/* Global Data Storage	*/
TEXT	object[20000],		/* object code buffer		*/
	rules[256][64],		/* transformation table		*/
	rulebuf[BUFSIZ],	/* file buffer for rules	*/
	cpu,			/* cpu descriptor		*/
	rule;			/* current rule being checked	*/

UCOUNT	infile,			/* input file descriptor	*/
	outfile,		/* output file descriptor	*/
	size,			/* no of sects read from input	*/
	bytes,			/* bytes transferred		*/
	num_rules,		/* max number of sequences	*/
	modified;		/* total bytes modified		*/


main(argc,argv)
TEXT **argv;
{

	num_rules = modified =  size = NULL;
	if (argc < 4)		
		{
		puts("Usage: SPEEDUP input_file  output_file  mask_file\n");
		exit();
		}
	puts("\nSPEEDUP Object Code 'Optimiser' V 1.0 (C) Chris Undery 1983\n");

	if (fopen(argv[3],rulebuf) == ERROR)
		{
		printf("%s not found\n",argv[3]);
		exit();
		}


	load_rules();			/* load rules table from file */

	if ((infile = open(argv[1],0)) == ERROR)
		{
		printf("%s not found\n",argv[1]);
		exit();
		}

	if ((outfile = creat(argv[2])) == ERROR)
		{
		printf("Can't create %s\n",argv[2]);
		exit();
		}

	size = read(infile,object,BUFSECS);	/* gulp		*/
	
	if (size == ERROR)
		{
		printf("Error reading %s\n",argv[1]);
		exit();
		}

	printf("Now processing %s [file size = %d]\n",argv[1],size*128);
	optimise();			/* modify the com file	*/

	if (write(outfile,object,size) != size)
		{
		printf("Error writing %s, check disk space\n",argv[2]);
		exit();
		}

	close(infile);

	if (close(outfile) == ERROR)
		{
		printf("Error closing %s\n",argv[2]);
		exit();
		}

	puts("CPU type = ");
	switch (cpu) 
		{
		case I8080: puts("8080");
			    break;
		case I8085: puts("8085");break;
		case I8886: puts("8088 / 8086");break;
		case Z80:   puts("Z80"); break;
		default:    puts("ASCII TEXT  replacement");
		}

	printf(" Bytes altered = %5u\n",modified);
	
}


/* scan rules table until all transformations completed */
optimise()
{
	COUNT k, j, loop;
	TEXT *msecs,*secs,*mins;

	msecs = 0xe837; secs = 0xe838; mins = 0xe839;
	*msecs = *secs = *mins = k = j = NULL;
	while (k < num_rules)
		{
		printf("Pass %02d, Replacements      ",k+1);
		transform(j);
		j += 2;
		k++;
		if (k == num_rules) break;
		puts("\r                         \r");
		}
	puts("\n");

/*
	printf("Cpu time <%02d:%02d:%03d>\n",
		*mins,*secs,*msecs);
*/

}

/* scan object code for match in rules table */
transform(k)
COUNT k;
{
	COUNT pos;		/* result of index function */
	TEXT *p;		/* pointer to nex element in object */
	COUNT subs;
		
	subs = pos = NULL;
	p = &object[0];		/* assign to start of object */
	/*
	 * rules[k][0] 		length of search mask 
	 * rules[k][1..31] 	search mask
	 * rules[k+1][0] 	length of replacement data
	 * rules[k+1][1..31] 	replacement data
	*/

	while (pos != ERROR )
		{
		pos = match(p,(size * 128) - pos ,&rules[k][1],rules[k][0]);
		if (pos == ERROR) break;
		p += pos;	/* add the offset needed */
		movmem(&rules[k+1][1],p,rules[k+1][0]);
		modified += rules[k+1][0];
		back(5);
		puts("     ");
		back(5);
		printf("%5u",++subs);
		}
}

/* find pattern in buffer , match for n characters */
match(s,s_len,t,t_len)
TEXT s[],
     t[];
COUNT s_len,
      t_len;
{
	COUNT i, j, k;

	for (i = 0; i < s_len; i++)
		{
		for (j = i, k= 0; s[j] == t[k]; j++, k++)
			;
		if (k == t_len) return i; /* return valid index */
		}
	return ERROR;	/* no match found in buffer */
}

/* load search masks and replacement patterns */
load_rules()
{
	COUNT c, j, k, i;

	i = j = NULL;
	if ((c = getc(rulebuf)) != CPMEOF)
		cpu = c;
	else return YES;
	while ((c = getc(rulebuf)) != CPMEOF)
		{
		k = rules[i][0] = c;
		if (!c) return YES;	/* count feild = zero */
		while (k--)
			{ 	/* load sequence string */
			rules[i][++j] = getc(rulebuf);
			if (j > 63)
				{
				printf("Sequence %d too long ",i);
				puts("or format error\n");
				exit();
				}
			}
		i++;
		j = 0;
		num_rules += (i % 2);
		if (num_rules > 128) return YES;
		}
}

/* run cursor backwards n spaces */
back(n)
COUNT n;
{
	COUNT j;

	for (j = 0 ; j < n; j++) putchar(0x08);
}



