/*
 * CBG - Compresion Basada en Gramaticas
 * (Usando prefijos)
 *
 * Algoritmo de prueba, no es 100% funcional.
 * Algoritmo no Optimizado.
 * Mucha repeticion de codigo.
 * Muchos posibles bugs.
 * Solo utilizado en los archivos en esta misma carpeta.
 * Usar en archivos sumamente pequenos (<10kb)
 *
 * $gcc cbg.c -o cbg -Wall -O2 -fomit-frame-pointer
 *
 * nitr0us <nitrousenador[at]gmail[d0t]com>
 * 15/11/06
 */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<getopt.h>
#include<errno.h>

typedef unsigned char	byte;

typedef struct GramEntry{
	short	index;	/* Index en las gramaticas de su tipo */
	short	offset;	/* desplazamiento en el archivo */
} GE;

void uso(char *);
FILE *abrir_archivo(char *, char *);
int fsize(FILE *);
int Compara(byte [], byte [], int);

int main(int argc, char **argv)
{
	int		opt, comprimir = 1, flag, flag2, flag3;
	int		k, l, m, n, o;
	int		ngr1 = 0, ngr2 = 0;
	int		insize, aux = 0;
	short		x, y, z;
	byte		*infile;
	byte		*ptr;
	FILE		*in, *out, *gram;

	if(argc != 5)	uso(*argv);

	while((opt = getopt(argc, argv, "cd")) != EOF)
		switch(opt){
				case 'c':
					comprimir = 1;
					break;
				case 'd':
					comprimir = 0;
					break;
				default:
					uso(*argv);
		}

	if(comprimir){ /* COMPRESION */
		in   = abrir_archivo(argv[2], "r");
		out  = abrir_archivo(argv[3], "w");
		gram = abrir_archivo(argv[4], "w");

		insize = fsize(in);

		int	idx1 = insize - 2;
		int	idx2 = insize - 5;
		int	p, q;
		int	rep1[idx1];
		byte	foo[3];
		byte	gram1[idx1 / 2][3];
		byte	gram1tmp[idx1 / 2][3];
		byte	gram2[idx2][2];
		byte	gram2tmp[idx2][2];
		byte	tmp1[idx1][3];
		byte	tmp2[idx2][6];

		/***********************************************************************/
		/* MAPEAR CADA 3 BYTES */
		for(k = 0; k < idx1; k++){
			fread(tmp1[k], 3, 1, in);
			fseek(in, -2, SEEK_CUR);
		}

		rewind(in);

		/* MAPEAR CADA 6 BYTES */
		for(k = 0; k < idx2; k++){
			fread(tmp2[k], 6, 1, in);
			fseek(in, -5, SEEK_CUR);
		}

		rewind(in);

		for(k = 0; k < idx1; k++)
			rep1[k] = 1;

		/***********************************************************************/
		/* CONTAR CUANTAS VECES SE REPITE CADA SERIE DE 3 BYTES */
		for(k = 0; k < idx1; k++)
			for(l = k + 1; l < idx1; l++)
				if(Compara(tmp1[k], tmp1[l], 3))
					rep1[k]++;

		/* CREAR UN ARRAY CON LAS SERIES DE 3 BYTES QUE SE REPITEN 2 O MAS VECES */

		for(k = 0; k < idx1; k++)
			if(rep1[k] >= 2){
				for(m = 0, flag = 1; m < ngr1; m++)
					if(Compara(tmp1[k], gram1[m], 3)){
						flag = 0;
						break;
					}

				if(flag){
					gram1[ngr1][0] = tmp1[k][0];
					gram1[ngr1][1] = tmp1[k][1];
					gram1[ngr1++][2] = tmp1[k][2];
				}
			}

		/* BUSCAR PATRONES DE 3 BYTES EN LA SERIE DE 6 BYTES */
		for(k = 0; k < idx2; k++)
			for(l = 0, flag = 0; l < 4; l++){
				if((l != 0) && (l != 3))
					continue;

				if((l == 3) && (!flag))
					break;

				for(m = 0; m < 3; m++)
					foo[m] = tmp2[k][l + m];
	
				for(n = 0; n < ngr1; n++)
					if(Compara(foo, gram1[n], 3)){
						if(flag){
							gram2tmp[aux++][1] = n;
							break;
						}
						gram2tmp[aux][0] = n;
						flag = 1;
						break;
					}
			}

		/* CREAR UN ARRAY CON LAS SERIES DE 6 BYTES SIN REPETIR */
		for(k = 0; k < aux; k++){
			for(l = 0, flag = 1; l < ngr2; l++)
				if(Compara(gram2tmp[k], gram2[l], 2)){
					flag = 0;
					break;
				}

			if(flag){
				gram2[ngr2][0] = gram2tmp[k][0];
				gram2[ngr2++][1] = gram2tmp[k][1];
			}
		}

		/***********************************************************************/
		printf("\t-=[ Gramatica General ]=-\n\n");

		// IMPRIME PRIMERAS PRODUCCIONES
		for(k = 0; k < ngr1; k++){
			printf("gram1[%d] -> ", k);
			for(l = 0; l < 3; l++)
				printf("0x%.2x ", gram1[k][l]);
			putchar(0x0a);
		}

		// IMPRIME SEGUNDAS PRODUCCIONES
		for(k = 0; k < ngr2; k++){
			printf("gram2[%d] -> ", k);
			printf("gram1[%d] ", gram2[k][0]);
			printf("gram1[%d] ", gram2[k][1]);
			putchar(0x0a);
		}

		/***********************************************************************/
		printf("\n\t-=[ Analisis ]=-\n\n");

		/* CREAR UN ARRAY CON LAS SERIES DE 6 BYTES (EXPANDIDAS) */
		for(k = 0; k < ngr2; k++){
			for(l = 0; l < 3; l++)
				tmp2[k][l] = gram1[gram2[k][0]][l];

			for(; l < 6; l++)
				tmp2[k][l] = gram1[gram2[k][1]][l - 3];
		}

		/* BUSQUEDA DE PATRONES (PRODUCCIONES) EN EL ARCHIVO, CREAR LA   */
		/* GRAMATICA SOLO CON LAS PRODUCCIONES QUE SE USAN EN EL ARCHIVO */
		/* E IR ENVIANDO RESULTADOS AL ARCHIVO COMPRESO                  */
		GE *GE1 = (GE *) calloc(1024, sizeof(GE));
		GE *GE2 = (GE *) calloc(1024, sizeof(GE));
		infile = (byte *) malloc(insize);

		ptr = infile;
		while(!feof(in)){
			fread(ptr, 512, 1, in);
			ptr += 512;
		}

		p = ngr1, q = ngr2;
		ngr1 = ngr2 = x = y = aux = 0;

		for(k = 0; k < insize; k++, infile++){
			if((insize - k) < 3)
				goto	copia1byte;

			if((insize - k) < 6)
				goto	bloques2;

			for(l = 0, flag = 0; l < q; l++)
				if(Compara(tmp2[l], infile, 6)){
					printf("gram2[%d] @ offset %.2d\n", l, k);
					foo[0] = tmp2[l][0];
					foo[1] = tmp2[l][1];
					foo[2] = tmp2[l][2];
					for(m = 0, flag2 = 1; m < ngr1; m++)
						if(Compara(foo, gram1tmp[m], 3)){
							flag2 = 0;
							break;
						}

					if(flag2){
						gram1tmp[ngr1][0] = foo[0];
						gram1tmp[ngr1][1] = foo[1];
						gram1tmp[ngr1++][2] = foo[2];
					}

					foo[0] = tmp2[l][3];
					foo[1] = tmp2[l][4];
					foo[2] = tmp2[l][5];
					for(m = 0, flag3 = 1; m < ngr1; m++)
						if(Compara(foo, gram1tmp[m], 3)){
							flag3 = 0;
							break;
						}

					if(flag3){
						gram1tmp[ngr1][0] = foo[0];
						gram1tmp[ngr1][1] = foo[1];
						gram1tmp[ngr1++][2] = foo[2];
					}
					/********************************************************************/

					foo[0] = tmp2[l][0];
					foo[1] = tmp2[l][1];
					foo[2] = tmp2[l][2];
					for(m = 0; m < ngr1; m++)
						if(Compara(foo, gram1tmp[m], 3))
							break;

					foo[0] = tmp2[l][3];
					foo[1] = tmp2[l][4];
					foo[2] = tmp2[l][5];
					for(n = 0; n < ngr1; n++)
						if(Compara(foo, gram1tmp[n], 3))
							break;

					byte	foo2[2] = {m, n};

					for(o = 0, flag2 = 1; o < ngr2; o++)
						if(Compara(gram2[o], foo2, 2)){
							flag2 = 0;
							GE2[y].index = o;
							break;
						}

					if(flag2){
						gram2[ngr2][0] = m;
						gram2[ngr2++][1] = n;
						GE2[y].index = ngr2 - 1;
					}

					/********************************************************************/

					GE2[y++].offset	= aux++;
					fputc((int) GE2[y - 1].index, out);

					infile += 5;
					k += 5;
					flag = 1;
					break;
				}

			if(flag)
				continue;

bloques2:
			for(l = 0, flag3 = 0; l < p; l++)
				if(Compara(gram1[l], infile, 3)){
					printf("gram1[%d] @ offset %.2d\n", l, k);
					for(m = 0, flag2 = 1; m < ngr1; m++)
						if(Compara(gram1[l], gram1tmp[m], 3)){
							flag2 = 0;
							GE1[x].index = m;
							break;
						}

					if(flag2){
						gram1tmp[ngr1][0] = gram1[l][0];
						gram1tmp[ngr1][1] = gram1[l][1];
						gram1tmp[ngr1++][2] = gram1[l][2];
						GE1[x].index	= ngr1 - 1;
					}

					GE1[x++].offset	= aux++;
					fputc(GE1[x - 1].index, out);

					flag3 = 1;
					infile += 2;
					k += 2;
					break;
				}

			if(flag3)
				continue;

copia1byte:
			fputc((int) *infile, out);
			aux++;
		}

		for(k = 0; k < ngr1; k++)
			for(l = 0; l < 3; l++)
				gram1[k][l] = gram1tmp[k][l];

		/***********************************************************************/
		printf("\n\t-=[ Gramatica Reducida ]=-\n\n");
		// IMPRIME PRIMERAS PRODUCCIONES
		for(k = 0; k < ngr1; k++){
			printf("gram1[%d] -> ", k);
			for(l = 0; l < 3; l++)
				printf("0x%.2x ", gram1[k][l]);
			putchar(0x0a);
		}

		// IMPRIME SEGUNDAS PRODUCCIONES
		for(k = 0; k < ngr2; k++){
			printf("gram2[%d] -> ", k);
			printf("gram1[%d] ", gram2[k][0]);
			printf("gram1[%d] ", gram2[k][1]);
			putchar(0x0a);
		}

		/***********************************************************************/
		printf("\n\t-=[ Compresion ]=-\n\n");

		/* SE ENVIA INFORMACION DE LAS GRAMATICAS Y SUS POSICIONES EN EL ARCHIVO */
		z = (short) ngr1;
		fwrite(&z, sizeof(z), 1, gram);
		z = (short) ngr2;
		fwrite(&z, sizeof(z), 1, gram);
		fwrite(&x, sizeof(x), 1, gram);
		fwrite(&y, sizeof(y), 1, gram);

		for(k = 0; k < x; k++)
			fwrite(&GE1[k], sizeof(GE1[k]), 1, gram);

		for(k = 0; k < y; k++)
			fwrite(&GE2[k], sizeof(GE1[k]), 1, gram);

		/* SE ENVIAN LAS GRAMATICAS AL ARCHIVO */
		for(k = 0; k < ngr1; k++)
			fwrite(gram1[k], sizeof(gram1[k]), 1, gram);

		for(k = 0; k < ngr2; k++)
			fwrite(gram2[k], sizeof(gram2[k]), 1, gram);

		printf("Archivo Original:\t%s\t%d bytes\n", argv[2], insize);

		fclose(out);
		out = abrir_archivo(argv[3], "r");
		printf("Archivo Compreso:\t%s\t%d bytes\n", argv[3], fsize(out));
	} else{ /* DESCOMPRESION */
		in   = abrir_archivo(argv[2], "r");
		out  = abrir_archivo(argv[3], "w");
		gram = abrir_archivo(argv[4], "r");

		/* OBTENER INFORMACION DE LA COMPRESION */
		fread(&x, sizeof(x), 1, gram);
		ngr1 = (int) x;
		fread(&x, sizeof(x), 1, gram);
		ngr2 = (int) x;
		fread(&x, sizeof(x), 1, gram);
		fread(&y, sizeof(y), 1, gram);

		GE *GE1 = (GE *) calloc(x, sizeof(GE));
		GE *GE2 = (GE *) calloc(y, sizeof(GE));

		for(k = 0; k < x; k++)
			fread(&GE1[k], sizeof(GE1[k]), 1, gram);

		for(k = 0; k < y; k++)
			fread(&GE2[k], sizeof(GE1[k]), 1, gram);

		/* CARGAR LAS GRAMATICAS */
		byte	gram1[ngr1][3];
		byte	gram2[ngr2][2];
		byte	tmp2[ngr2][6];

		for(k = 0; k < ngr1; k++)
			fread(gram1[k], sizeof(gram1[k]), 1, gram);

		for(k = 0; k < ngr2; k++)
			fread(gram2[k], sizeof(gram2[k]), 1, gram);

		/***********************************************************************/
		printf("\t-=[ Gramatica ]=-\n\n");

		// IMPRIME PRIMERAS PRODUCCIONES
		for(k = 0; k < ngr1; k++){
			printf("gram1[%d] -> ", k);
			for(l = 0; l < 3; l++)
				printf("0x%.2x ", gram1[k][l]);
			putchar(0x0a);
		}

		// IMPRIME SEGUNDAS PRODUCCIONES
		for(k = 0; k < ngr2; k++){
			printf("gram2[%d] -> ", k);
			printf("gram1[%d] ", gram2[k][0]);
			printf("gram1[%d] ", gram2[k][1]);
			putchar(0x0a);
		}

		/***********************************************************************/
		printf("\n\t-=[ Analisis ]=-\n\n");

		/* CREAR UN ARRAY CON LAS SERIES DE 6 BYTES (EXPANDIDAS) */
		for(k = 0; k < ngr2; k++){
			for(l = 0; l < 3; l++)
				tmp2[k][l] = gram1[gram2[k][0]][l];

			for(; l < 6; l++)
				tmp2[k][l] = gram1[gram2[k][1]][l - 3];
		}

		/* IR ANALIZANDO EL ARCHIVO COMPRESO E IR ENVIANDO */
		/* RESULTADOS AL ARCHIVO DE SALIDA (DESCOMPRESO)   */
		insize = fsize(in);
		infile = (unsigned char *) malloc(insize);
		memset(infile, 0x00, insize);
		ptr = infile;

		while(!feof(in)){
			fread(ptr, 512, 1, in);
			ptr += 512;
		}

		for(l = 0; l < insize; l++){
			for(k = 0, flag = 0; k < y; k++)
				if(l == GE2[k].offset){
					printf("gram2[%d] @ offset %.2d\n", GE2[k].index, l);
					fwrite(tmp2[GE2[k].index], sizeof(tmp2[GE2[k].index]), 1, out);
					flag = 1;
					break;
				}

			if(flag)
				fseek(in, 1, SEEK_CUR);
			else{
				for(k = 0, flag2 = 0; k < x; k++)
					if(l == GE1[k].offset){
						printf("gram1[%d] @ offset %.2d\n", GE1[k].index, l);
						fwrite(gram1[GE1[k].index], sizeof(gram1[GE1[k].index]), 1, out);
						flag2 = 1;
						break;
					}

				if(flag2)
					fseek(in, 1, SEEK_CUR);
				else
					fputc((int) infile[l], out);
			}
		}

		/***********************************************************************/
		printf("\n\t-=[ Descompresion ]=-\n\n");

		printf("Archivo Compreso:\t%s\t%d bytes\n", argv[2], insize);

		fclose(out);
		out = abrir_archivo(argv[3], "r");
		printf("Archivo Descompreso:\t%s\t%d bytes\n", argv[3], fsize(out));
	}

	fclose(in);
	fclose(out);
	fclose(gram);

	return 0;
}

void uso(char *prog)
{
	fprintf(stderr, "Uso: %s [-c|-d] <entrada> <salida> <gramatica>\n", prog);
	fprintf(stderr, "\t-c\tComprimir\n");
	fprintf(stderr, "\t-d\tDescomprimir\n");
	exit(-0xdead);
}

FILE *abrir_archivo(char *nombre, char *modo)
{
	FILE	*fp;

	if((fp = fopen(nombre, modo)) == NULL){
		perror(nombre);
		exit(-1);
	}

	return fp;
}

int fsize(FILE *fp)
{
	int	cnt = 0;

	while(!feof(fp)){
		fgetc(fp);
		cnt++;
	}

	rewind(fp);

	return cnt - 1;
}

int Compara(byte foo[], byte bar[], int n)
{
	int	k;

	for(k = 0; k < n; k++)
		if(foo[k] != bar[k])
			return 0;

	return 1;
}

