Example #1
0
/** Program entry point for the "pack" compressor.

    Opens the input and output files named on the command line, loads the
    word list (words.txt unless a third argument names another file), reads
    the whole input into memory, and steps through it one code at a time
    before flushing any buffered bits to the output file.

    @param argc number of command line arguments
    @param argv array of command line arguments: argv[1] is the input file,
                argv[2] the output file, optional argv[3] the word file
    @return EXIT_SUCCESS on completion; the program exits with status 1 on
            a usage error or when either file cannot be opened
  */
int main( int argc, char *argv[] )
{
  char *wordFile = "words.txt";

  FILE *input;
  FILE *output;

  if ( argc < 3 || argc > 4 ) {
    fprintf( stderr, "usage: pack <input.txt> <compressed.raw> [wordfile.txt]\n" );
    exit( 1 );
  }

  input = fopen( argv[ 1 ], "rb" );
  if ( !input ) {
    fprintf( stderr, "Can't open file: %s\n", argv[ 1 ] );
    exit( 1 );
  }

  // "rw" is not a valid fopen() mode; the output has to be opened for
  // writing, and in binary mode because the packed data is raw bytes.
  output = fopen( argv[ 2 ], "wb" );
  if ( !output ) {
    fprintf( stderr, "Can't open file: %s\n", argv[ 2 ] );
    fclose( input );
    exit( 1 );
  }

  // An optional third argument replaces the default word file.
  if ( argc == 4 ) {
    wordFile = argv[ 3 ];
  }

  WordList *wordList = readWordList( wordFile );

#ifdef DEBUG
  // Report the entire contents of the word list, once it's built.
  printf( "---- word list -----\n" );
  for ( int i = 0; i < wordList->len; i++ )
    printf( "%d == %s\n", i, wordList->words[ i ] );
  printf( "--------------------\n" );
#endif

  // ...

  // Read the contents of the whole file into one big buffer.  This could be more
  // efficient, but it simplifies the rest of the program.
  char *buffer = readFile( input );

  // Walk the buffer until its terminating null byte, one code per step.
  int pos = 0;
  PendingBits pending = { 0, 0 };
  while ( buffer[ pos ] ) {
    // Get the next code.
    int code = bestCode( wordList, buffer + pos );
#ifdef DEBUG
    printf( "%d <- %s\n", code, wordList->words[ code ] );
#endif

    // Write it out and move ahead by the number of characters we just encoded.
    // NOTE(review): this calls readCode() on the input stream and never hands
    // `code` to anything that writes to `output`; a write call was probably
    // intended here -- confirm against the bit-I/O API before relying on it.
    readCode( &pending, input );
    pos += strlen( wordList->words[ code ] );
  }

  // Write out any remaining bits in the last, partial byte.
  flushBits( &pending, output );

  // ...
  fclose( input );
  fclose( output );
  return EXIT_SUCCESS;
}
Example #2
0
// Compress standard input into a stream of codes emitted through putBits().
//
// The stream opens with the option values packed as
// [MAXBITS (6 bits)][E_FLAG (1 bit)][P_FLAG (1 bit)].
//
// ESCAPE (used only with -e) is the one special code written here; the
// decoder is expected to derive everything else.
//
// When P_FLAG is set the table is pruned as soon as its last slot is taken.
// Always returns 0.
int encode(int MAXBITS, int E_FLAG, int P_FLAG) {

    // Header: MAXBITS fits in 6 bits (its maximum is 20), flags take 1 each.
    putBits(6, MAXBITS);
    putBits(1, E_FLAG);
    putBits(1, P_FLAG);

    const int table_cap = 1 << MAXBITS;   // total number of code slots

    // With -e, codes 0 (QUIT) and 1 (ESCAPE) are already assigned and one
    // bit is enough for the next code. Without it, all 256 one-char strings
    // are loaded below and codes start out 8 bits wide.
    int code_count = E_FLAG ? 2 : 0;      // codes assigned so far
    int width = E_FLAG ? 1 : 8;           // bits used to send the next code

    Trie root = createT();
    if (!E_FLAG) {
        for (int ch = 0; ch < 256; ch++)
            insertT(root, ch, code_count++, 0);
    }

    Trie node = root;                     // last trie node visited
    int ch;
    while ((ch = getchar()) != EOF) {

        Trie next = getT(node, ch);
        if (next != NULL) {
            // The current string extends: count the visit and descend.
            sawT(next);
            node = next;
            continue;
        }

        // ---- Emit the code for what has been matched so far ----
        if (node != root) {
            putBits(width, getCodeT(node));
        }
        else {
            // A one-char string that is not in the table yet; this can
            // only legitimately happen in escape mode.
            if (!E_FLAG) {
                DIE_FORMAT("E_FLAG false, yet (EMPTY, K=%d) not in table\n", ch);
            }
            putBits(width, ESCAPE);
            putBits(CHAR_BIT, ch);
        }

        // ---- Record the new string while there is still room ----
        if (code_count < table_cap) {
            insertT(node, ch, code_count++, 1);
        }

        // ---- Adjust the code width, or prune a full table ----
        if (code_count != table_cap) {
            // Widen only once the number of assigned codes exceeds what
            // the current width can express.
            if (code_count > (1 << width))
                width++;
        }
        else if (P_FLAG) {
            code_count = prune(&root, E_FLAG);
            width = get_nbits(code_count);
        }

        // ---- Choose the starting node for the next match ----
        // Compared against the current root on purpose: prune() receives
        // &root and so may have replaced it.
        if (node == root)
            continue;                     // new single char, nothing to re-read

        node = getT(root, ch);
        if (node != NULL) {
            sawT(node);                   // count the visit
            continue;
        }

        // (EMPTY, ch) is not in the table.
        if (!E_FLAG) {
            DIE_FORMAT("E_FLAG false, yet (EMPTY, K=%d) not in table\n", ch);
        }
        ungetc(ch, stdin);                // handled as a single char next time
        node = root;
    }

    // Send whatever prefix was still being matched at end of input.
    if (node != root)
        putBits(width, getCodeT(node));

    flushBits();

    destroyT(root);
    return 0;
}
Example #3
0
File: lzw.c Project: g-mainak/LZW
/*
 * Encode standard input as a stream of codes written with putBits().
 *
 * Only three entries of args are read here:
 *   args[1]  bounds the table: entries are added only while
 *            h->numElements < (1 << args[1]); the hash itself is sized
 *            1 << (args[1] + 1)
 *   args[2]  when non-zero, a RESET is sent and prune() is called once the
 *            table holds (1 << args[1]) - 1 elements
 *   args[3]  when non-zero, unseen single characters are sent as
 *            ESCAPE + raw byte, and codes start at 3 bits instead of 9
 *
 * The three values are printed as a "%d %d %d|" header before any code.
 *
 * Returns 0 on success, 1 if the table could not be allocated.
 */
int encode(int args[])
{
	Hash *h = malloc(sizeof *h);
	if (h == NULL)
	{
		fprintf(stderr, "encode: out of memory\n");
		return 1;
	}

	int size = 1 << (args[1]+1);
	unsigned int stamp = 1;		/* running counter copied into entries as they are sent */
	initialize(h, size, args[3], stamp);

	int c = EMPTY, k;
	int bits = (args[3]) ? 3 : 9;	/* current code width */

	printf("%d %d %d|", args[1], args[2], args[3]);

	while ((k = getchar()) != EOF)
	{
		/* Look the pair up once and reuse the result. */
		int next = findInHash(h, c, (char)k);
		if (next != EMPTY)
		{
			c = next;
			continue;
		}

		if (args[3] && findInHash(h, EMPTY, (char)k) == EMPTY)
		{
			/* k has never been seen on its own: send an escape sequence,
			 * preceded by the pending prefix if there is one. */
			if (c != EMPTY)
			{
				putBits(bits, c);
				h->shadowArray[c]->time = (++stamp);
			}
			putBits(bits, ESCAPE);
			putBits(CHAR_BIT, k);
			if (h->numElements < (1 << args[1]))
			{
				int latestCode = insert(h, EMPTY, (char)k, 1);
				/* The new code no longer fits the current width. */
				if (latestCode >= (1 << bits))
				{
					putBits(bits, INCR);
					bits++;
				}
			}
			c = EMPTY;
		}
		else
		{
			/* Send the prefix, record (prefix, k), restart from k alone. */
			putBits(bits, c);
			h->shadowArray[c]->time = (++stamp);
			if (h->numElements < (1 << args[1]))
			{
				int latestCode = insert(h, c, (char)k, 1);
				if (latestCode >= (1 << bits))
				{
					putBits(bits, INCR);
					bits++;
				}
			}
			c = findInHash(h, EMPTY, (char)k);
		}

		if (args[2] && h->numElements == (1 << args[1]) - 1)
		{
			/* Table is one short of its limit: announce a reset, send the
			 * current prefix, prune, and start over with no prefix. */
			putBits(bits, RESET);
			putBits(bits, c);
			prune(args, &h, stamp);
			c = EMPTY;
		}
	}

	/* Send the prefix still pending at end of input. */
	if (c != EMPTY)
		putBits(bits, c);
	flushBits();
	freeHash(h);
	return 0;
}
Example #4
0
int main(int argc, char **argv)
{
	clock_t begin, end;
	double time_spent;
	begin = clock();
	int NUM_FINDCODE_CALLS = 0;
	int NUM_HASHCELLS_VISITED = 0;
	int c = 0; //char in (pref, char)
	int prefix = EMPTY_CODE;
	int index = EMPTY_CODE;
	int MAXBITS = 15; //default
	char DUMP_TO[PATH_MAX];
	char INIT_FROM[PATH_MAX];
	int PRUNE = -1;
	Table *stringTable = initStringTable(MAXBITS);
//setting params from cmd line argv's STARTING FROM 2 BC ENCODE/DECODE
	for (int i = 2; i < argc; ++i)
	{
		if (strcmp(argv[i], "-m") == 0) {
			//ERROR CHECK FOR IF NEXT ARGV IS NOT INTEGER STRING??? 
			MAXBITS = atoi(argv[i + 1]);
			i++;
		} else if (strcmp(argv[i], "-o") == 0) {
			if(i + 1 == argc) {
				fprintf(stderr, "usage: encode [-m MAXBITS | -o NAME | -i NAME | -p USED]* OR decode [-o NAME]*\n");
				exit(EXIT_FAILURE);
				//EDGE CASE: IF -O -O HI, THEN DO WE DUMP TO '-O' AND SIGNAL ERROR ON HI? OR DO WE DUMP TO -O, AND OVERRIDE WITH DUMPING TO 'HI'? 
			}
			strncpy(DUMP_TO, argv[i + 1], strlen(argv[i + 1]));
			i++;
		} else if (strcmp(argv[i], "-i") == 0) {
			if(i + 1 == argc) {
				fprintf(stderr, "usage: encode [-m MAXBITS | -o NAME | -i NAME | -p USED]* OR decode [-o NAME]*\n");
				exit(EXIT_FAILURE);
			}
			strncpy(INIT_FROM, argv[i + 1], strlen(argv[i + 1]));
			i++;
		} else if (strcmp(argv[i], "-p") == 0) {
			if(i + 1 == argc) {
				fprintf(stderr, "usage: encode [-m MAXBITS | -o NAME | -i NAME | -p USED]* OR decode [-o NAME]*\n");
				exit(EXIT_FAILURE);
			}
			PRUNE = atoi(argv[i + 1]); //test if not an integer; what if prune is 0? w
			i++;
			if(PRUNE){}
		} else {
			fprintf(stderr, "usage: encode [-m MAXBITS | -o NAME | -i NAME | -p USED]* OR decode [-o NAME]*\n");
			exit(EXIT_FAILURE);
		}
	}
	fprintf(stderr, "argv[0]: %s\n", argv[0]);
	if(strcmp(argv[1], "encode") == 0) {
//ENCODE	
		while((c = getchar()) != EOF) {
			index = findCode(prefix, c, stringTable, &NUM_FINDCODE_CALLS, &NUM_HASHCELLS_VISITED);
			if (index != -1) //(pref, char) found in stringTable
			{
				prefix = index;
				(stringTable->table[prefix]->useCount)++;
				continue;
			} else { //not found in stringTable
				putBits(stringTable->nBits, prefix);
				insertToTable(prefix, c, &stringTable, PRUNE);
				prefix = findCode(0, c, stringTable, &NUM_FINDCODE_CALLS, &NUM_HASHCELLS_VISITED); //start search again at finalChar of newly found sequence
				(stringTable->table[prefix]->useCount)++;
			}
		}
		putBits(stringTable->nBits, prefix);
		flushBits();
		// printTable(stringTable, "encode");
		// fprintf(stderr, "\nNUM_FINDCODE_CALLS: %d\nNUM_HASHCELLS_VISITED: %d\nAVG_SEARCH_RATIO: %d\n", NUM_FINDCODE_CALLS, NUM_HASHCELLS_VISITED, NUM_HASHCELLS_VISITED / NUM_FINDCODE_CALLS);
	} else if(strcmp(argv[1], "decode") == 0) {
//DECODE
		char ENCODE_OR_DECODE[25] = "decode";
		int oldIndex = 0, firstCharOfNewIndex = 0; //used to build stringTable, 1 step behind encode
		int newIndex = 0;
		int cascadingIndex = 0; //used to print a code string recursively
		int kwkwk = 0; //used in kwkwk case
		int prunedThisPass = 0;
		int HIT_THE_LIMIT = 0;

		while( (newIndex = cascadingIndex = getBits(stringTable->nBits)) != EOF) {
				//nBits was incremented just before to accomodate increased nBits in ENCODE that hasn't been automatically detected in DECODE
				//and we decrement it now bc it will be automatically incremented in INSERTTABLE
				//but if == MAXBITS, then we did not artificially increment nBits last round 

			if(stringTable->last + 1 == stringTable->size && !HIT_THE_LIMIT ) {//2nd condition happens when table is already maxSize, in which case no more doubling or pruning happens
				if(!prunedThisPass) //when pruning results in an at-limit table
					stringTable->nBits--;
				if(stringTable->size != 1<<stringTable->nBits){
					fprintf(stderr, "size: %d, nBits: %d\n", stringTable->size, stringTable->nBits);
					exit(EXIT_FAILURE);
				} else {
				}
			}
			if(newIndex > stringTable->last) //newIndex is an unknown code (kwkwk abnormal case)
			{
				kwkwk = 1;
				if(newIndex < stringTable->size)
					stringTable->table[newIndex]->useCount++; //useCount usually incremented in printRecursive function, which will not receive newIndex in this case
				cascadingIndex = oldIndex;
			}
			printRecursive(&cascadingIndex, stringTable);
			if (kwkwk == 1) {
				printf("%c", firstCharOfNewIndex); //output should be oldCode, oldCode, firstCharOfOldCode
				kwkwk = 0;
			}
			if(cascadingIndex >= stringTable->size) {
				fprintf(stderr, "cascadingIndex: %d, size: %d\n", cascadingIndex, stringTable->size);
				exit(EXIT_FAILURE);
			}
			firstCharOfNewIndex = stringTable->table[cascadingIndex]->c; //finalK = char(c)
			if(oldIndex != 0 && !prunedThisPass) { //every time except first time, and after pruning, add new code to table
				insertToTable(oldIndex, firstCharOfNewIndex, &stringTable, PRUNE);
			}
			prunedThisPass = 0;
			oldIndex = newIndex;
			if(stringTable->last + 1 >= stringTable->size) { 
			//decode is one step behind encode; deocde inserts new code every time it reads a code, starting from 2nd code
			//encode adds a code at every code starting from 1st code. CURRENT CODE + firstChar of NEXT CODE was the last code
			//encode tried to add to table and PRUNED or DOUBLE'd
			//=>PRUNING or DOUBLING::
			//if DOUBLING: next code will increment nBits
			//if PRUNING: next code is encoded from a PRUNED table, not CURRENT table (and we don't insert next round)
				if(stringTable->nBits != stringTable->MAXBITS) {
					if(stringTable->nBits == 8){
						exit(EXIT_FAILURE);
					}
					stringTable->nBits++;
				} else { //next insert exceeds table size, and table size is MAXBITS size, so PRUNE
					HIT_THE_LIMIT = 1;
					if(PRUNE != -1) {
						prune(&stringTable, PRUNE, ENCODE_OR_DECODE);
						prunedThisPass = 1;
						if (stringTable->last + 1 == stringTable->size)
						{
							fprintf(stderr, "HERE!!! last: %d, size: %d\n", stringTable->last, stringTable->size);
							char temp[10] = "encode";
							printTable(stringTable, temp);
						}
						if(stringTable->last + 1 != 1<<stringTable->MAXBITS)
							HIT_THE_LIMIT = 0;
					}
				}
			}
		}
		// printTable(stringTable, "decode");
	}
	moses(stringTable);
	end = clock();
	time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
	fprintf(stderr, "time_spent: %f\n", time_spent);
}
Example #5
0
/*
 * Encode infile into outfile.
 *
 * A header holding maxBits, window and eFlag is written first. The input is
 * then read one byte at a time; the string table holds (c, k) pairs where c
 * is the code of an entry's prefix and k the character appended to it. A
 * STOP_CODE is written after the last code and the bit buffer is flushed.
 *
 * infile   stream to read bytes from
 * outfile  stream the header and codes are written to
 * maxBits  passed to stringTableNew / pruneInfoNew and stored in the header
 * window   passed to checkPrune and stored in the header
 * eFlag    escape mode: codes start at 2 bits instead of 9
 */
void encode(FILE* infile, FILE* outfile, unsigned int maxBits, unsigned int window, bool eFlag)
{
    stringTable* table = stringTableNew(maxBits, eFlag);
    pruneInfo* pi = pruneInfoNew(maxBits);

    // write maxBits, window, and eFlag to outfile
    putBits(NBITS_MAXBITS, maxBits, outfile);
    putBits(NBITS_WINDOW, window, outfile);
    putBits(NBITS_EFLAG, eFlag ? 1 : 0, outfile);

    unsigned int c = EMPTY_PREFIX;          // code of the prefix matched so far
    int k;                                  // byte just read

    unsigned char nbits = (eFlag) ? 2 : 9;  // number of bits sent per code

    while((k = fgetc(infile)) != EOF)
    {
        tableElt* elt = stringTableHashSearch(table, c, k);

        if(elt)
        {
            // (c, k) is known: keep extending the current string.
            c = elt->code;
            continue;
        }

        if(c == EMPTY_PREFIX)
        {
            // we're escaping k, so leave the prefix empty
            escapeChar(table, pi, k, &nbits, outfile);

            table = checkPrune(table, pi, window, &c, &nbits, outfile);
            continue;
        }

        // Send the prefix, record (c, k), then restart from k alone.
        putBits(nbits, c, outfile);
        pruneInfoSawCode(pi, c);

        stringTableAdd(table, c, k, NULL);

        table = checkPrune(table, pi, window, &c, &nbits, outfile);

        checkNbits(&nbits, table, outfile);

        tableElt* kCode = stringTableHashSearch(table, EMPTY_PREFIX, k);
        if(kCode)
        {
            c = kCode->code;
        }
        else
        {
            escapeChar(table, pi, k, &nbits, outfile);
            c = EMPTY_PREFIX; // since we escaped k, we now have no prefix

            // Keep whatever table checkPrune hands back, as the other two
            // call sites do; dropping it would leave `table` pointing at
            // the old table after a prune.
            table = checkPrune(table, pi, window, &c, &nbits, outfile);
        }
    }

    // Send the prefix still pending at end of input.
    if(c != EMPTY_PREFIX) putBits(nbits, c, outfile);

    putBits(nbits, STOP_CODE, outfile);
    flushBits(outfile);
    stringTableDelete(table);
    pruneInfoDelete(pi);
}
Example #6
0
File: lzw.c Project: krishpop/LZW
// Encodes standard input with the LZW algorithm, writing codes via putBits.
// The string table is kept in a trie; this function also drives pruning of
// that trie (flag p) and escaping of single characters (flag e).
//   e - escape flag
//   m - requested maximum code width; anything outside 9..20 becomes 12
//   p - prune flag
void encode(int e, int m, int p) {
    Trie table;
    createT(&table, e);

    int prefix = EMPTY;                 // index of the prefix matched so far
    int ch;                             // character being tested after prefix

    // Starting counts: 3 codes / 2 bits with the escape flag, else 259 / 9.
    int numCodes = e ? 3 : 259;
    int width = e ? 2 : 9;
    int maxbits = (m > 8 && m <= 20) ? m : 12;
    int maxcodes = 1 << maxbits;
    int escaped = false;                // set when this pass escaped ch

    int pruneCount = 0;

    // Header so the other side knows the settings.
    printf("%02d:%d:%d:", maxbits, p, e);

    while ((ch = getchar()) != EOF) {
        table[prefix].appearances++;
        int hit = searchT(&table, prefix, ch, e);

        // (prefix, ch) is in the table: extend the match and read on.
        if (hit >= 0) {
            prefix = hit;
            continue;
        }

        // Not in the table: send the prefix, if there is one.
        if (prefix != EMPTY) {
            putBits(width, prefix);
        }

        // Prune just before the table would reach maxcodes. A 0 is sent to
        // indicate that a prune occurred; ch is pushed back so it is not
        // lost and gets read again against the rebuilt table.
        if (p && numCodes + 1 == maxcodes) {
            putBits(width, 0);
            pruneCount++;

            Trie rebuilt;
            createT(&rebuilt, e);
            int before = numCodes;
            numCodes = pruneT(&table, &rebuilt, e, before);
            destroyT(&table, before);
            table = rebuilt;

            width = codeLength(numCodes);
            prefix = EMPTY;
            ungetc(ch, stdin);
            continue;
        }

        // Add (prefix, ch) unless this is the escape-mode, empty-prefix
        // case, which is handled by the escape step below.
        if ((!e || prefix != EMPTY) && numCodes < maxcodes) {
            if (tableFilled(numCodes + 1)) {
                expandT(&table, (numCodes + 1) * 2);
                width++;
            }
            addT(&table, prefix, ch, numCodes);
            numCodes++;
        }

        // Escape flag on and ch has no entry of its own yet: send ESC
        // followed by the raw character, then add (EMPTY, ch).
        if (e && searchT(&table, EMPTY, ch, e) < 0) {
            putBits(width, ESC);
            putBits(8, ch);
            if (numCodes < maxcodes) {
                if (codeLength(numCodes + 1) != codeLength(numCodes)) {
                    expandT(&table, (numCodes + 1) * 2);
                    width++;
                }
                addT(&table, EMPTY, ch, numCodes);
                numCodes++;
            }
            escaped = true;
        }

        // Start the next match with an empty prefix. ch is re-read from the
        // top unless it was just escaped.
        prefix = EMPTY;
        if (escaped) {
            escaped = false;
        }
        else {
            ungetc(ch, stdin);
        }
    }

    // Send the prefix still pending at end of input.
    if (prefix != EMPTY) {
        putBits(width, prefix);
    }

    putBits(width, EOFILE);             // end-of-file marker
    flushBits();
    destroyT(&table, numCodes);
}