// Compress STDIN into stream of codes // First byte sent in the form [MAXBITS (6 bits)][E_FLAG][P_FLAG] // // The only special code sent is ESCAPE for -e; // everything else is derived in decode. // // Pruning performed as soon as the table is full int encode(int MAXBITS, int E_FLAG, int P_FLAG) { // Send option args encoded as: // MAXBITS: 6 bits (since max value is 20) // E_FLAG: 1 bit // P_FLAG: 1 bit putBits(6, MAXBITS); putBits(1, E_FLAG); putBits(1, P_FLAG); int next_code = 0; // == number of codes assigned == # elts in ARRAY int nBits = 1; // #bits required to send NEXT code if (E_FLAG) next_code = 2; // already assigned 0 to QUIT // 1 to ESCAPE // ============== INITIALIZE TRIE ================ Trie t = createT(); if (!E_FLAG) { // initialize all one-char strings for (int K = 0; K < 256; K++) insertT(t, K, next_code++, 0); nBits = 8; } // ================ ENCODE INPUT ================= Trie C = t; // last node visited int K; while ((K = getchar()) != EOF) { Trie child = getT(C, K); if (child != NULL) { // increment NAP and go down trie sawT(child); C = child; } else { // ============ PUTBITS ========================== if (C == t) { // new 1-char string if (!E_FLAG) DIE_FORMAT("E_FLAG false, yet (EMPTY, K=%d) not in table\n", K); putBits(nBits, ESCAPE); putBits(CHAR_BIT, K); } else { // Output code C putBits(nBits, getCodeT(C)); } // =========== INSERT ============================== // insert new code if table not full if (next_code < (1 << MAXBITS)) { insertT(C, K, next_code++, 1); } // =========== UPDATE NBITS ======================= // Prune as soon as last slot taken if (next_code == (1 << MAXBITS)) { if (P_FLAG) { next_code = prune(&t, E_FLAG); nBits = get_nbits(next_code); } else ; } // Increase NBITS only when #codes assigned // exceeds it else if (next_code > (1 << nBits)) nBits++; // ============ RESET C ===== if (C == t) // new single-char, so skip continue; else { C = getT(t, K); if (C == NULL) { // (EMPTY, K) not in table if (!E_FLAG) DIE_FORMAT("E_FLAG false, yet (EMPTY, K=%d) not in table\n", K); ungetc(K, stdin); // single-char on next insert C = t; } else sawT(C); // increment NAP } } } // Put leftover known prefix if (C != t) { putBits(nBits, getCodeT(C)); } flushBits(); destroyT(t); return 0; }
// encodes the input stream by taking advantage of lzw algorithm // also implements logic to prune the trie structure used to store the string // table, and escapes single character codes void encode(int e, int m, int p) { Trie st; createT(&st, e); int c = EMPTY; // same as (EMPTY, K), index of the prefix // int value of char k we are testing to see if c,k exists in the table int k; // number of codes you have inserted, 256 without escape flag... int codeCount = (e) ? 3 : 259; int bitCount = (e) ? 2 : 9; int maxbits = (m<=8 || m>20) ? 12 : m; int maxcodes = (1 << maxbits); int firstRead = false; // if first read of k when e flag is present int pruneCount = 0; printf("%02d:%d:%d:", maxbits, p, e); while((k = getchar())!= EOF) { st[c].appearances++; int ck = searchT(&st, c, k, e); // will increment c's appearance once // if ck is not in the table if(ck<0) { // if prune flag & reached maxcodes, do a prune before next insert // into the table, putBits 0 to indicate a prune has occurred // a prune should likewise happen in decode if(c!=EMPTY) { putBits(bitCount, c); } // add ck to the table as long as (e && c == EMPTY) is false // we will add (empty, k) to the table after this condition // !e and c==EMPTY will never happen, bc all chars will have been // added as children to empty // prune right before we reach maxcodes, we would have lost the next // code we insert anyways, now we won't lose k if(p&&(codeCount+1==maxcodes)) { putBits(bitCount, 0); pruneCount++; Trie newst; createT(&newst, e); int oldCodeCount = codeCount; codeCount=pruneT(&st, &newst, e, oldCodeCount); destroyT(&st, oldCodeCount); st = newst; bitCount=codeLength(codeCount); c=EMPTY; ungetc(k, stdin); continue; } // if(!e || c!=EMPTY) { if(codeCount<maxcodes) { if(tableFilled(codeCount+1)) { int newSize = (codeCount+1)*2; expandT(&st, newSize); bitCount++; } addT(&st, c, k, codeCount); codeCount++; } } // if escape flag is on and k is not yet added to the table if(e && searchT(&st, EMPTY, k, e) < 0) { putBits(bitCount, ESC); // 1 is the index of escape character putBits(8, k); if(codeCount<maxcodes) { if(codeLength(codeCount+1)-codeLength(codeCount)) { int newSize = (codeCount+1)*2; expandT(&st, newSize); bitCount++; } addT(&st, EMPTY, k, codeCount); codeCount++; } firstRead=true; // encode escaped something, don't unget(k) // if this happens } c = EMPTY; // make c empty again if(!firstRead) { ungetc(k, stdin); // put k back to start reading } // a new character else { firstRead = false; } } else { c=ck; // set c to index of next code } } if(c!=EMPTY) { putBits(bitCount, c); } putBits(bitCount, EOFILE); // puts EOF flushBits(); destroyT(&st, codeCount); }
int decode() { // Decode first byte as options int MAXBITS = getBits(6); int E_FLAG = getBits(1); int P_FLAG = getBits(1); if (MAXBITS <= CHAR_BIT || MAXBITS > 20 || E_FLAG == EOF || P_FLAG == EOF) DIE("decode: bit stream not encoded by encode"); int next_code = 0; // == number of codes assigned == # elts in ARRAY int nBits = 1; // #bits required to send NEXT code if (E_FLAG) next_code = 2; // already assigned 0 to QUIT // 1 to ESCAPE // =============== INITIALIZE TRIE ===================== Trie t = createT(); if (!E_FLAG) { // initialize all one-char strings for (int K = 0; K < 256; K++) insertT(t, K, next_code++, 0); nBits = 8; } // =============== DECODE BIT STREAM ==================== int C; int last_insert = EMPTY; // code assigned to last inserted node while ((C = getBits(nBits)) != EOF) { // -e: Break on C = QUIT (flushBits() junk) if (E_FLAG && C == QUIT) break; // ========== PRINT STRING WITH NEW CODE ======= int finalK; // first char in C string // -e: check for ESCAPE if (E_FLAG && C == ESCAPE) { finalK = getBits(CHAR_BIT); if (finalK == EOF) DIE("decode: bit stream not encoded by encode"); putchar(finalK); } else { int KwK = 0; finalK = putstring(C, &KwK); // DIEs if C not in table // If C was just inserted w/ STANDBY (KwK), // print oldC==Kw then K if (KwK) putchar(finalK); } // =========== PATCH LAST-INSERTED STRING ========= // K now known for word inserted with prefix OLDC if (last_insert != EMPTY) updateK(last_insert, finalK); // =========== INSERT NEW CODE ==================== // insert new code if table not full if (next_code < (1 << MAXBITS)) { if (E_FLAG && (C == ESCAPE)) { insertT(t, finalK, next_code++, 1); last_insert = EMPTY; } else { // Insert node with C as prefix and K=STANDBY insertT( C_to_T(C), STANDBY, next_code, 1); last_insert = next_code++; } } else last_insert = EMPTY; // no insert to update next time // =========== UPDATE NBITS ======================= // Prune as soon as last slot taken if (next_code == (1 << MAXBITS)) { if (P_FLAG) { next_code = prune(&t, E_FLAG); nBits = get_nbits(next_code); // no need to update K in insertion // since it'll be pruned last_insert = EMPTY; } else ; } // Increase NBITS only when #codes assigned // exceeds it else if (next_code > (1 << nBits)) nBits++; } destroyT(t); return 0; }
// decodes using lzw algorithm void decode() { int code, newcode; int maxbits = getFlags(2); int maxcodes = (1 << maxbits); int p = getFlags(1); int e = getFlags(1); Trie st; createT(&st, e); int bitCount = (e) ? 2 : 9; int codeCount = (e) ? 3 : 259; int oldc = EMPTY; bool kwk = false; char baseK; int pruneCount=0, kwkcount=0; while((newcode=code=getBits(bitCount))!=EOFILE) { // under these conditions, a valid prune can occur if(p && code==EMPTY) { pruneCount++; Trie newst; createT(&newst, e); int oldCodeCount=codeCount; codeCount=pruneT(&st, &newst, e, oldCodeCount); destroyT(&st, oldCodeCount); st=newst; bitCount=codeLength(codeCount); oldc=EMPTY; continue; } if(newcode>=codeCount+1) { ERROR("code impossible to decode\n"); } // read an escaped character else if(e && code==ESC) { if(tableFilled(codeCount+1)&&bitCount!=codeLength(codeCount+1)) { if(bitCount<maxbits) { bitCount++; } } code=getBits(8); baseK=(char)code; putchar(baseK); if(codeCount<maxcodes) { if(tableFilled(codeCount+1)) { expandT(&st, (codeCount)*2); } addT(&st, oldc, code, codeCount); codeCount++; // then we need to add the char k as it's own code if(oldc!=EMPTY) { if(tableFilled(codeCount+1)) { expandT(&st, (codeCount)*2); } addT(&st, EMPTY, code, codeCount); codeCount++; if(tableFilled(codeCount)&&bitCount!=codeLength(codeCount)) { if(bitCount<maxbits) { bitCount++; } } } } oldc=EMPTY; } else { // no escape character called, would read c and k normally if(newcode==codeCount) { kwk=true; code=oldc; // omega, need to print k after } baseK=outputCode(&st, code, true); if(kwk) { putchar(baseK); kwkcount++; } // oldc is empty on the first read, and when the e-flag is present // oldc is zero when the last character read was escaped if(oldc!=EMPTY) { if(codeCount<maxcodes) { if(tableFilled(codeCount+1)) { expandT(&st, (codeCount)*2); } addT(&st, oldc, (int)baseK, codeCount); codeCount++; if(kwk) { // we added kwk after seeing it once already in the prev // scan through so we should increase its number of apps st[newcode].appearances++; // this scenario means we have kkk, without w in between // so if(st[st[oldc].prefix].prefix==EMPTY&&kwkcount==1) { st[oldc].appearances--; } kwk=false; } if(tableFilled(codeCount+1)) { if(bitCount<maxbits&&bitCount!=codeLength(codeCount+1)) { bitCount++; } } } } else if(e) { // if e-flag & last char was excaped, increase bit count if // table is filled now if(tableFilled(codeCount+1)) { if(bitCount<maxbits) { bitCount++; } } } oldc = newcode; } } destroyT(&st, codeCount); }