/**************************************************************************** * Function : EncodeLZSSByFile * Description: This function will read an input file and write an output * file encoded according to the traditional LZSS algorithm. * This algorithm encodes strings as 16 bits (a 12 bit offset * + a 4 bit length). * Parameters : fpIn - pointer to the open binary file to encode * fpOut - pointer to the open binary file to write encoded * output * Effects : fpIn is encoded and written to fpOut. Neither file is * closed after exit. * Returned : EXIT_SUCCESS or EXIT_FAILURE ****************************************************************************/ int EncodeLZSSByFile(FILE *fpIn, FILE *fpOut) { bit_file_t *bfpOut; encoded_string_t matchData; unsigned int i, c; unsigned int len; /* length of string */ /* head of sliding window and lookahead */ unsigned int windowHead, uncodedHead; /* use stdin if no input file */ if (fpIn == NULL) { fpIn = stdin; } if (fpOut == NULL) { /* use stdout if no output file */ bfpOut = MakeBitFile(stdout, BF_WRITE); } else { /* convert output file to bitfile */ bfpOut = MakeBitFile(fpOut, BF_WRITE); } windowHead = 0; uncodedHead = 0; /* Window Size : 2^12 same as offset */ /************************************************************************ * Fill the sliding window buffer with some known vales. DecodeLZSS must * use the same values. If common characters are used, there's an * increased chance of matching to the earlier strings. ************************************************************************/ memset(slidingWindow, ' ', WINDOW_SIZE * sizeof(unsigned char)); /* MAX_CODED : 2 to 17 because we cant have 0 to 1 */ /************************************************************************ * Copy MAX_CODED bytes from the input file into the uncoded lookahead * buffer. ************************************************************************/ for (len = 0; len < MAX_CODED && (c = getc(fpIn)) != EOF; len++) { uncodedLookahead[len] = c; } if (len == 0) { return (EXIT_SUCCESS); /* inFile was empty */ } /* Look for matching string in sliding window */ InitializeSearchStructures(); matchData = FindMatch(windowHead, uncodedHead); /* now encoded the rest of the file until an EOF is read */ while (len > 0) { if (matchData.length > len) { /* garbage beyond last data happened to extend match length */ matchData.length = len; } if (matchData.length <= MAX_UNCODED) { /* not long enough match. write uncoded flag and character */ BitFilePutBit(UNCODED, bfpOut); BitFilePutChar(uncodedLookahead[uncodedHead], bfpOut); matchData.length = 1; /* set to 1 for 1 byte uncoded */ } else { unsigned int adjustedLen; /* adjust the length of the match so minimun encoded len is 0*/ adjustedLen = matchData.length - (MAX_UNCODED + 1); /* match length > MAX_UNCODED. Encode as offset and length. */ BitFilePutBit(ENCODED, bfpOut); BitFilePutBitsInt(bfpOut, &matchData.offset, OFFSET_BITS, sizeof(unsigned int)); BitFilePutBitsInt(bfpOut, &adjustedLen, LENGTH_BITS, sizeof(unsigned int)); } /******************************************************************** * Replace the matchData.length worth of bytes we've matched in the * sliding window with new bytes from the input file. ********************************************************************/ i = 0; while ((i < matchData.length) && ((c = getc(fpIn)) != EOF)) { /* add old byte into sliding window and new into lookahead */ ReplaceChar(windowHead, uncodedLookahead[uncodedHead]); uncodedLookahead[uncodedHead] = c; windowHead = Wrap((windowHead + 1), WINDOW_SIZE); uncodedHead = Wrap((uncodedHead + 1), MAX_CODED); i++; } /* handle case where we hit EOF before filling lookahead */ while (i < matchData.length) { ReplaceChar(windowHead, uncodedLookahead[uncodedHead]); /* nothing to add to lookahead here */ windowHead = Wrap((windowHead + 1), WINDOW_SIZE); uncodedHead = Wrap((uncodedHead + 1), MAX_CODED); len--; i++; } /* find match for the remaining characters */ matchData = FindMatch(windowHead, uncodedHead); } /* we've decoded everything, free bitfile structure */ BitFileToFILE(bfpOut); return (EXIT_SUCCESS); }
/* * aazip - compress files using a transform based compression system */ int main(int argc, char** argv) { FILE* f; bit_file_t* of; char* infile,*outfile; uint8_t* input,*lupdate,*bwt,lumode; int32_t I,osize,opt; uint32_t size; mode_t lupdate_alg; float ient,oent; uint64_t cost,tstart,tstop,elapsed; /* parse command line parameter */ opt = GETOPT_FINISHED; if (argc <= 1) { print_usage(argv[0]); exit(EXIT_FAILURE); } while ((opt = getopt(argc, argv, "m:h")) != GETOPT_FINISHED) { switch (opt) { case 'm': if (strcmp(optarg, "simple") == 0) lupdate_alg = SIMPLE; else if (strcmp(optarg, "mtf") == 0) lupdate_alg = MTF; else if (strcmp(optarg, "fc") == 0) lupdate_alg = FC; else if (strcmp(optarg, "wfc") == 0) lupdate_alg = WFC; else if (strcmp(optarg, "timestamp") == 0) lupdate_alg = TS; else fatal("ERROR: mode <%s> unknown!\n", optarg); break; case 'h': default: print_usage(argv[0]); exit(EXIT_FAILURE); } } /* read input file name */ if (optind < argc) infile = argv[optind]; else { print_usage(argv[0]); exit(EXIT_FAILURE); } /* read input file */ f = safe_fopen(infile,"r"); size = safe_filesize(f); input = (uint8_t*) safe_malloc(size+1); if (fread(input,1,size,f)!=(size_t)size) { fatal("read input file."); } safe_fclose(f); input[size] = 0; /* TODO calculate input entropy */ ient = 0.0f; /* perform bwt */ bwt = (uint8_t*) safe_malloc(size); tstart = gettime(); bwt = transform_bwt(input,size,bwt,&I); /* peform list update */ switch (lupdate_alg) { case SIMPLE: fprintf(stdout,"ALGORITHM: simple\n"); lupdate = lupdate_simple(bwt,size,input,&cost); break; case MTF: fprintf(stdout,"ALGORITHM: move to front\n"); lupdate = lupdate_movetofront(bwt,size,input,&cost); break; case FC: fprintf(stdout,"ALGORITHM: frequency count\n"); lupdate = lupdate_freqcount(bwt,size,input,&cost); break; case WFC: fprintf(stdout,"ALGORITHM: weighted frequency count\n"); lupdate = lupdate_wfc(bwt,size,input,&cost); break; case TS: fprintf(stdout,"ALGORITHM: timestamp\n"); lupdate = lupdate_timestamp(bwt,size,input,&cost); break; default: fatal("unkown list update algorithm."); } fprintf(stdout,"INPUT: %s (%d bytes)\n",infile,size); fprintf(stdout,"COST: %lu\n",cost); /* TODO calculate entropy after list update*/ oent = 0.0f; /* write output */ outfile = safe_strcat(infile,".aazip"); /* create bit file for writing */ of = BitFileOpen(outfile, BF_WRITE); /* write aa zip header */ BitFilePutChar('A', of); BitFilePutChar('A', of); /* write I */ BitFilePutBitsInt(of,&I,32,sizeof(uint32_t)); /* write lupdate mode */ lumode = lupdate_alg; BitFilePutBitsInt(of,&lumode,8,sizeof(uint8_t)); fprintf(stderr,"I %d lumode %d\n",I,lumode); /* perform huffman coding */ encode_huffman(lupdate,size,of); tstop = gettime(); elapsed = tstop - tstart; fprintf(stdout,"TIME: %.3f s\n",(float)elapsed/1000000); /* flush and get file stats */ BitFileFlushOutput(of,0); f = BitFileToFILE(of); osize = ftell(f); fprintf(stdout,"OUTPUT: %s\n",outfile); fprintf(stdout,"ENTROPY: %.2f bps / %.2f bps\n",ient,oent); fprintf(stdout,"COMPRESSION: %.2f\n",((float)osize/(float)size)*100); /* clean up*/ safe_fclose(f); free(input); free(bwt); return (EXIT_SUCCESS); }
/**************************************************************************** * Function : HuffmanDecodeFile * Description: This routine reads a Huffman coded file and writes out a * decoded version of that file. * Parameters : inFile - Open file pointer for file to decode * outFile - Open file pointer for file receiving decoded data * Effects : Huffman encoded file is decoded * Returned : 0 for success, -1 for failure. errno will be set in the * event of a failure. Either way, inFile and outFile will * be left open. ****************************************************************************/ int HuffmanDecodeFile(FILE *inFile, FILE *outFile) { huffman_node_t *huffmanArray[NUM_CHARS]; /* array of all leaves */ huffman_node_t *huffmanTree; huffman_node_t *currentNode; int i, c; bit_file_t *bInFile; /* validate input and output files */ if ((NULL == inFile) || (NULL == outFile)) { errno = ENOENT; return -1; } bInFile = MakeBitFile(inFile, BF_READ); if (NULL == bInFile) { perror("Making Input File a BitFile"); return -1; } /* allocate array of leaves for all possible characters */ for (i = 0; i < NUM_CHARS; i++) { if ((huffmanArray[i] = AllocHuffmanNode(i)) == NULL) { /* allocation failed clear existing allocations */ for (i--; i >= 0; i--) { free(huffmanArray[i]); } inFile = BitFileToFILE(bInFile); return -1; } } /* populate leaves with frequency information from file header */ if (0 != ReadHeader(huffmanArray, bInFile)) { for (i = 0; i < NUM_CHARS; i++) { free(huffmanArray[i]); } inFile = BitFileToFILE(bInFile); return -1; } /* put array of leaves into a huffman tree */ if ((huffmanTree = BuildHuffmanTree(huffmanArray, NUM_CHARS)) == NULL) { FreeHuffmanTree(huffmanTree); inFile = BitFileToFILE(bInFile); return -1; } /* now we should have a tree that matches the tree used on the encode */ currentNode = huffmanTree; while ((c = BitFileGetBit(bInFile)) != EOF) { /* traverse the tree finding matches for our characters */ if (c != 0) { currentNode = currentNode->right; } else { currentNode = currentNode->left; } if (currentNode->value != COMPOSITE_NODE) { /* we've found a character */ if (currentNode->value == EOF_CHAR) { /* we've just read the EOF */ break; } fputc(currentNode->value, outFile); /* write out character */ currentNode = huffmanTree; /* back to top of tree */ } } /* clean up */ inFile = BitFileToFILE(bInFile); /* make file normal again */ FreeHuffmanTree(huffmanTree); /* free allocated memory */ return 0; }
/**************************************************************************** * Function : HuffmanEncodeFile * Description: This routine genrates a huffman tree optimized for a file * and writes out an encoded version of that file. * Parameters : inFile - Open file pointer for file to encode (must be * rewindable). * outFile - Open file pointer for file receiving encoded data * Effects : File is Huffman encoded * Returned : 0 for success, -1 for failure. errno will be set in the * event of a failure. Either way, inFile and outFile will * be left open. ****************************************************************************/ int HuffmanEncodeFile(FILE *inFile, FILE *outFile) { huffman_node_t *huffmanTree; /* root of huffman tree */ code_list_t codeList[NUM_CHARS]; /* table for quick encode */ bit_file_t *bOutFile; int c; /* validate input and output files */ if ((NULL == inFile) || (NULL == outFile)) { errno = ENOENT; return -1; } bOutFile = MakeBitFile(outFile, BF_WRITE); if (NULL == bOutFile) { perror("Making Output File a BitFile"); return -1; } /* build tree */ if ((huffmanTree = GenerateTreeFromFile(inFile)) == NULL) { outFile = BitFileToFILE(bOutFile); return -1; } /* build a list of codes for each symbol */ /* initialize code list */ for (c = 0; c < NUM_CHARS; c++) { codeList[c].code = NULL; codeList[c].codeLen = 0; } if (0 != MakeCodeList(huffmanTree, codeList)) { outFile = BitFileToFILE(bOutFile); return -1; } /* write out encoded file */ /* write header for rebuilding of tree */ WriteHeader(huffmanTree, bOutFile); /* read characters from file and write them to encoded file */ rewind(inFile); /* start another pass on the input file */ while((c = fgetc(inFile)) != EOF) { BitFilePutBits(bOutFile, BitArrayGetBits(codeList[c].code), codeList[c].codeLen); } /* now write EOF */ BitFilePutBits(bOutFile, BitArrayGetBits(codeList[EOF_CHAR].code), codeList[EOF_CHAR].codeLen); /* free the code list */ for (c = 0; c < NUM_CHARS; c++) { if (codeList[c].code != NULL) { BitArrayDestroy(codeList[c].code); } } /* clean up */ outFile = BitFileToFILE(bOutFile); /* make file normal again */ FreeHuffmanTree(huffmanTree); /* free allocated memory */ return 0; }
/*************************************************************************** * Function : main * Description: This function demonstrates the usage of each of the bit * bit file functions. * Parameters : argc - the number command line arguments (not used) * Parameters : argv - array of command line arguments (not used) * Effects : Writes bit file, reads back results, printing them to * stdout. * Returned : EXIT_SUCCESS ***************************************************************************/ int main(int argc, char *argv[]) { bit_file_t *bfp; FILE *fp; int i, numCalls, value; if (argc < 2) { numCalls = NUM_CALLS; } else { numCalls = atoi(argv[1]); } /* create bit file for writing */ bfp = BitFileOpen("testfile", BF_WRITE); if (bfp == NULL) { perror("opening file"); return (EXIT_FAILURE); } /* write chars */ value = (int)'A'; for (i = 0; i < numCalls; i++) { printf("writing char %c\n", value); if(BitFilePutChar(value, bfp) == EOF) { perror("writing char"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } value++; } /* write single bits */ value = 0; for (i = 0; i < numCalls; i++) { printf("writing bit %d\n", value); if(BitFilePutBit(value, bfp) == EOF) { perror("writing bit"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } value = 1 - value; } /* write ints as bits */ value = 0x11111111; for (i = 0; i < numCalls; i++) { printf("writing bits %0X\n", (unsigned int)value); if(BitFilePutBits(bfp, &value, (unsigned int)(8 * sizeof(int))) == EOF) { perror("writing bits"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } value += 0x11111111; } /* close bit file */ if (BitFileClose(bfp) != 0) { perror("closing file"); return (EXIT_FAILURE); } else { printf("closed file\n"); } /* reopen file for appending */ bfp = BitFileOpen("testfile", BF_APPEND); if (bfp == NULL) { perror("opening file"); return (EXIT_FAILURE); } /* append some chars */ value = (int)'A'; for (i = 0; i < numCalls; i++) { printf("appending char %c\n", value); if(BitFilePutChar(value, bfp) == EOF) { perror("appending char"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } value++; } /* write some bits from an integer */ value = 0x111; for (i = 0; i < numCalls; i++) { printf("writing 12 bits from an integer %03X\n", (unsigned int)value); if(BitFilePutBitsInt(bfp, &value, 12, sizeof(value)) == EOF) { perror("writing bits from an integer"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } value += 0x111; } /* convert to normal file */ fp = BitFileToFILE(bfp); if (fp == NULL) { perror("converting to stdio FILE"); return (EXIT_FAILURE); } else { printf("converted to stdio FILE\n"); } /* append some chars */ value = (int)'a'; for (i = 0; i < numCalls; i++) { printf("appending char %c\n", value); if(fputc(value, fp) == EOF) { perror("appending char to FILE"); if (fclose(fp) == EOF) { perror("closing stdio FILE"); } return (EXIT_FAILURE); } value++; } /* close file */ if (fclose(fp) == EOF) { perror("closing stdio FILE"); return (EXIT_FAILURE); } /* now read back writes */ /* open bit file */ bfp = BitFileOpen("testfile", BF_READ); if (bfp == NULL) { perror("reopening file"); return (EXIT_FAILURE); } /* read chars */ for (i = 0; i < numCalls; i++) { value = BitFileGetChar(bfp); if(value == EOF) { perror("reading char"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } else { printf("read %c\n", value); } } /* read single bits */ for (i = 0; i < numCalls; i++) { value = BitFileGetBit(bfp); if(value == EOF) { perror("reading bit"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } else { printf("read bit %d\n", value); } } /* read ints as bits */ for (i = 0; i < numCalls; i++) { if(BitFileGetBits(bfp, &value, (unsigned int)(8 * sizeof(int))) == EOF) { perror("reading bits"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } else { printf("read bits %0X\n", (unsigned int)value); } } if (BitFileByteAlign(bfp) == EOF) { fprintf(stderr, "failed to align file\n"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } else { printf("byte aligning file\n"); } /* read appended characters */ for (i = 0; i < numCalls; i++) { value = BitFileGetChar(bfp); if(value == EOF) { perror("reading char"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } else { printf("read %c\n", value); } } /* read some bits into an integer */ for (i = 0; i < numCalls; i++) { value = 0; if(BitFileGetBitsInt(bfp, &value, 12, sizeof(value)) == EOF) { perror("reading bits from an integer"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } else { printf("read 12 bits into an integer %03X\n", (unsigned int)value); } } /* convert to stdio FILE */ fp = BitFileToFILE(bfp); if (fp == NULL) { perror("converting to stdio FILE"); return (EXIT_FAILURE); } else { printf("converted to stdio FILE\n"); } /* read append some chars */ value = (int)'a'; for (i = 0; i < numCalls; i++) { value = fgetc(fp); if(value == EOF) { perror("stdio reading char"); if (0 != BitFileClose(bfp)) { perror("closing bitfile"); } return (EXIT_FAILURE); } else { printf("stdio read %c\n", value); } } /* close file */ if (fclose(fp) == EOF) { perror("closing stdio FILE"); return (EXIT_FAILURE); } return(EXIT_SUCCESS); }