int lz77_compress(const unsigned char *original, unsigned char **compressed, int size) { unsigned long token; unsigned char window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], *comp, *temp, next; int offset, length, remaining, tbits, hsize, ipos, opos, tpos, i; /***************************************************************************** * Make the pointer to the compressed data not valid until later. * *****************************************************************************/ *compressed = NULL; /***************************************************************************** * Write the header information. * *****************************************************************************/ hsize = sizeof(int); if ((comp = (unsigned char *)malloc(hsize)) == NULL) return -1; memcpy(comp, &size, sizeof(int)); /***************************************************************************** * Initialize the sliding window and the look-ahead buffer. * *****************************************************************************/ memset(window, 0, LZ77_WINDOW_SIZE); memset(buffer, 0, LZ77_BUFFER_SIZE); /***************************************************************************** * Load the look-ahead buffer. * *****************************************************************************/ ipos = 0; for (i = 0; i < LZ77_BUFFER_SIZE && ipos < size; i++) { buffer[i] = original[ipos]; ipos++; } /***************************************************************************** * Compress the data. * *****************************************************************************/ opos = hsize * 8; remaining = size; while (remaining > 0) { if ((length = compare_win(window, buffer, &offset, &next)) != 0) { /*********************************************************************** * Encode a phrase token. * ***********************************************************************/ token = 0x00000001 << (LZ77_PHRASE_BITS - 1); /*********************************************************************** * Set the offset where the match was found in the sliding window. * ***********************************************************************/ token = token | (offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS)); /*********************************************************************** * Set the length of the match. * ***********************************************************************/ token = token | (length << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS - LZ77_BUFLEN_BITS)); /*********************************************************************** * Set the next symbol in the look-ahead buffer after the match. * ***********************************************************************/ token = token | next; /*********************************************************************** * Set the number of bits in the token. * ***********************************************************************/ tbits = LZ77_PHRASE_BITS; } else { /*********************************************************************** * Encode a symbol token. * ***********************************************************************/ token = 0x00000000; /*********************************************************************** * Set the unmatched symbol. * ***********************************************************************/ token = token | next; /*********************************************************************** * Set the number of bits in the token. * ***********************************************************************/ tbits = LZ77_SYMBOL_BITS; } /************************************************************************** * Ensure that the token is in big-endian format. * **************************************************************************/ token = htonl(token); /************************************************************************** * Write the token to the buffer of compressed data. * **************************************************************************/ for (i = 0; i < tbits; i++) { if (opos % 8 == 0) { /******************************************************************** * Allocate another byte for the buffer of compressed data. * ********************************************************************/ if ((temp = (unsigned char *)realloc(comp, (opos / 8) + 1)) == NULL) { free(comp); return -1; } comp = temp; } tpos = (sizeof(unsigned long) * 8) - tbits + i; bit_set(comp, opos, bit_get((unsigned char *)&token, tpos)); opos++; } /************************************************************************** * Adjust the phrase length to account for the unmatched symbol. * **************************************************************************/ length++; /************************************************************************** * Copy data from the look-ahead buffer to the sliding window. * **************************************************************************/ memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length); memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length); /************************************************************************** * Read more data into the look-ahead buffer. * **************************************************************************/ memmove(&buffer[0], &buffer[length], LZ77_BUFFER_SIZE - length); for (i = LZ77_BUFFER_SIZE - length; i < LZ77_BUFFER_SIZE && ipos < size; i++) { buffer[i] = original[ipos]; ipos++; } /************************************************************************** * Adjust the total symbols remaining by the phrase length. * **************************************************************************/ remaining = remaining - length; } /***************************************************************************** * Point to the buffer of compressed data. * *****************************************************************************/ *compressed = comp; /***************************************************************************** * Return the number of bytes in the compressed data. * *****************************************************************************/ return ((opos - 1) / 8) + 1; }
int lz77_compress(const unsigned char *original, unsigned char **compressed, int size) { unsigned char window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], *comp, *temp, next; int offset, length, remaining, hsize, ipos, opos, tpos, i, tbits, token; /* Make pointer to compressed data not valid until later */ *compressed = NULL; /* Write header info */ hsize = sizeof(int); if ((comp = (unsigned char *) malloc(hsize)) == NULL) return -1; memcpy(comp, &size, sizeof(int)); /* Initialize the sliding window and look-ahead buffer */ memset(window, 0, LZ77_WINDOW_SIZE); memset(buffer, 0, LZ77_BUFFER_SIZE); /* Load look-ahead buffer */ ipos = 0; for (i = 0; i < LZ77_BUFFER_SIZE && ipos < size; i++) { buffer[i] = original[ipos]; ipos++; } /* Compress data */ opos = hsize * 8; remaining = size; while (remaining > 0) { if ((length = compare_win(window, buffer, &offset, &next)) != 0) { if (DEBUG) printf("PHRASE TOKEN\n"); /* Encode a phrase token */ token = 0x00000001; dump("token before", token, sizeof(int) * 8); token = 0x00000001 << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS); dump("token", token, sizeof(int) * 8); /* Set offset where match was found in the sliding window */ dump("offset", offset, sizeof(int) * 8); token = token | (offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS)); int offset_shifted = offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS); if (DEBUG) printf("bitshift = %d\n", LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS); dump("offset after bitshift", offset_shifted, sizeof(int) * 8); dump("token", token, sizeof(int) * 8); /* Set length of match */ token = token | (length << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS - LZ77_BUFLEN_BITS)); dump("length", length, LZ77_WINOFF_BITS); dump("token", token, sizeof(int) * 8); /* Set next symbol in the look-ahead buffer after the match */ token = token | next; dump("next", next, sizeof(char) * 8); /* Set number of bits in the token */ tbits = LZ77_PHRASE_BITS; dump("token", token, sizeof(int) * 8); } else { if (DEBUG) printf("SYMBOL TOKEN\n"); /* Encode a symbol token */ token = 0x00000000; /* Set the unmatched symbol */ token = token | next; dump("next", next, sizeof(char) * 8); /* Set number of bits in the token */ tbits = LZ77_SYMBOL_BITS; dump("token", token, sizeof(int) * 8); } /* Ensure that the token is in big-endian format */ token = htonl(token); dump("token big endian", token, sizeof(int) * 8); /* Write the token to the buffer of compressed data */ if (DEBUG) printf("tbits = %d\n", tbits); for (i = 0; i < tbits; i++) { if (opos % 8 == 0) { /* Allocate another byte for buffer of compressed data */ if ((temp = (unsigned char *) realloc(comp, (opos / 8) + 1)) == NULL) { free(comp); return -1; } comp = temp; } /* Book had sizeof(unsigned long) which I think is wrong */ /* Calculating appropriate tpos for big endian format */ tpos = (sizeof(int) * 8) - tbits + i; if (DEBUG) printf("tpos = %d, opos = %d\n", tpos, opos); bit_set(comp, opos, bit_get((unsigned char *) &token, tpos)); if (DEBUG) { printf("comp = "); int j; for (j = 0; j <= opos; j++) { if (j % 4 == 0) printf(" "); printf("%d", bit_get(comp, j)); } printf("\n"); } opos++; } /* Adjust the phrase length to account for unmatched symbol */ length++; /* Copy data from look-ahead buffer to sliding window */ memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length); memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length); /* Read more data into look-ahead buffer */ memmove(&buffer[0], &buffer[length], LZ77_BUFFER_SIZE - length); for (i = LZ77_BUFFER_SIZE - length; i < LZ77_BUFFER_SIZE && ipos < size; i++) { buffer[i] = original[ipos]; ipos++; } /* Adjust total symbols remaining by phrase length */ remaining = remaining - length; } /* Point to the buffer of compressed data */ *compressed = comp; /* Return the number of bytes in compressed data */ return ((opos - 1) / 8) + 1; }