/*****************************************************************************
*  lz77_compress
*
*  Compresses `size` bytes from `original` into a newly allocated bit stream.
*
*  Output layout: the first sizeof(int) bytes hold `size`, copied verbatim in
*  host byte order. They are followed by a packed sequence of tokens:
*    - phrase token (LZ77_PHRASE_BITS bits): top bit set, then the window
*      offset, the match length, and the symbol following the match;
*    - symbol token (LZ77_SYMBOL_BITS bits): only the unmatched symbol.
*
*  Parameters:
*    original    input bytes; `size` bytes are read.
*    compressed  set to NULL on entry; on success receives the buffer built
*                with malloc/realloc. This function never frees it on the
*                success path, so ownership passes to the caller.
*    size        number of input bytes.
*
*  Returns the number of bytes in the compressed buffer, or -1 if an
*  allocation fails (in which case *compressed stays NULL).
*
*  Relies on compare_win(), bit_set() and bit_get(), defined elsewhere.
*****************************************************************************/
int lz77_compress(const unsigned char *original, unsigned char **compressed,
		  int size)
{

	unsigned long token;

	unsigned char window[LZ77_WINDOW_SIZE],
	    buffer[LZ77_BUFFER_SIZE], tbytes[4], *comp, *temp, next;

	int offset, length, remaining, tbits, hsize, ipos, opos, tpos, i;

	/* Keep the caller's pointer invalid until the data is complete. */
	*compressed = NULL;

	/* Write the header: the original size, as raw host-order bytes. */
	hsize = sizeof(int);

	if ((comp = (unsigned char *)malloc(hsize)) == NULL)
		return -1;

	memcpy(comp, &size, sizeof(int));

	/* Start with an all-zero sliding window and look-ahead buffer. */
	memset(window, 0, LZ77_WINDOW_SIZE);
	memset(buffer, 0, LZ77_BUFFER_SIZE);

	/* Prime the look-ahead buffer with the first input bytes. */
	ipos = 0;

	for (i = 0; i < LZ77_BUFFER_SIZE && ipos < size; i++) {

		buffer[i] = original[ipos];
		ipos++;

	}

	/* opos is a bit position in comp; tokens start after the header. */
	opos = hsize * 8;
	remaining = size;

	while (remaining > 0) {

		/*
		 * compare_win() is expected to return the match length (0 for
		 * no match) and to fill in `offset` and `next`.
		 */
		if ((length = compare_win(window, buffer, &offset, &next)) != 0) {

			/* Phrase token: flag bit, offset, length, next symbol. */
			token = 1UL << (LZ77_PHRASE_BITS - 1);

			token |= (unsigned long)offset <<
			    (LZ77_PHRASE_BITS - LZ77_TYPE_BITS -
			     LZ77_WINOFF_BITS);

			token |= (unsigned long)length <<
			    (LZ77_PHRASE_BITS - LZ77_TYPE_BITS -
			     LZ77_WINOFF_BITS - LZ77_BUFLEN_BITS);

			token |= next;

			tbits = LZ77_PHRASE_BITS;

		}

		else {

			/* Symbol token: only the unmatched symbol. */
			token = next;

			tbits = LZ77_SYMBOL_BITS;

		}

		/*
		 * Lay the token out as a 32-bit big-endian byte image.  This is
		 * what htonl() produced in the original, but it no longer
		 * depends on sizeof(unsigned long): with a 64-bit long on a
		 * little-endian host the old code read the upper (zero) bytes
		 * of the token and emitted nothing but zero bits.
		 */
		tbytes[0] = (unsigned char)((token >> 24) & 0xFF);
		tbytes[1] = (unsigned char)((token >> 16) & 0xFF);
		tbytes[2] = (unsigned char)((token >> 8) & 0xFF);
		tbytes[3] = (unsigned char)(token & 0xFF);

		/* Append the trailing tbits bit positions of that image. */
		for (i = 0; i < tbits; i++) {

			if (opos % 8 == 0) {

				/* Grow the output by one byte. */
				temp = (unsigned char *)realloc(comp,
								(opos / 8) + 1);

				if (temp == NULL) {

					/* comp is still valid after a failed realloc. */
					free(comp);
					return -1;

				}

				comp = temp;

				/*
				 * realloc leaves the new byte indeterminate;
				 * clear it so unused bits in the final byte
				 * are deterministic.
				 */
				comp[opos / 8] = 0;

			}

			tpos = (int)(sizeof(tbytes) * 8) - tbits + i;
			bit_set(comp, opos, bit_get(tbytes, tpos));
			opos++;

		}

		/* The phrase consumed the match plus the trailing symbol. */
		length++;

		/* Slide the consumed bytes into the end of the window. */
		memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
		memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);

		/* Shift the look-ahead buffer and refill it from the input. */
		memmove(&buffer[0], &buffer[length], LZ77_BUFFER_SIZE - length);

		for (i = LZ77_BUFFER_SIZE - length;
		     i < LZ77_BUFFER_SIZE && ipos < size; i++) {

			buffer[i] = original[ipos];
			ipos++;

		}

		remaining = remaining - length;

	}

	/* Publish the finished buffer. */
	*compressed = comp;

	/* Number of bytes needed to hold opos bits. */
	return ((opos - 1) / 8) + 1;

}
/* Example #2 */
/* 0 */
/*
 * Compresses `size` bytes from `original` into a newly allocated bit stream,
 * emitting debug traces through dump()/printf when DEBUG is set.
 *
 * Output layout: the first sizeof(int) bytes hold `size`, copied verbatim in
 * host byte order, followed by packed tokens. A phrase token occupies
 * LZ77_PHRASE_BITS bits (type flag, window offset, match length, next
 * symbol); a symbol token occupies LZ77_SYMBOL_BITS bits (the symbol only).
 *
 * original   - input bytes; `size` bytes are read.
 * compressed - set to NULL on entry; on success receives the buffer built
 *              with malloc/realloc. It is not freed here on the success
 *              path, so ownership passes to the caller.
 * size       - number of input bytes.
 *
 * Returns the number of bytes in the compressed buffer, or -1 if an
 * allocation fails (*compressed then stays NULL).
 *
 * Relies on compare_win(), bit_set(), bit_get() and dump(), defined elsewhere.
 */
int lz77_compress(const unsigned char *original, unsigned char **compressed, int size) {
    unsigned char window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], *comp, *temp, next;
    int offset, length, remaining, hsize, ipos, opos, tpos, i, tbits, token;

    /* Make pointer to compressed data not valid until later */
    *compressed = NULL;

    /* Write header info: the original size as raw host-order bytes */
    hsize = sizeof(int);
    if ((comp = (unsigned char *) malloc(hsize)) == NULL)
        return -1;
    memcpy(comp, &size, sizeof(int));

    /* Initialize the sliding window and look-ahead buffer */
    memset(window, 0, LZ77_WINDOW_SIZE);
    memset(buffer, 0, LZ77_BUFFER_SIZE);

    /* Load look-ahead buffer */
    ipos = 0;
    for (i = 0; i < LZ77_BUFFER_SIZE && ipos < size; i++) {
        buffer[i] = original[ipos];
        ipos++;
    }

    /* Compress data; opos is a bit position that starts just past the header */
    opos = hsize * 8;
    remaining = size;

    while (remaining > 0) {
        /*
         * compare_win() is expected to return the match length (0 for no
         * match) and to fill in `offset` and `next`.
         */
        if ((length = compare_win(window, buffer, &offset, &next)) != 0) {
            if (DEBUG) printf("PHRASE TOKEN\n");
            /* Encode a phrase token; the first assignment exists only for the trace */
            token = 0x00000001;
            dump("token before", token, sizeof(int) * 8);
            token = 0x00000001 << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS);
            dump("token", token, sizeof(int) * 8);

            /* Set offset where match was found in the sliding window */
            dump("offset", offset, sizeof(int) * 8);
            token = token | (offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS));
            int offset_shifted = offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS);
            if (DEBUG) printf("bitshift = %d\n", LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS);
            dump("offset after bitshift", offset_shifted, sizeof(int) * 8);

            dump("token", token, sizeof(int) * 8);

            /* Set length of match */
            token = token | (length << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS - LZ77_BUFLEN_BITS));
            /* NOTE(review): width passed here is LZ77_WINOFF_BITS, not LZ77_BUFLEN_BITS - confirm intent */
            dump("length", length, LZ77_WINOFF_BITS);
            dump("token", token, sizeof(int) * 8);

            /* Set next symbol in the look-ahead buffer after the match */
            token = token | next;
            dump("next", next, sizeof(char) * 8);

            /* Set number of bits in the token */
            tbits = LZ77_PHRASE_BITS;
            dump("token", token, sizeof(int) * 8);
        } else {
            if (DEBUG) printf("SYMBOL TOKEN\n");
            /* Encode a symbol token */
            token = 0x00000000;

            /* Set the unmatched symbol */
            token = token | next;
            dump("next", next, sizeof(char) * 8);

            /* Set number of bits in the token */
            tbits = LZ77_SYMBOL_BITS;
            dump("token", token, sizeof(int) * 8);
        }

        /*
         * Ensure that the token is in big-endian format.
         * NOTE(review): htonl() works on 32-bit values, so the tpos
         * arithmetic below assumes int is exactly 32 bits wide.
         */
        token = htonl(token);
        dump("token big endian", token, sizeof(int) * 8);

        /* Write the token to the buffer of compressed data */
        if (DEBUG) printf("tbits = %d\n", tbits);
        for (i = 0; i < tbits; i++) {
            if (opos % 8 == 0) {
                /* Allocate another byte for buffer of compressed data */
                if ((temp = (unsigned char *) realloc(comp, (opos / 8) + 1)) == NULL) {
                    /* comp is still valid after a failed realloc */
                    free(comp);
                    return -1;
                }

                comp = temp;

                /*
                 * realloc leaves the new byte indeterminate; clear it so the
                 * unused bits of the final byte are deterministic.
                 */
                comp[opos / 8] = 0;
            }

            /* Book had sizeof(unsigned long) which I think is wrong */
            /* Select the trailing tbits bit positions of the big-endian token */
            tpos = (sizeof(int) * 8) - tbits + i;
            if (DEBUG) printf("tpos = %d, opos = %d\n", tpos, opos);

            bit_set(comp, opos, bit_get((unsigned char *) &token, tpos));

            if (DEBUG) {
                /* Trace every bit written so far, grouped in fours */
                printf("comp = ");
                int j;
                for (j = 0; j <= opos; j++) {
                    if (j % 4 == 0) printf(" ");
                    printf("%d", bit_get(comp, j));
                }
                printf("\n");
            }
            opos++;
        }

        /* Adjust the phrase length to account for unmatched symbol */
        length++;

        /* Copy data from look-ahead buffer to sliding window */
        memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
        memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);

        /* Read more data into look-ahead buffer */
        memmove(&buffer[0], &buffer[length], LZ77_BUFFER_SIZE - length);
        for (i = LZ77_BUFFER_SIZE - length; i < LZ77_BUFFER_SIZE && ipos < size; i++) {
            buffer[i] = original[ipos];
            ipos++;
        }

        /* Adjust total symbols remaining by phrase length */
        remaining = remaining - length;
    }

    /* Point to the buffer of compressed data */
    *compressed = comp;

    /* Return the number of bytes needed to hold opos bits */
    return ((opos - 1) / 8) + 1;
}