/*
 * Replaces the buffer at *data with the output of rle_encode.
 *
 * data:   in/out. On entry, the buffer to encode; it is handed to free(), so
 *         it must be heap-allocated. On success it is freed and replaced by a
 *         newly malloc'd buffer that the caller must free.
 * length: in/out. On entry, the number of input bytes; on success, the value
 *         returned by rle_encode.
 *
 * The output buffer is sized at twice the input length. If the length is
 * negative or the allocation fails, *data and *length are left untouched so
 * the caller never ends up holding a freed or NULL buffer.
 */
void encode(char **data, int *length)
{
    int in_len = *length;
    if (in_len < 0) {
        return;
    }

    /* Do the doubling in size_t so it cannot overflow int. */
    size_t capacity = (size_t)in_len * 2;

    /* malloc(0) may legitimately return NULL; ask for at least one byte so a
     * NULL result always means a real allocation failure. */
    char *result = malloc(capacity != 0 ? capacity : 1);
    if (result == NULL) {
        return;
    }

    *length = rle_encode(*data, result, in_len);
    free(*data);
    *data = result;
}
/*
 * Program entry point.
 *
 * Hands one CallbackData instance to each stage in turn: parse_args,
 * load_fasta, load_bsq, rle_encode and finally cleanup_data.
 * Always returns 0.
 */
int
main (int argc, char **argv)
{
  CallbackData state;

  /* parse_args receives the addresses of argc/argv, so it may modify them. */
  parse_args (&state, &argc, &argv);

  load_fasta (&state);
  load_bsq (&state);

  rle_encode (&state);

  cleanup_data (&state);

  return 0;
}
/*
 * rle_encode must report 0 when the output buffer is too small.
 *
 * The input is eight distinct bytes; the encoded form listed alongside it is
 * ten bytes long, while the output buffer handed to rle_encode holds only
 * eight.
 */
TEST(rle, encode_overflow)
{
    const uint8_t decode_data[] = {
        0x12, 0x34, 0x56, 0x78, 0x90, 0xAB, 0xCD, 0xEF
    };
    const uint8_t encode_data[] = {
        0x00, 0x08,
        0x12, 0x34, 0x56, 0x78, 0x90, 0xAB, 0xCD, 0xEF
    };

    struct rle_test_data_s {
        const uint8_t *decode_data;
        const uint8_t *encode_data;
        size_t numof_decode_data;
        size_t numof_encode_data;
    } rtd[] = {
        { decode_data, encode_data, sizeof(decode_data), sizeof(encode_data) }
    };
    const size_t case_count = sizeof(rtd) / sizeof(rtd[0]);

    for (size_t idx = 0; idx < case_count; idx++) {
        struct rle_test_data_s *tc = &rtd[idx];

        /* Deliberately smaller than the ten-byte encoded form. */
        uint8_t out[8] = { 0 };

        size_t produced = rle_encode(tc->decode_data, tc->numof_decode_data,
                                     out, sizeof(out));

        dump(tc->decode_data, tc->numof_decode_data);
        dump(out, produced);

        TEST_ASSERT_EQUAL(0, produced);
    }
}
/*
 * Entry point for the program. Validates the command line and executes the
 * mode selected by argv[1] on the file named by argv[2]:
 *
 *   -c  rle_encode the file into "<file>.temp", load that temp file with
 *       openFile, pass it to compress, then delete the temp file.
 *   -d  load the file with openFile, pass it to decompress, then rle_decode
 *       "<base>.temp" into "<base>", where <base> is the file name with its
 *       last strlen(".hurl") characters dropped; finally delete the temp file.
 *   -t  load the file with openFile and pass it to print_table.
 *
 * Returns 0 on completion, -1 on a usage error, a file name too short to
 * carry the ".hurl" extension, or an allocation failure.
 */
int main(int argc, char* argv[])
{
    static const char tempExtension[] = ".temp";
    static const char hurlExtension[] = ".hurl";

    if (argc != 3 ||
        (strcmp(argv[1], "-c") != 0 &&
         strcmp(argv[1], "-d") != 0 &&
         strcmp(argv[1], "-t") != 0))
    {
        printf("usage: huff [-c | -d | -t] file\n");
        return -1;
    }

    const char *mode = argv[1];
    char *fileName = argv[2];
    size_t nameLength = strlen(fileName);

    /* Execute the correct mode */
    if (strcmp(mode, "-c") == 0)
    {
        /* Build "<file>.temp". snprintf always NUL-terminates, unlike the
         * strncpy + strcat pair on an uninitialised buffer. */
        size_t tempSize = nameLength + sizeof(tempExtension);
        char *tempFile = malloc(tempSize);
        if (tempFile == NULL)
        {
            fprintf(stderr, "huff: out of memory\n");
            return -1;
        }
        snprintf(tempFile, tempSize, "%s%s", fileName, tempExtension);

        /* RLE encode the file into the temp file */
        rle_encode(fileName, tempFile);

        /* Load the RLE output and Huffman-compress it */
        unsigned long long fileLength = 0;
        unsigned char *file_pointer = openFile(tempFile, &fileLength);
        compress(file_pointer, fileLength, fileName);

        free(file_pointer);
        remove(tempFile);
        free(tempFile);
    }
    else if (strcmp(mode, "-d") == 0)
    {
        /* The output names are derived by dropping the extension, so the
         * name must be at least that long or the length would underflow. */
        size_t extLength = sizeof(hurlExtension) - 1;
        if (nameLength < extLength)
        {
            fprintf(stderr, "huff: '%s' is too short to be a %s file\n",
                    fileName, hurlExtension);
            return -1;
        }
        size_t baseLength = nameLength - extLength;

        /* "<base>.temp": base + extension + NUL. */
        char *tempFileName = malloc(baseLength + sizeof(tempExtension));
        /* "<base>": base + NUL. */
        char *outputFileName = malloc(baseLength + 1);
        if (tempFileName == NULL || outputFileName == NULL)
        {
            fprintf(stderr, "huff: out of memory\n");
            free(tempFileName);
            free(outputFileName);
            return -1;
        }
        memcpy(tempFileName, fileName, baseLength);
        memcpy(tempFileName + baseLength, tempExtension, sizeof(tempExtension));
        memcpy(outputFileName, fileName, baseLength);
        outputFileName[baseLength] = '\0';

        unsigned long long fileLength = 0;
        unsigned char *file_pointer = openFile(fileName, &fileLength);
        decompress(file_pointer, fileLength, fileName);

        rle_decode(tempFileName, outputFileName);

        free(file_pointer);
        remove(tempFileName);
        free(tempFileName);
        free(outputFileName);
    }
    else if (strcmp(mode, "-t") == 0)
    {
        unsigned long long fileLength = 0;
        unsigned char *file_pointer = openFile(fileName, &fileLength);
        print_table(file_pointer, fileLength, fileName);
        free(file_pointer);
    }

    return 0;
}
/*
 * Round-trip check for the run-length coder.
 *
 * Reads the file named by argv[1] into a fixed-size buffer, runs it through
 * rle_encode and then rle_decode, and prints the sizes, the space saved and
 * a simple additive checksum of the data before and after the round trip.
 *
 * Returns 0 on completion and -1 if no file name was given, the file cannot
 * be opened, measured or read, it does not fit in the buffer, or either
 * coder call yields no usable result.
 */
int main (int argc, char** argv)
{
    FILE* file;
    char* filename;
    long reported_size;
    int filesize;
    /* static: keeps the 320 KiB buffer off the stack. */
    static char filedata [327680];
    char* rle_output;
    int rle_length;
    char* rld_output;
    int rld_length;
    float saved_percent;
    unsigned int i, j;

    if (argc < 2) {
        printf ("This program requires a filename...\n");
        return -1;
    }

    filename = argv [1];
    file = fopen (filename, "rb");
    if (! file) {
        printf ("Could not open file: '%s'!\n", filename);
        return -1;
    }

    /* Measure the file, and refuse anything the buffer cannot hold. */
    if (fseek (file, 0, SEEK_END) != 0 || (reported_size = ftell (file)) < 0) {
        printf ("Could not determine size of file: '%s'!\n", filename);
        fclose (file);
        return -1;
    }
    if ((unsigned long)reported_size > sizeof filedata) {
        printf ("File '%s' is too large (%ld bytes, limit %zu)!\n",
                filename, reported_size, sizeof filedata);
        fclose (file);
        return -1;
    }
    filesize = (int)reported_size;
    rewind (file);

    if (filesize > 0
        && fread (filedata, 1, (size_t)filesize, file) != (size_t)filesize) {
        printf ("Could not read file: '%s'!\n", filename);
        fclose (file);
        return -1;
    }
    fclose (file);

    rle_output = rle_encode (filedata, filesize, &rle_length);
    /* NOTE(review): rle_encode's failure contract is not visible here; a NULL
     * result is treated as failure defensively. */
    if (rle_output == NULL) {
        printf ("Runlength encoding failed!\n");
        return -1;
    }

    /* An empty file has nothing to save; avoid dividing by zero. */
    saved_percent = (filesize > 0)
        ? 100.0f * (1.0f - ((float)rle_length / (float)filesize))
        : 0.0f;
    printf ("Size before: %d\n"
            "Size after: %d\n"
            "Space saved: %2.2f%%\n",
            filesize, rle_length, saved_percent);

    rld_output = rle_decode (rle_output, rle_length, &rld_length);
    /* NOTE(review): same defensive assumption as for rle_encode above. */
    if (rld_output == NULL || rld_length < 0) {
        printf ("Runlength decoding failed!\n");
        return -1;
    }
    printf ("Runlength Decode Size: %d\n", rld_length);

    /* Both lengths are known to be non-negative here, so the casts are safe. */
    j = 0;
    for (i = 0; i < (unsigned int)filesize; i++) {
        j += (unsigned int)filedata [i];
    }
    printf ("Checksum Before: %X\n", j);

    j = 0;
    for (i = 0; i < (unsigned int)rld_length; i++) {
        j += (unsigned int)rld_output [i];
    }
    printf ("Checksum After: %X\n", j);

    return 0;
}
/*
 * Opens fileName and performs the operation selected by huffmanMode:
 *
 *   DECOMPRESS_MODE    - reads the Huffman encodings from the file, decodes
 *                        its contents into a temporary file, then rle_decode's
 *                        that temporary file into a new file whose name is
 *                        fileName without its trailing ".hurl".
 *   TABLE_MODE_HURL    - reads the encodings stored in the file and prints
 *                        them; if none are found, falls back to generating a
 *                        table from the file's byte frequencies.
 *   TABLE_MODE_GENERIC - generates a table from the file's byte frequencies
 *                        and prints it.
 *   COMPRESS_MODE      - rle_encode's the file into a temporary file, builds a
 *                        table from that temporary file's byte frequencies and
 *                        writes the compressed result to "<fileName>.hurl".
 *
 * Returns FILE_MISSING if fileName cannot be opened, FILE_INVALID_FORMAT if
 * the mode is unknown or a step fails, and otherwise FILE_SUCCESS or the code
 * reported by GetTableForGeneric.
 *
 * Files are opened in binary mode so that no newline translation is applied
 * to the data on platforms that distinguish text from binary streams.
 */
eFileCode GenerateTableAndCompressOrDecompress(eMode huffmanMode, const char *fileName)
{
    // An error code which tells if the operation was successful or not.
    eFileCode fileCode = FILE_SUCCESS;

    // Open file for reading.
    FILE *pFile = fopen(fileName, "rb");

    // Check that file was successfully opened. If not, return an error code.
    if (pFile == NULL)
    {
        return FILE_MISSING;
    }

    switch (huffmanMode)
    {
        case DECOMPRESS_MODE:
        {
            // The output name is fileName minus ".hurl"; a shorter name would
            // make the length negative and the name buffer's size invalid.
            size_t fullLength = strlen(fileName);
            size_t extLength = strlen(".hurl");
            if (fullLength < extLength)
            {
                fileCode = FILE_INVALID_FORMAT;
                break;
            }
            size_t nameLength = fullLength - extLength;

            // Read the stored encodings and rebuild the decode tree from them.
            char huffmanEncodings[ENTRIES * ENTRY_LENGTH];
            unsigned long long lengthOfFile = huffmanEncodingsFromFile(pFile, huffmanEncodings);

            huffResult resultArray[ENTRIES];
            createHuffResultArrayFromFileEncodings(huffmanEncodings, resultArray);

            huffNode huffmanTree[ENTRIES + ENTRIES - 1];
            createDecodeTreeFromResultArray(resultArray, huffmanTree);

            // Huffman-decode into a temporary file, then run-length decode
            // that temporary file into the final output.
            FILE *rle_file = xtmpfile();
            writeCompressedFileToNonCompressedOutput(pFile, rle_file, lengthOfFile, &huffmanTree[0]);
            rewind(rle_file);

            char newFileName[nameLength + 1];
            memcpy(newFileName, fileName, nameLength);
            newFileName[nameLength] = 0;

            FILE *pNewFile = xfopen(newFileName, "wb");
            rle_decode(rle_file, pNewFile);

            fclose(rle_file);
            fclose(pNewFile);
            break;
        }
        case TABLE_MODE_HURL:
        {
            // If we want a table and the file is .hurl, it may or may not be the same .hurl we want it to be.
            // We'll try to treat it as a .hurl but if it's the wrong format, we will treat it like a generic
            // file.
            char huffmanEncodings[ENTRIES * ENTRY_LENGTH];
            huffmanEncodingsFromFile(pFile, huffmanEncodings);

            if (huffmanEncodings[0] != 0)
            {
                huffResult resultArray[ENTRIES];
                createHuffResultArrayFromFileEncodings(huffmanEncodings, resultArray);
                printHuffResultArray(resultArray);
            }
            else
            {
                // File was a .hurl, but internally the format doesn't fit. We can still generate a
                // compression table, as for any other file.
                fseek(pFile, 0, SEEK_SET);

                // The frequency of occurrence of each character in the uncompressed file.
                unsigned long long pFrequencies[ENTRIES];
                memset(pFrequencies, 0, sizeof(pFrequencies));

                if (GenerateFrequenciesForGeneric(pFile, pFrequencies) == FILE_SUCCESS)
                {
                    // NOTE(review): unlike TABLE_MODE_GENERIC, the generated table is not printed
                    // here - confirm whether a printHuffResultArray call is missing.
                    huffResult resultArray[ENTRIES];
                    fileCode = GetTableForGeneric(pFrequencies, resultArray);
                }
                else
                {
                    fileCode = FILE_INVALID_FORMAT;
                }
            }
            break;
        }
        case TABLE_MODE_GENERIC:
        {
            // If the file is not a .hurl, then generate frequencies and then a table for the file.
            // The frequency of occurrence of each character in the uncompressed file.
            unsigned long long pFrequencies[ENTRIES];
            memset(pFrequencies, 0, sizeof(pFrequencies));

            if (GenerateFrequenciesForGeneric(pFile, pFrequencies) == FILE_SUCCESS)
            {
                huffResult resultArray[ENTRIES];
                fileCode = GetTableForGeneric(pFrequencies, resultArray);
                printHuffResultArray(resultArray);
            }
            else
            {
                fileCode = FILE_INVALID_FORMAT;
            }
            break;
        }
        case COMPRESS_MODE:
        {
            // Run-length encode the input into a temporary file first; the
            // Huffman table is built from that intermediate data.
            FILE *rle_file = xtmpfile();
            rle_encode(pFile, rle_file);
            rewind(rle_file);

            unsigned long long pFrequencies[ENTRIES];
            memset(pFrequencies, 0, sizeof(pFrequencies));

            if (GenerateFrequenciesForGeneric(rle_file, pFrequencies) != FILE_SUCCESS)
            {
                fileCode = FILE_INVALID_FORMAT;
                fclose(rle_file);   // do not leak the temporary file on failure
                break;
            }

            huffResult resultArray[ENTRIES];
            fileCode = GetTableForGeneric(pFrequencies, resultArray);

            // Room for the name, the five characters of ".hurl" and the terminator.
            char newFileName[strlen(fileName) + 6];
            strcpy(newFileName, fileName);
            FILE *pNewFile = fopen(strcat(newFileName, ".hurl"), "w+b");

            if (pNewFile == NULL)
            {
                fileCode = FILE_INVALID_FORMAT;
                fclose(rle_file);   // do not leak the temporary file on failure
                break;
            }

            writeNonCompressedFileToCompressedOutput(rle_file, pNewFile, resultArray);

            fclose(rle_file);
            fclose(pNewFile);
            break;
        }
        default:
        {
            // File must not be valid for the specified commandline options if we get this far.
            fileCode = FILE_INVALID_FORMAT;
            break;
        }
    }

    fclose(pFile);
    return fileCode;
}
void dnCompareSnapshots( const snapshot_t *olds, const snapshot_t *news ) { char *p, *q; p = (char*)olds; q = (char*)news; int diff = 0; std::stringstream ss( std::ios::in | std::ios::out | std::ios::binary ); char mask[sizeof(snapshot_t)]; for (int i = 0; i < sizeof( snapshot_t ); i++, p++, q++ ) { if ( *q != *p ) { diff++; mask[i] = 1; ss.write(p, 1); } else { mask[i] = 0; } } #if 0 char *mask_lz4 = (char*)malloc( LZ4_compressBound( sizeof(mask) ) ); int mask_lz4_size = LZ4_compress( (char*)&mask[0], (char*)mask_lz4, sizeof(mask)); free( mask_lz4 ); #endif unsigned char bitmask[sizeof(mask)/8]; for ( int i = 0; i < sizeof( mask )/8; i++ ) { bitmask[i] = mask[ i*8 + 0 ] << 0 & mask[ i*8 + 1 ] << 1 & mask[ i*8 + 2 ] << 2 & mask[ i*8 + 3 ] << 3 & mask[ i*8 + 4 ] << 4 & mask[ i*8 + 5 ] << 5 & mask[ i*8 + 6 ] << 6 & mask[ i*8 + 7 ] << 7; } char *bitmask_lz4 = (char*)malloc( LZ4_compressBound( sizeof(bitmask) ) ); int bitmask_lz4_size = LZ4_compress((char*)&bitmask[0], (char*)bitmask_lz4, sizeof( bitmask )); free( bitmask_lz4 ); #if 0 char *mask_rle = (char*)malloc( sizeof(mask)*2 ); int mask_rle_size = rle_encode((char*)&mask[0], (char*)mask_rle, sizeof(mask)); free( mask_rle ); std::string dfs = ss.str(); char *payload_lz4 = (char*)malloc( LZ4_compressBound( dfs.size() ) ); int payload_lz4_size = LZ4_compress( dfs.c_str(), payload_lz4, dfs.size() ); free( payload_lz4 ); #endif #if 0 printf( "*** DATA INFO ***\n" ); printf( "%d bytes differ\n", diff ); printf( "mask size: %d\n", sizeof( mask )); printf( "bitmask size: %d\n", sizeof( bitmask ) ); printf( "mask lz4 size: %d\n", mask_lz4_size ); printf( "bitmask lz4 size: %d\n", bitmask_lz4_size ); printf( "mask rle size: %d\n", mask_rle_size ); printf( "diff stream size: %d\n", dfs.size() ); printf( "diff stream lz4 size: %d\n", payload_lz4_size ); #endif int deltasize = bitmask_lz4_size + ss.str().size(); printf( "snapshot delta size: %d\n", deltasize ); if ( deltasize < 8000 ) { num++; sum += deltasize; if ( deltasize 
> maxd ) { maxd = deltasize; } printf( "avg: %d max: %d\n", sum/num, maxd ); } }
/*
 * Returns whatever rle_encode reports for `inbuffer`/`inlen` when it is given
 * no output buffer (NULL destination, capacity 0).
 *
 * NOTE(review): presumably rle_encode treats a NULL destination as a dry run
 * that only computes the encoded length - confirm against its definition.
 */
int rle_encoded_size(unsigned char* inbuffer, unsigned int inlen)
{
    int encoded_len = rle_encode(inbuffer, inlen, NULL, 0);

    return encoded_len;
}
/*
 * Round-trip test for rle_encode / rle_decode.
 *
 * Each table entry pairs a raw byte sequence with its expected encoded form.
 * For every entry the test checks that encoding the raw bytes yields exactly
 * the expected encoding, and that decoding the expected encoding yields
 * exactly the raw bytes. The four entries cover mixed data, one long run,
 * data with no runs at all, and runs at both ends of the input.
 */
TEST(rle, encode_decode)
{
    /* Case 0: single byte, run, literal stretch, short run, long run, single byte. */
    const uint8_t decode_data_0[] = {
        0x01,
        0x32, 0x32, 0x32, 0x32, 0x32,
        0x55, 0x37, 0x11, 0xAC, 0x8E, 0x97,
        0xC9, 0xC9,
        0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0,
        0x88
    };
    const uint8_t encode_data_0[] = {
        0x01, 0x01,
        0x05, 0x32,
        0x00, 0x06, 0x55, 0x37, 0x11, 0xAC, 0x8E, 0x97,
        0x02, 0xC9,
        0x08, 0xA0,
        0x01, 0x88
    };

    /* Case 1: one run of twelve identical bytes. */
    const uint8_t decode_data_1[] = {
        0xB3, 0xB3, 0xB3, 0xB3, 0xB3, 0xB3, 0xB3, 0xB3, 0xB3, 0xB3, 0xB3, 0xB3
    };
    const uint8_t encode_data_1[] = { 0x0C, 0xB3 };

    /* Case 2: eight distinct bytes, no runs. */
    const uint8_t decode_data_2[] = {
        0x12, 0x34, 0x56, 0x78, 0x90, 0xAB, 0xCD, 0xEF
    };
    const uint8_t encode_data_2[] = {
        0x00, 0x08,
        0x12, 0x34, 0x56, 0x78, 0x90, 0xAB, 0xCD, 0xEF
    };

    /* Case 3: run, two distinct bytes, run. */
    const uint8_t decode_data_3[] = {
        0x12, 0x12, 0x12,
        0x78, 0x90,
        0xAB, 0xAB, 0xAB, 0xAB
    };
    const uint8_t encode_data_3[] = {
        0x03, 0x12,
        0x00, 0x02, 0x78, 0x90,
        0x04, 0xAB
    };

    struct rle_test_data_s {
        const uint8_t *decode_data;
        const uint8_t *encode_data;
        size_t numof_decode_data;
        size_t numof_encode_data;
    } rtd[] = {
        { decode_data_0, encode_data_0, sizeof(decode_data_0), sizeof(encode_data_0) },
        { decode_data_1, encode_data_1, sizeof(decode_data_1), sizeof(encode_data_1) },
        { decode_data_2, encode_data_2, sizeof(decode_data_2), sizeof(encode_data_2) },
        { decode_data_3, encode_data_3, sizeof(decode_data_3), sizeof(encode_data_3) }
    };
    const size_t case_count = sizeof(rtd) / sizeof(rtd[0]);

    for (size_t idx = 0; idx < case_count; idx++) {
        struct rle_test_data_s *tc = &rtd[idx];
        uint8_t scratch[32];
        size_t produced;

        /* Encode direction: raw bytes -> expected encoding. */
        memset(scratch, 0, sizeof(scratch));
        produced = rle_encode(tc->decode_data, tc->numof_decode_data,
                              scratch, sizeof(scratch));
        dump(tc->decode_data, tc->numof_decode_data);
        dump(scratch, produced);
        TEST_ASSERT_EQUAL(tc->numof_encode_data, produced);
        TEST_ASSERT_EQUAL_UINT8_ARRAY(tc->encode_data, scratch, produced);

        /* Decode direction: expected encoding -> raw bytes. */
        memset(scratch, 0, sizeof(scratch));
        produced = rle_decode(tc->encode_data, tc->numof_encode_data,
                              scratch, sizeof(scratch));
        dump(tc->encode_data, tc->numof_encode_data);
        dump(scratch, produced);
        TEST_ASSERT_EQUAL(tc->numof_decode_data, produced);
        TEST_ASSERT_EQUAL_UINT8_ARRAY(tc->decode_data, scratch, produced);
    }
}
{ .iov_base = "foo", .iov_len = 3 }, { .iov_base = "foo", .iov_len = 3 }, { .iov_base = "bar", .iov_len = 3 } }; char buf[256]; size_t len = rle_encode(iov, 6, buf); assert(len == 17); struct iovec out[6] = {{0}}; rle_decode(buf, 17, out); assert(strncmp("hello", out[0].iov_base, out[0].iov_len) == 0); assert(strncmp("hello", out[1].iov_base, out[1].iov_len) == 0); assert(strncmp("hello", out[2].iov_base, out[2].iov_len) == 0); assert(strncmp("foo", out[3].iov_base, out[3].iov_len) == 0); assert(strncmp("foo", out[4].iov_base, out[4].iov_len) == 0); assert(strncmp("bar", out[5].iov_base, out[5].iov_len) == 0); } } return assert_failures();