void Compression::comp() { int ch; for ( cc = 255; cc-->0; ) { ff[cc].Fone = 1; ff[cc].Ftot = 2; } cc = 0; ch = in.r(); if (ch < 0) { out.w(-1); return; } if (ch == 0) { sw = 1; sww = 1; } else { sw = 0; sww = 0; } high = Top_value; low = Half - 1; ff[cc].Fone = 2; ff[cc].Ftot = 3; cc = 2; ch = in.r(); Zero_av = 1; bits_to_follow = 0; /* No bits to follow next. */ for (;;) { /* Loop through characters. */ if (ch < 0) break; encode_symbol(ch^sw, ff[cc]); if ( (ch^sw) == 1) ff[cc].Fone++; ff[cc].Ftot++; if (ch == 0 ) { cc = 2 * cc + 1; } else { cc = 2 * cc + 2; } if (cc >= 255 ) cc = 0; /* cc = 0; */ ch = in.r(); } encode_symbol(-1, ff[cc]); if (low == 0 && Zero_av == 1) { bit_plus_follow(0); } else bit_plus_follow(1); out.w(-2); }
/*
 * Compress routine demonstrating the basic algorithm.
 *
 * Each character of the fixed input string is loaded in turn, handed to
 * the modeling routine to be converted into a SYMBOL, and then passed to
 * the arithmetic coder, which writes it to the output bit stream.  The
 * terminating '\0' is encoded as well and ends the loop.
 */
void compress()
{
    static char *input = "GLIB BATES";
    FILE *compressed_file;
    SYMBOL s;
    char c;
    int i = 0;

    compressed_file = fopen( "software/benchmarks/data/test.cmp", "wb" );
    if ( compressed_file == NULL )
        error_exit( "Could not open output file" );

    puts( "Compressing..." );
    initialize_output_bitstream();
    initialize_arithmetic_encoder();

    /* Encode every character, including the final NUL. */
    do {
        c = input[ i++ ];
        convert_int_to_symbol( c, &s );
        encode_symbol( compressed_file, &s );
    } while ( c != '\0' );

    flush_arithmetic_encoder( compressed_file );
    flush_output_bitstream( compressed_file );
    fclose( compressed_file );
}
/*
 * Main compression procedure, similar to the main found in ARITH1E.C:
 * it initializes the options, the model and the coder, then reads input
 * symbols and encodes them one at a time.
 *
 * Two things differ from the simpler version:
 *
 *  1. Every 256 symbols a compression check is performed.  When the check
 *     requests it (compression ratio below 10%), a FLUSH symbol is
 *     encoded instead of reading input; this flushes the encoding model
 *     and makes the decoder flush its model during expansion.
 *
 *  2. Each symbol is encoded repeatedly until an encoding succeeds.  In a
 *     given order the model may have to transmit an ESCAPE, in which case
 *     the symbol is retransmitted using a lower order.  Usually this goes
 *     no further than the order -1 model; FLUSH and DONE drop back to the
 *     order -2 model.
 */
void CompressFile(FILE *input,BIT_FILE *output,int argc,char *argv[])
{
    SYMBOL s;
    int c;
    int escaped;
    int flush = 0;
    long int text_count = 0;

    initialize_options( argc, argv );
    initialize_model();
    initialize_arithmetic_encoder();

    do {
        /* Re-evaluate the compression ratio once per 256 symbols. */
        text_count++;
        if ( ( text_count & 0x0ff ) == 0 )
            flush = check_compression( input, output );

        c = flush ? FLUSH : getc( input );
        if ( c == EOF )
            c = DONE;

        /* Keep emitting until the symbol is coded without an escape. */
        do {
            escaped = convert_int_to_symbol( c, &s );
            encode_symbol( output, &s );
        } while ( escaped );

        if ( c != DONE ) {
            if ( c == FLUSH ) {
                flush_model();
                flush = 0;
            }
            update_model( c );
            add_character_to_model( c );
        }
    } while ( c != DONE );

    flush_arithmetic_encoder( output );
}
// write_class // Writes CLASS_xxx and CLASSD_xxx void write_class(Val stream, Val klass) { Class* pClass = klass->Decode<Class>(); format(stream, L"// ~S~%", pClass->m_name); write_val(stream, L"CLASS_", encode_symbol(pClass->m_name), klass); write_val(stream, L"ty_", encode_symbol(pClass->m_name), pClass->m_name); Val classd = pClass->m_instanced; if (nil != classd) { write_val(stream, L"CLASSD_", encode_symbol(pClass->m_name), pClass->m_instanced ); } } // write_class
/**
 * Sends one byte through the arithmetic encoder as a symbol occupying the
 * unit interval [value, value + 1) on a scale of 256.
 * @param[in] byte the data to write; only its low 8 bits are used
 * @param[in] file the output file
 */
static void writeByte(int byte, FILE *file)
{
    const int value = byte & 0x000000FF;
    SYMBOL s;

    s.scale = 256;
    s.low_count = value;
    s.high_count = value + 1;
    encode_symbol(file, &s);
}
void HuffmanConverter::decode_file(const char *inFile, const char *outFile) { // read from table and build frequency tree // build prefix tree // read .huf file and get bit string during reaching to leaf node of prefix tree // At each bit string, map it to one character in inverted encode table // Nessasary : .huf .tab if (inFile == nullptr) { std::cerr << "Input file name is missing!\n"; return; } std::string tpath = format_path_name(path_freq, inFile, postfix_tab); // table file's path std::string hpath = format_path_name(path_encoded, inFile, postfix_huf); // input file's path std::string dpath = std::string(path_decoded).append(inFile); // output file's path std::ifstream hufFile(hpath, std::ios::binary); std::ifstream tabFile(tpath); std::ofstream deFile(dpath); /*std::cout << tpath << "\n"; std::cout << hpath << "\n"; std::cout << dpath << "\n";*/ if (!tabFile.is_open()) { std::cerr << "tab file doesn't exist" << std::endl; return; } if (!hufFile.is_open()) { std::cerr << "target file doesn't exist" << std::endl; return; } // empty fTab.clear(); eTab.clear(); unsigned last_pos = parse_freq_table(tabFile); build_prefix_tree(); encode_symbol(); // read all binaries into memory unsigned long long before_sz = get_file_size(hpath); char *buf = new char[before_sz]; hufFile.read(buf, before_sz); std::string bit_string = ""; build_bit_string(buf, before_sz, bit_string, last_pos); //std::cout << parse_bitstr(bit_string); deFile << parse_bitstr(bit_string); unsigned long long after_sz = get_file_size(dpath); tabFile.close(); hufFile.close(); printf("%-20s : %s\n", "File Name", inFile); printf("%-20s : %llu\n", "File Size", before_sz); printf("%-20s : %s\n", "Table Name", tpath.c_str()); printf("%-20s : %s\n", "Decoded Location", path_decoded); printf("%-20s : %llu -> %llu\n","Size Change(bytes)", before_sz, after_sz); double unzip_rate = 100.0 + ((double)after_sz/before_sz)*100.0; printf("%-20s : %-4.2f%%\n","Decompression Rate", unzip_rate); }
/*
 * Sets the UTF-8 symbol set's width (2 when WcOption.east_asian_width is
 * set, otherwise 1) and then calls encode_symbol() on the symbol table of
 * the first charset_symbol_list entry whose charset is WC_CES_UTF_8.
 * The list is scanned up to the entry with a zero charset.
 */
void update_utf8_symbol(void)
{
    charset_symbol_set *entry = charset_symbol_list;

    if (WcOption.east_asian_width)
        utf8_symbol_set.width = 2;
    else
        utf8_symbol_set.width = 1;

    while (entry->charset) {
        if (entry->charset == WC_CES_UTF_8) {
            encode_symbol(entry->symbol);
            return;
        }
        entry++;
    }
}
// Codes one integer value with the arithmetic coder and then adapts the
// model to it.
//   arithModel - model used for the value-to-symbol conversion and updated afterwards
//   pb         - bit writer handed to the arithmetic encoder
//   value      - the value to encode
static void encode_arith_symbol(MscCoderArithModel *arithModel, PutBitContext *pb, int value)
{
    MscCoderArithSymbol rangeSymbol;

    // Step 1: look the value up in the model to obtain its range symbol.
    convert_int_to_symbol(arithModel, value, &rangeSymbol);

    // Step 2: hand the range symbol to the arithmetic encoder.
    encode_symbol(pb, &rangeSymbol);

    // Step 3: let the model account for the value just coded.
    update_model(arithModel, value);
}
/**
 * Writes the alphabet to the output file and prints how many output
 * positions (as reported by bit_ftell_output) the alphabet itself took.
 *
 * The alphabet size is written first. For fewer than 128 characters the
 * members of characters[] are sent from the last index down to the first;
 * for 128..UCHAR_MAX characters the values NOT matched in characters[]
 * are sent instead, walking from UCHAR_MAX down to 0. Each value is coded
 * on a scale equal to the previously sent value (initially UCHAR_MAX + 1).
 * For larger sizes nothing beyond the size byte is sent.
 *
 * NOTE(review): this presumably relies on characters[] being sorted in
 * ascending order - confirm against the code that fills it.
 *
 * @param[in] file the output file.
 */
static void writeAlphabet(FILE *file)
{
    SYMBOL s;
    long startPos;
    int upper = UCHAR_MAX + 1;
    int idx;
    int value;

    /* write alphabet size */
    writeByte(alphasize, file);
    startPos = bit_ftell_output(file);

    if (alphasize < 128) {
        /* send alphabet */
        idx = alphasize - 1;
        while (idx >= 0) {
            const int member = characters[idx];

            s.scale = upper;
            s.low_count = member;
            s.high_count = member + 1;
            encode_symbol(file, &s);
            upper = member;
            idx--;
        }
    } else if (alphasize <= UCHAR_MAX) {
        /* send complement of alphabet */
        idx = alphasize - 1;
        for (value = UCHAR_MAX; value >= 0; value--) {
            if (idx >= 0 && characters[idx] == value) {
                /* value belongs to the alphabet: skip it */
                idx--;
                continue;
            }
            s.scale = upper;
            s.low_count = value;
            s.high_count = value + 1;
            encode_symbol(file, &s);
            upper = value;
        }
    }

    printf("Alphabet cost: %ld\n", bit_ftell_output(file) - startPos);
}
// Encodes the `size` bytes starting at `start` with a static (order-0)
// frequency model and returns the value of bytes_used at the end.
//
// Output layout produced through `out`, in order:
//   - 6 bytes reserved at header_pos; write48bits(size, header_pos) is called for them
//   - 6 bytes reserved at bits_pos; write48bits(bits_used/8, bits_pos) is called at the end
//   - the 256 normalised counts, via utils::gammaEncode
//   - the arithmetic-coded payload, followed by zero bytes until at least
//     4 bytes have been counted after the counts
//
// NOTE(review): if SCALE is an integer, size == 0 divides by zero in the
// scaling loop - confirm callers never pass an empty range.
// NOTE(review): bytes_used / bits_used are not declared here; presumably
// they are members advanced by encode_symbol()/flush() - confirm.
size_t ArithmeticUtilEncoder::encode(byte* start, uint64 size) {
    std::vector<uint64> counts(256,0);
    long p=out->getPos(); // never read afterwards
    // Histogram of the input bytes.
    for(byte* b = start; b!= (start+size); b++) counts[*b]++;
    // Scale every count to the range of SCALE (rounding down) and keep the
    // running total in sum.
    int sum=0;
    for(int i=0;i<256;i++) sum+=(counts[i]=counts[i]*SCALE/size);
    // Reserve 6 bytes for the input size.
    long header_pos = out->getPos();
    for(int i=0;i<6;i++) out->writeByte(0);
    out->write48bits(size,header_pos);
    // Reserve 6 more bytes, filled in at the very end with bits_used/8.
    long bits_pos = out->getPos();
    for(int i=0;i<6;i++) out->writeByte(0);
    bytes_used=0;
    bytes_used += 6*2; // the two 6-byte fields reserved above
    bits_used=0;
    int it=0;
    int bsum=0; // never read afterwards
    // Give every symbol a count of at least 1. While the total is still
    // below SCALE the extra unit is simply added to sum; once sum == SCALE
    // the unit is taken from the next symbol (scanning circularly from `it`)
    // whose count is greater than 1, so the total stays unchanged.
    // NOTE(review): the inner while presumably always finds such a symbol;
    // it would not terminate if no count exceeded 1.
    for(int i=0;i<256;i++) {
        if(counts[i]==0) {
            if(sum==SCALE) {
                while(counts[it%256]<=1) it++;
                counts[(it++)%256]--;
            }
            else sum++;
            counts[i]++;
        }
    }
    // Spread the remaining SCALE - sum units over all symbols: add/256 to
    // each, plus one more for the first add%256 symbols.
    int add=SCALE-sum;
    for(int i=0;i<256;i++) {
        counts[i]+=add/256;
        if(i< (add%256)){
            counts[i]++;
        }
    }
    std::vector<uint64> diffs(256,0); // never read afterwards
    // Emit the counts; a and pp bracket the bytes they took.
    long a=bytes_used;
    bytes_used += utils::gammaEncode(counts,out);
    long pp=bytes_used;
    // Build the cumulative table: symbol i covers [low_count, high_count)
    // with high_count - low_count == counts[i].
    std::vector<SYMBOL> symbols(256);
    int cumul=0;
    for(int i=0;i<256;i++) {
        symbols[i].scale=SCALE;
        symbols[i].low_count=cumul;
        symbols[i].high_count= cumul = cumul + counts[i];
    }
    size_t tmp=bytes_used;
    byte* ptr=start;
    ptr=start;
    // Encode the payload one byte at a time.
    while(ptr != start + size) {
        encode_symbol(symbols[*ptr]);
        ptr++;
    }
    flush();
    // Pad with zero bytes until 4 bytes are accounted for since tmp.
    while(bytes_used-tmp<4) {
        out->writeByte(0);
        bytes_used++;
        bits_used+=8;
    }
    //std::cout<<"bytes used for counts: "<<1.0*(pp-a)/(bytes_used-a)<<"\n";
    std::cout<<"bytes used for counts: "<<(pp-a)<<"\n";
    out->write48bits(bits_used/8,bits_pos);
    return bytes_used;
}