/**
 * Huffman-encodes `data` and writes the result into `encodeData`.
 *
 * Steps: count symbol occurrences, build the Huffman tree, derive the
 * per-symbol code table into `codeInfo`, then emit the encoded stream.
 *
 * @param encodeData  output buffer handed to writeEncodeData()
 * @param codeInfo    output table filled by generateCode()
 * @param data        input bytes to encode
 * @param dataLen     number of bytes in `data`
 * @return the value returned by writeEncodeData() (the encoded length),
 *         or -1 when more codes were generated than there are symbols
 */
int encode(UCHAR *encodeData, CodeInfo *codeInfo, UCHAR *data, int dataLen)
{
    // Number of entries taken from charSet; the same value is used below
    // for the tree size and for the validity check.
    const int symbolCount = 22;

    // Count how often each byte value occurs.
    TreeNode nodes[NODE_SIZE];
    memset(nodes, 0, sizeof(nodes));
    for (int sym = 0; sym < symbolCount; ++sym) {
        nodes[sym].chr = charSet[sym];
    }
    for (int pos = 0; pos < dataLen; ++pos) {
        nodes[convertChar2Index(data[pos])].occurrence++;
    }

    // Build the Huffman tree.
    const int root = CreateHuffmanTree(nodes, symbolCount - 1);

    // Depth-first traversal: byte value -> code information.
    const int codeInfoNum = generateCode(codeInfo, nodes, root, 0, 0);
    if (codeInfoNum > symbolCount) {
        // The input contains characters other than those listed in charSet.
        return -1;
    }

    // Write the compressed data to memory and report its length.
    return writeEncodeData(encodeData, nodes, root, codeInfo, data, dataLen);
}
/**
 * Reads whitespace-separated tokens from stdin until the token "END" (or end
 * of input) and, for each token, prints:
 *
 *     <fixed 8-bit size> <Huffman-coded size in bits> <ratio, one decimal>
 *
 * Supported symbols are 'A'..'Z' and '_' (27 in total). A token containing
 * any other character is reported on stderr and skipped, because indexing the
 * frequency table with it would read/write outside the array.
 */
int main() {
    /* Alphabet: 26 letters + '_'. Input buffer size must stay in sync with
     * the "%999s" width used in scanf below (capacity - 1). */
    enum { kAlphabetSize = 27, kInputCapacity = 1000 };

    int map[kAlphabetSize];   /* occurrences per symbol slot            */
    int show[kAlphabetSize];  /* occurrences of the symbols that appear */
    int len[kAlphabetSize];   /* code length (tree depth) per leaf      */
    char input[kInputCapacity];
    const char end[] = "END";
    int length, i, step;

    /* The width limit prevents overflowing `input` on over-long tokens. */
    while (scanf("%999s", input) > 0) {
        if (strcmp(input, end) == 0) {
            break;
        }

        for (i = 0; i < kAlphabetSize; i++) {
            map[i] = 0;
        }

        length = strlen(input);
        int valid = 1;
        for (i = 0; i < length; i++) {
            if (input[i] == '_') {
                map[26]++;
            } else if (input[i] >= 'A' && input[i] <= 'Z') {
                map[input[i] - 'A']++;
            } else {
                /* Would index outside map[] -- reject the token instead. */
                valid = 0;
                break;
            }
        }
        if (!valid) {
            fprintf(stderr, "skipping input with unsupported character: %s\n", input);
            continue;
        }

        /* Compact the non-zero frequencies; they become the tree's leaves. */
        step = 0;
        for (i = 0; i < kAlphabetSize; i++) {
            if (map[i] != 0) {
                show[step] = map[i];
                step++;
            }
        }

        /* NOTE(review): if CreateHuffmanTree allocates storage for HT, it is
         * never released in this loop -- confirm against its definition. */
        HuffmanTree HT;
        CreateHuffmanTree(HT, step, show);

        /* Code length of leaf i = number of parent links up to the root
         * (the root is marked by parent == -1). */
        for (i = 0; i < step; i++) {
            int p = HT[i].parent;
            int count = 0;
            while (p != -1) {
                p = HT[p].parent;
                count++;
            }
            len[i] = count;
        }
        /* A single distinct symbol sits at depth 0 but still needs one bit. */
        if (step == 1) {
            len[0] = 1;
        }

        int sum = 0;
        for (i = 0; i < step; i++) {
            sum += len[i] * show[i];
        }

        double rate = ((double)length * 8 / sum);
        printf("%d %d %.1lf\n", length * 8, sum, rate);
    }
    return 0;
}
void Compression::HuffmanVLD() { std::fstream huf(path + std::string(".huf"),std::ios::in); std::fstream rld(path + std::string(".rlc"),std::ios::out); std::string str; huf >> str; std::stringstream ss(str); std::vector<std::string> split; for (std::string each;std::getline(ss, each, '/');split.push_back(each)); //std::cout << split.size() << std::endl; std::map<char,int> prob_table; for(size_t i = 0;i < split.size();i += 2){ prob_table.insert(std::make_pair(split[i].at(0),std::stoi(split[i + 1]))); } std::vector<HuffmanNode*> huffman_array; CreateHuffmanTree(prob_table,huffman_array); std::vector<HuffmanNode> huffman_table; CreateHuffmanCode(huffman_table,huffman_array[huffman_array.size()-1],std::string("")); bool null_flag = false; HuffmanNode *root = huffman_array[huffman_array.size()-1]; HuffmanNode *it = root; while(huf >> str){ for(size_t i = 0;i < str.size();++i){ char c = str[i]; if(c == '0'){ if(it->L){ it = it->L; } else{ null_flag = true; } } else if(c == '1'){ if(it->R){ it = it->R; } else{ null_flag = true; } } if(null_flag){ rld << it->data; null_flag = false; it = root; --i; } else if(i == str.size() - 1){ rld << it->data; null_flag = false; it = root; } } rld << std::endl; } huf.close(); }
/// Constructs the Huffman tree data for the given vocabulary.
///
/// `tokens_` is constructed from `vocab.size()` (presumably one entry per
/// vocabulary item -- confirm against the member's type), `points_pool_` and
/// `code_pool_` are initialised with `POINTS_BLOCK_SIZE` and
/// `CODES_BLOCK_SIZE` respectively, and the actual construction is delegated
/// to `CreateHuffmanTree(vocab, tokens_, points_pool_, code_pool_)`.
///
/// @param vocab vocabulary passed through to `CreateHuffmanTree`
yzw2v::huff::HuffmanTree::HuffmanTree(const vocab::Vocabulary& vocab) : tokens_(vocab.size()) , points_pool_{POINTS_BLOCK_SIZE} , code_pool_{CODES_BLOCK_SIZE} { CreateHuffmanTree(vocab, tokens_, points_pool_, code_pool_); }
void Compression::HuffmanVLC() { std::fstream rld(path + std::string(".rlc"),std::ios::in); std::fstream huf(path + std::string(".huf"),std::ios::out); std::vector<std::string> rlc; std::string str; while(rld >> str){ rlc.push_back(str); } std::map<char,int> prob_table; for(size_t i = 0;i < rlc.size();++i){ for(size_t j = 0;j < rlc[i].size();++j){ if(prob_table.insert(std::make_pair(rlc[i][j],1)).second){ } else{ ++prob_table[rlc[i][j]]; } } } for(std::map<char,int>::iterator it = prob_table.begin();it != prob_table.end();++it){ huf << it->first << "/" << it->second << "/"; } huf << std::endl; std::vector<HuffmanNode*> huffman_array; CreateHuffmanTree(prob_table,huffman_array); std::vector<HuffmanNode> huffman_table; CreateHuffmanCode(huffman_table,huffman_array[huffman_array.size()-1],std::string("")); for(size_t i = 0;i < rlc.size();++i){ for(size_t j = 0;j < rlc[i].size();++j){ huf << std::find(huffman_table.begin(),huffman_table.end(),rlc[i][j])->code; huffman_total_byte += std::find(huffman_table.begin(),huffman_table.end(),rlc[i][j])->code.size(); } huf << std::endl; } huffman_total_byte /= 8.0f; /* //binary file std::vector<std::vector<bool> > binary; for(size_t i = 0;i < rlc.size();++i){ std::vector<bool> temp; for(size_t j = 0;j < rlc[i].size();++j){ std::string code = std::find(huffman_table.begin(),huffman_table.end(),rlc[i][j])->code; for(size_t k = 0;k < code.size();++k){ temp.push_back(((code[k] == '0')?0:1)); } } binary.push_back(temp); } char c; for(size_t i = 0;i < binary.size();++i){ for(size_t j = 0;j < binary[i].size();++j){ if((j % 8) == 0){ huf << c; c = 0; } c += (binary[i][j] << (j % 8)); } } // std::fstream huf2(path + std::string(".huf"),std::ios::in); std::fstream hufd(path + std::string(".ddhuf"),std::ios::out); huf2 >> str; hufd << str; hufd << std::endl; while(!huf2.eof()){ huf2.get(c); for(int j = 0;j < 8;++j){ hufd << ((c & (1 << j))?'1':'0'); } hufd << std::endl; } */ rld.close(); huf.close(); }