/*
 * Build a Huffman tree from the byte frequencies of an input stream.
 *
 * fin            Stream to read. It is consumed from its current position
 *                until end-of-file or the first read error.
 * numberOfChars  Incremented once for every byte read. The counter is NOT
 *                reset here, so the caller must initialise it beforehand;
 *                the relative frequencies are computed against its final value.
 *
 * Returns the result of createHuffmanTree() applied to the priority queue
 * that createQueue() builds from every byte value with a non-zero count.
 */
queue *HuffmanTree(FILE *fin, uint32_t *numberOfChars){
    int v[256] = {0};               /* occurrence count per byte value */
    struct huffman Huffman[256];    /* one candidate node per distinct byte */
    int ch;
    int i;

    /* Read all characters from file.
     * fgetc() yields EOF on end-of-file AND on a read error, so the loop
     * always terminates (the fscanf/feof pairing spun forever on an error).
     * A successful read is an unsigned char widened to int, i.e. 0..255,
     * which makes it a safe index into v[]. */
    while((ch = fgetc(fin)) != EOF){
        v[ch]++;
        (*numberOfChars)++;
    }

    /* Exception
     * NULL counted more than once
     *
     * NOTE(review): kept unchanged for compatibility with existing output.
     * The read loop does not appear to count any byte twice, so this looks
     * like it under-counts NUL bytes by one - confirm against the decoder
     * before removing it. */
    if(v[0] > 0)
        v[0]--;

    /* Create node for each character that actually occurs */
    int countNodes = 0;
    for(i = 0; i < 256; i++){
        if(v[i] != 0){
            Huffman[countNodes].caracter = i;
            Huffman[countNodes].frequency = (float)(v[i])/(float)(*numberOfChars);
            countNodes++;
        }
    }

    /* Priority queue */
    queue *head = createQueue(Huffman, countNodes);

    /* Create HuffmanTree */
    queue *root = createHuffmanTree(head);

    return root;
}
// encoder void Huffman::encode() { cout << "Begin encoding..." << endl; cout << "Open file..." << endl; inFile_.open(inFileName_.c_str(),ios::in); cout << "Calculating frequency of all ascii chars..." << endl; createNodeArray(); cout << "Done!" << endl; inFile_.close(); cout << "Creating Priority-Queue..." << endl; createPq(); cout << "Done!" << endl; cout << "Creating Huffman-Tree..." << endl; createHuffmanTree(); cout << "Done!" << endl; cout << "Calculating Huffman-Code..." << endl; calculateHuffmanCode(); cout << "Done!" << endl; cout << "Saving to outputfile..." << endl; saveToFile(); cout << "Done!" << endl; cout << "Ecoding finished!" << endl; }
/*
 * Construct a vocabulary from a training file.
 *
 * train_file  path handed to loadFromTrainFile().
 * min_count   minimum-count threshold stored in m_min_count; forced to 1
 *             when doctag is true.
 * doctag      when true, the Huffman tree is not built.
 *
 * The word table starts with room for 1000 zero-initialised entries and the
 * hash table with vocab_hash_size zero-initialised slots.
 */
Vocabulary::Vocabulary(const char * train_file, int min_count, bool doctag) :
  m_vocab(NULL),
  m_vocab_size(0),
  m_train_words(0),
  m_vocab_capacity(1000),
  m_vocab_hash(NULL),
  m_min_reduce(1),
  m_min_count(doctag ? 1 : min_count),
  m_doctag(doctag)
{
  // Zero-filled storage for the word entries and for the hash slots.
  m_vocab = static_cast<struct vocab_word_t *>(
      calloc(m_vocab_capacity, sizeof(struct vocab_word_t)));
  m_vocab_hash = static_cast<int *>(calloc(vocab_hash_size, sizeof(int)));

  loadFromTrainFile(train_file);

  // Only the non-doctag vocabulary gets a Huffman tree.
  if(!m_doctag)
  {
    createHuffmanTree();
  }
}
/*
 * Generate the compression table for a given table of character frequencies.
 *
 * pFrequencies  table of character frequencies (ENTRIES elements).
 * resultArray   empty compression table supplied by the caller; it is filled
 *               in by this call.
 *
 * The table will have 256 entries and look like:
 *   01001
 *   001
 *   0111
 *   ...
 *   00001
 *
 * The work is delegated to createHuffmanTree(). Always returns FILE_SUCCESS.
 */
eFileCode GetTableForGeneric(unsigned long long pFrequencies[ENTRIES],
                             huffResult resultArray[256])
{
    /* The tree builder receives both tables and populates resultArray. */
    createHuffmanTree(pFrequencies, resultArray);

    return FILE_SUCCESS;
}
/*
 * Generate the compression table for a given table of character frequencies.
 *
 * pFrequencies  table of character frequencies.
 * resultArray   out-parameter; on return *resultArray holds the table
 *               produced by createHuffmanTree(pFrequencies).
 *               NOTE(review): ownership of the returned table is not visible
 *               from here - confirm who is expected to release it.
 *
 * The table will have 256 entries and look like:
 *   01001
 *   001
 *   0111
 *   ...
 *   00001
 *
 * Always returns FILE_SUCCESS.
 */
eFileCode GetTableForGeneric(unsigned* pFrequencies, huffResult** resultArray)
{
    /* Build the table first, then publish it through the out-parameter. */
    huffResult* table = createHuffmanTree(pFrequencies);
    *resultArray = table;

    return FILE_SUCCESS;
}