float calc_entropy(const string& word , const trie_result_t& res , trie_t& entro_trie , const uint32_t total_freq) { char suffix[256]; hash_t rlt_hash; trie_result_list rlts; size_t rlts_len = entro_trie.commonPrefixPredict(word.c_str(), rlts, NUM_RESULT); float entropy = 0.0; if(rlts_len > 1) { int entropy_freq = 0; // Ignore itself trie_result_list::iterator it = rlts.begin(); for(it++; it != rlts.end(); it++) { assert(it->length < 250); entro_trie.suffix(suffix, it->length, it->id); string tmp_s(suffix); string tmp(tmp_s.begin(), tmp_s.begin()+2); //fprintf(glog.fd, "%s %s %s %d\n",word.c_str(), suffix, tmp.c_str(), it->value); hash_t::iterator it_map = rlt_hash.find(tmp); if (it_map == rlt_hash.end()) { rlt_hash[tmp] = it->value; } else { it_map->second += it->value; } entropy_freq += it->value; } for(hash_t::iterator map_it = rlt_hash.begin(); map_it != rlt_hash.end(); map_it++) { float p = static_cast<float>(map_it->second) / entropy_freq; entropy -= p * log(p); //fprintf(glog.fd, "entropy %s\t%d\n", map_it->first.c_str(), map_it->second); } } else { entropy = static_cast<float>(res.value)/20.0; } return entropy; }