Ejemplo n.º 1
0
		// Computes an entropy score for `word` from the entries that
		// `entro_trie.commonPrefixPredict` returns for it.
		//
		// When the prediction call reports more than one result, the first
		// result is skipped (treated as the word itself) and every remaining
		// result is bucketed by the first (up to) two bytes of the suffix that
		// `entro_trie.suffix` extracts for it. The frequencies (`value`) in each
		// bucket are summed and the Shannon entropy (natural log) of the
		// resulting distribution is returned.
		//
		// When at most one result is reported, the function falls back to
		// `res.value / 20.0`.
		//
		// Parameters:
		//   word        key looked up in the trie.
		//   res         trie result for `word`; only `res.value` is read, and
		//               only on the fallback path.
		//   entro_trie  trie queried for predictions and suffixes.
		//   total_freq  not used by this function; kept for interface
		//               compatibility.
		//
		// Returns the entropy (>= 0) or the fallback score. Returns 0 when the
		// collected frequencies do not sum to a positive number.
		float calc_entropy(const string& word
				, const trie_result_t& res
				, trie_t& entro_trie
				, const uint32_t total_freq)
		{
			char suffix[256];
			hash_t		rlt_hash;
			trie_result_list rlts;
			size_t rlts_len = entro_trie.commonPrefixPredict(word.c_str(), rlts, NUM_RESULT);
			float entropy = 0.0;
			if(rlts_len > 1) {
				int entropy_freq = 0;
				// Ignore itself: skip the first result, but never step past
				// end() if the list turned out to be empty.
				trie_result_list::iterator it = rlts.begin();
				if (it != rlts.end()) {
					++it;
				}
				for(; it != rlts.end(); ++it) {
					// The scratch buffer is 256 bytes; longer suffixes are a bug.
					assert(it->length < 250);
					entro_trie.suffix(suffix, it->length, it->id);
					// Bucket key: the first two bytes of the suffix (presumably
					// one double-byte character -- TODO confirm the encoding).
					// substr clamps to the string length, so a suffix shorter
					// than two bytes no longer reads past the end.
					const string tmp = string(suffix).substr(0, 2);
					//fprintf(glog.fd, "%s %s %s %d\n",word.c_str(), suffix, tmp.c_str(), it->value);

					hash_t::iterator it_map = rlt_hash.find(tmp);
					if (it_map == rlt_hash.end()) {
						rlt_hash[tmp] = it->value;
					} else {
						it_map->second += it->value;
					}

					entropy_freq += it->value;
				}
				// Without any frequency mass the distribution is undefined;
				// leave the entropy at 0 instead of dividing by zero.
				if (entropy_freq > 0) {
					for(hash_t::iterator map_it = rlt_hash.begin();
							map_it != rlt_hash.end();
							++map_it) {
						float p = static_cast<float>(map_it->second) / entropy_freq;
						// p * log(p) tends to 0 as p -> 0; skipping non-positive
						// buckets avoids producing NaN from log(0).
						if (p > 0) {
							entropy -= p * log(p);
						}
						//fprintf(glog.fd, "entropy %s\t%d\n", map_it->first.c_str(), map_it->second);
					}
				}
			} else {
				// Fallback when there are no followers to measure: scale the
				// word's own value by a fixed factor.
				entropy = static_cast<float>(res.value)/20.0;
			}

			return entropy;
		}