/*
 * Builds the list of weibo (message) ids that belong to a topic.
 *
 * For every word of the topic, each weibo id in that word's id set is counted
 * in the topic's scratch map (GetTopicWeiboId()). Ids whose count is at least
 * BELONG_TOPIC_THROD are appended to the topic's weibo id list as subword
 * entries and counted in topic_message_num. The scratch map is cleared
 * afterwards.
 *
 * If topic_message_num ends up <= MIN_TOPIC_MESSAGE_NUM, the weibo id list is
 * emptied and its storage released (topic_message_num itself is left as
 * computed). Otherwise the list is sorted with the weibosort comparator.
 *
 * @param one_topic  topic to update in place
 */
void Cluster::ListEveryTopicWeiboId(Topic &one_topic) {
  std::map<std::string, double> *topic_weibo_id_map = one_topic.GetTopicWeiboId();
  one_topic.topic_message_num = 0;

  // Count how many of the topic's words reference each weibo id.
  for (std::list<TopicWord>::iterator topic_it = one_topic.GetsTopic()->begin();
       topic_it != one_topic.GetsTopic()->end(); ++topic_it) {
    for (std::set<std::string>::iterator id_it = topic_it->GetWordToWeiboidList()->begin();
         id_it != topic_it->GetWordToWeiboidList()->end(); ++id_it) {
      // operator[] value-initialises a missing entry to 0.0, so a first
      // occurrence yields 1.0 exactly as an explicit insert of 1.0 would,
      // with a single lookup and no copy of the id string.
      (*topic_weibo_id_map)[*id_it] += 1.0;
    }
  }

  // Keep only the ids referenced by enough of the topic's words.
  for (std::map<std::string, double>::iterator count_it = topic_weibo_id_map->begin();
       count_it != topic_weibo_id_map->end(); ++count_it) {
    if (count_it->second >= this->BELONG_TOPIC_THROD) {
      one_topic.GetWeiboIdList()->push_back(subword(count_it->first, count_it->second));
      one_topic.topic_message_num += 1;
    }
  }
  one_topic.GetTopicWeiboId()->clear();

  if (one_topic.topic_message_num <= this->MIN_TOPIC_MESSAGE_NUM) {
    // Free memory promptly: too few messages, so drop the list and release
    // its capacity (swap with an empty temporary). Sorting it first would be
    // wasted work.
    // NOTE(review): assumes GetWeiboIdList() exposes weibo_id_list - confirm.
    std::vector<subword>().swap(one_topic.weibo_id_list);
  } else {
    std::sort(one_topic.GetWeiboIdList()->begin(), one_topic.GetWeiboIdList()->end(), weibosort);
  }
  // The original left a placeholder here for storing the result in the
  // database; no persistence code exists in this function.
}
/*
 * Computes the distance between a cluster (topic) and a feature word.
 *
 * Looks up the co-occurrence entry of topic_word in co_ccur_matrix, sums the
 * co-occurrence values recorded there for each word currently in the topic,
 * and divides the sum by the number of words in the topic.
 *
 * @param topic       topic (cluster) whose words are compared
 * @param topic_word  feature word
 * @return the averaged co-occurrence value; 0.0 when topic_word has no entry
 *         in co_ccur_matrix or when the topic contains no words
 */
double Cluster::Cal_Words_Topic_Distance(Topic &topic, TopicWord &topic_word) {
  const std::string &keyword = topic_word.GetTopicWord();

  std::map<std::string, CooccurrenceWord>::iterator co_it = co_ccur_matrix.find(keyword);
  if (co_it == co_ccur_matrix.end()) {
    return 0.0;
  }

  // An empty topic would otherwise produce 0.0 / 0 (NaN) in the final division.
  if (topic.GetsTopic()->empty()) {
    return 0.0;
  }

  std::map<std::string, double> *cooccurrence = co_it->second.GetWordCooccurrence();

  double topic_word_dis = 0.0;
  // Original author's open question: "does this have to be a pointer here? why?"
  for (std::list<TopicWord>::iterator clu_it = topic.GetsTopic()->begin();
       clu_it != topic.GetsTopic()->end(); ++clu_it) {
    std::map<std::string, double>::iterator second_it = cooccurrence->find(clu_it->GetTopicWord());
    if (second_it != cooccurrence->end()) {
      topic_word_dis += second_it->second;
    }
  }

  return topic_word_dis / topic.GetsTopic()->size();
}