예제 #1
0
/**
 * Builds the weibo-id list of a topic from the weibo ids attached to its words.
 *
 * For every word of the topic, each weibo id in that word's id set adds 1 to a
 * per-id tally kept in the topic's weibo-id map. Ids whose tally reaches
 * BELONG_TOPIC_THROD are appended to the topic's weibo-id list (as subword
 * entries holding the id and its tally) and counted in topic_message_num.
 * The tally map is then cleared and the list is sorted with `weibosort`.
 * If the resulting message count does not exceed MIN_TOPIC_MESSAGE_NUM, the
 * list is emptied and its storage released.
 *
 * @param one_topic topic to update in place.
 */
void Cluster::ListEveryTopicWeiboId(Topic &one_topic) {
    list<TopicWord>::iterator word_it = one_topic.GetsTopic()->begin();
    std::map<std::string, double> *id_tally = one_topic.GetTopicWeiboId();
    one_topic.topic_message_num = 0;

    // Pass 1: count how many of the topic's words reference each weibo id.
    while (word_it != one_topic.GetsTopic()->end()) {
        std::set<std::string>::iterator id_it =
            word_it->GetWordToWeiboidList()->begin();
        while (id_it != word_it->GetWordToWeiboidList()->end()) {
            // operator[] value-initializes a missing entry to 0.0, so a first
            // sighting ends up at 1.0 and later sightings keep incrementing.
            (*id_tally)[*id_it] += 1;
            ++id_it;
        }
        ++word_it;
    }

    // Pass 2: keep only the ids whose tally reaches the membership threshold.
    for (std::map<std::string, double>::iterator tally_it = id_tally->begin();
            tally_it != id_tally->end(); ++tally_it) {
        if (tally_it->second >= this->BELONG_TOPIC_THROD) {
            one_topic.GetWeiboIdList()->push_back(
                subword(tally_it->first, tally_it->second));
            ++one_topic.topic_message_num;
        }
    }

    // The tally is only scratch data; drop it before sorting the result.
    one_topic.GetTopicWeiboId()->clear();
    std::sort(one_topic.GetWeiboIdList()->begin(),
              one_topic.GetWeiboIdList()->end(), weibosort);

    // Free memory promptly: a topic with too few messages keeps no id list.
    // Swapping with an empty temporary both empties the vector and releases
    // its capacity.
    if (one_topic.topic_message_num <= this->MIN_TOPIC_MESSAGE_NUM) {
        std::vector<subword>().swap(one_topic.weibo_id_list);
    }
    // NOTE: the original comment here mentions storing the results in the
    // database; no such code is present in this function.
}
예제 #2
0
/**
 * Computes the distance between a topic (cluster) and a feature word.
 *
 * The feature word's row is looked up in co_ccur_matrix. For every word of the
 * topic that appears in that row, the stored co-occurrence value is added up;
 * the sum is then divided by the number of words in the topic.
 *
 * @param topic       the topic (cluster) whose words are compared.
 * @param topic_word  the feature word.
 * @return the averaged co-occurrence value; 0.0 when the feature word has no
 *         entry in co_ccur_matrix or when the topic contains no words.
 */
double Cluster::Cal_Words_Topic_Distance(Topic &topic, TopicWord &topic_word) {
    const std::string &keyword = topic_word.GetTopicWord();
    std::map<string, CooccurrenceWord>::iterator co_it = co_ccur_matrix.find(keyword);
    if (co_it == co_ccur_matrix.end()) {
        return 0.0;
    }
    std::map<std::string, double>* map_map = co_it->second.GetWordCooccurrence();

    // An empty topic would otherwise evaluate 0.0 / 0 below and return NaN.
    if (topic.GetsTopic()->size() == 0) {
        return 0.0;
    }

    double topic_word_dis = 0.0;
    // NOTE(original author): asks whether GetsTopic() really has to be
    // accessed through a pointer here, and why.
    list<TopicWord>::iterator clu_it = topic.GetsTopic()->begin();
    for (; clu_it != topic.GetsTopic()->end(); ++clu_it) {
        std::map<std::string, double>::iterator second_it =
            map_map->find(clu_it->GetTopicWord());
        if (second_it != map_map->end()) {
            topic_word_dis += second_it->second;
        }
    }
    return topic_word_dis / topic.GetsTopic()->size();
}