static std::vector<u_int32_t> PerformSingleClusteringIteration(const std::vector<Datatype, Allocator>& data, std::function<double(const Datatype&, const Datatype&)>& distance_fn, const std::vector<Datatype, Allocator>& cluster_centers)
 {
     std::vector<u_int32_t> cluster_labels(data.size());
     for (size_t idx = 0; idx < data.size(); idx++)
     {
         const Datatype& datapoint = data[idx];
         const u_int32_t label = GetClosestCluster(datapoint, distance_fn, cluster_centers);
         cluster_labels[idx] = label;
     }
     return cluster_labels;
 }
Ejemplo n.º 2
0
bool BagOfWordsExtractor::dictionarize()
{
    //adjust the shape of the sample matrix in case it was not completely filled.
    if(current_row<max_bow_features)
        return false;


    fprintf(stdout,"dictionarizing!\n");

    cv::Mat cluster_labels(n_centers,1,CV_32S);
    cv::kmeans(samples,n_centers,cluster_labels,cv::TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1),n_attempts,cv::KMEANS_RANDOM_CENTERS,&centers);

    fprintf(stdout,"dictionarized!\n");

    //save the dictionary
    fstream f_dic;
    f_dic.open((context_path+"/"+name+"-"+time_tag+".dict").c_str(),fstream::out | ios_base::binary);
    float *tmp_centers=new float[centers.rows*centers.cols];
    for(int i=0; i<centers.rows; i++)
        for(int j=0; j<centers.cols; j++)
            tmp_centers[i*centers.cols+j]=centers.at<float>(i,j);
    f_dic.write((char*)tmp_centers,sizeof(float)*centers.rows*centers.cols);
    f_dic.close();

    fstream f_dic_ini;
    f_dic_ini.open((context_path+"/"+name+"-"+time_tag+".ini").c_str(),fstream::out);
    f_dic_ini<<"dictionary_type"<<TABS<<type<<endl;
    f_dic_ini<<"n_words"<<TABS<<n_centers<<endl;
    f_dic_ini<<"word_dimension"<<TABS<<bow_feature_size<<endl;
    f_dic_ini<<"dictionary"<<TABS<<name+"-"+time_tag<<endl;
    f_dic_ini.close();

    update_ini();

    dictionarized=true;

    return true;
}