static std::vector<u_int32_t> PerformSingleClusteringIteration(const std::vector<Datatype, Allocator>& data, std::function<double(const Datatype&, const Datatype&)>& distance_fn, const std::vector<Datatype, Allocator>& cluster_centers) { std::vector<u_int32_t> cluster_labels(data.size()); for (size_t idx = 0; idx < data.size(); idx++) { const Datatype& datapoint = data[idx]; const u_int32_t label = GetClosestCluster(datapoint, distance_fn, cluster_centers); cluster_labels[idx] = label; } return cluster_labels; }
bool BagOfWordsExtractor::dictionarize() { //adjust the shape of the sample matrix in case it was not completely filled. if(current_row<max_bow_features) return false; fprintf(stdout,"dictionarizing!\n"); cv::Mat cluster_labels(n_centers,1,CV_32S); cv::kmeans(samples,n_centers,cluster_labels,cv::TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1),n_attempts,cv::KMEANS_RANDOM_CENTERS,¢ers); fprintf(stdout,"dictionarized!\n"); //save the dictionary fstream f_dic; f_dic.open((context_path+"/"+name+"-"+time_tag+".dict").c_str(),fstream::out | ios_base::binary); float *tmp_centers=new float[centers.rows*centers.cols]; for(int i=0; i<centers.rows; i++) for(int j=0; j<centers.cols; j++) tmp_centers[i*centers.cols+j]=centers.at<float>(i,j); f_dic.write((char*)tmp_centers,sizeof(float)*centers.rows*centers.cols); f_dic.close(); fstream f_dic_ini; f_dic_ini.open((context_path+"/"+name+"-"+time_tag+".ini").c_str(),fstream::out); f_dic_ini<<"dictionary_type"<<TABS<<type<<endl; f_dic_ini<<"n_words"<<TABS<<n_centers<<endl; f_dic_ini<<"word_dimension"<<TABS<<bow_feature_size<<endl; f_dic_ini<<"dictionary"<<TABS<<name+"-"+time_tag<<endl; f_dic_ini.close(); update_ini(); dictionarized=true; return true; }