/**
 * Draws a synthetic data set from a clustered joint distribution.
 *
 * One integer variable with values in [0, cardinality-1] is created per item
 * (nbrClusters * clustSize in total, labelled by its index). Items are grouped
 * into nbrClusters contiguous clusters of clustSize items each, and the joint
 * distribution built by createClusteringJointDist() is sampled nbrInds times.
 *
 * @return the result of Transpose() applied to the sampled matrix, in which
 *         each row holds one draw (one value per variable).
 */
MatrixPtr GenerateClusteredData::operator()() {
  // One variable per (cluster, item) pair; the label is the variable's index.
  const auto varCount = nbrClusters * clustSize;
  Variables variables;
  for (size_t id = 0; id < varCount; ++id) {
    const std::string label = boost::lexical_cast<std::string>(id);
    variables ^= Variable(label, plIntegerType(0, cardinality - 1));
  }

  // Cluster c owns the contiguous ids [c*clustSize, (c+1)*clustSize).
  Clustering clustering;
  clustering.reserve(nbrClusters);
  size_t nextId = 0;
  for (size_t c = 0; c < nbrClusters; ++c) {
    Cluster members;
    for (size_t k = 0; k < clustSize; ++k) {
      members.push_back(nextId);
      ++nextId;
    }
    clustering.push_back(members);
  }

  plJointDistribution jointDist = createClusteringJointDist(variables, clustering);
  plValues values(variables);

  auto matrix = std::make_shared<Matrix>();
  matrix->reserve(nbrInds);

  // Each draw fills `values`; copy it out as one integer row of the matrix.
  for (size_t draw = 0; draw < nbrInds; ++draw) {
    jointDist.draw(values);
    const size_t width = variables.size();
    std::vector<int> row(width);
    for (size_t col = 0; col < width; ++col) {
      row[col] = values[variables[col]];
    }
    matrix->push_back(row);
  }

  return Transpose(*matrix);
}
/**
 * Adds a vertex to the graph and initialises the Node stored on it.
 *
 * @param graph        graph that receives the new vertex
 * @param cardinality  number of states; the node's variable ranges over
 *                     [0, cardinality - 1]
 * @param isLeaf       stored in the node's isLeaf flag
 * @param label        label used for both the node and its random variable
 * @param position     physical position on the genome; the default -1 wraps
 *                     to the largest unsigned value
 * @param level        stored in the node's level field; same -1 default
 * @return the descriptor of the newly added vertex
 */
inline vertex_t createVertex( Graph& graph, const unsigned &cardinality, const bool isLeaf, const std::string &label = "", const unsigned &position = -1, const unsigned &level = -1) {
  // boost::add_vertex appends a vertex and hands back its descriptor.
  const vertex_t id = boost::add_vertex(graph);
  Node &node = graph[id];

  // Identity and back-reference to the owning graph.
  node.index = id;
  node.graph = &graph;
  node.label = label;

  // Payload describing the variable carried by this vertex.
  node.variable = RandomVariable(label, plIntegerType(0, cardinality - 1));
  node.isLeaf = isLeaf;
  node.position = position;  // physical position on the genome
  node.level = level;

  return id;
}
/**
 * Runs up to params.nbrSteps rounds of: cluster the current items, learn one
 * candidate latent node per non-singleton cluster (create_latent_node + MultiEM),
 * then add to the graph the candidates accepted by accept_latent_variable().
 *
 * The loop returns early when a clustering contains only singletons, when no
 * candidate is accepted in a step, or when at most one item remains in the
 * local-to-global map.
 *
 * @param clustAlgo  clustering algorithm; re-targeted with set_measure() from
 *                   the second step on
 * @param cardFunc   computes the cardinality of the latent variable of a cluster
 * @param graph      graph that is extended in place with accepted latent nodes
 *                   and their edges
 */
void FLTM::execute( ClustAlgoPtr clustAlgo, CardFuncPtr cardFunc, GraphPtr graph ) {
  auto lab2Idx = create_index_map(*graph);
  Local2GlobalPtr l2g = create_local_to_global_map(*graph);
  auto criteria = clustAlgo->get_criteria();
  // Base used to number latent variables; advanced once per step below.
  int verticesNb = boost::num_vertices(*graph);

  for ( int step = 0; step < params.nbrSteps; ++step ) {
    if ( step > 0 ) {
      criteria = create_current_criteria( *graph, *l2g, params.maxDist, step );
      clustAlgo->set_measure( graph, l2g, criteria );
    }

    BOOST_LOG_TRIVIAL(trace) << "FLTM - step[" << step << "] over " << params.nbrSteps;
    BOOST_LOG_TRIVIAL(trace) << "running clustering " << clustAlgo->name() << " on " << l2g->size();

    auto partition = clustAlgo->run();
    auto clustering = partition.to_clustering();
    int nonSingletons = number_non_singletons(clustering);
    BOOST_LOG_TRIVIAL(trace) << "to obtain " << clustering.size() << " clusters with "
                             << nonSingletons << " non-singletons clusters" ;
    if ( nonSingletons == 0 ) {
      BOOST_LOG_TRIVIAL(trace) << "stop due to only singleton.";
      return;
    }

    // Keep the previous local-to-global mapping and rebuild *l2g from scratch.
    std::vector<int> l2gTemp(*l2g);
    Local2Global().swap(*l2g);
    int nbrGoodClusters = 0;

    // Collect the indices of the clusters that get a latent candidate. Each one
    // is given a compact slot in [0, nbrCandidates): the slot indexes the result
    // buffer and numbers the latent variable. Indexing the buffer by the raw
    // cluster index would run past its end as soon as a singleton precedes a
    // non-singleton cluster, and would let labels overlap between steps.
    const int nbrClusters = static_cast<int>(clustering.size());
    std::vector<int> candidates;
    candidates.reserve(nonSingletons);
    for ( int i = 0; i < nbrClusters; ++i ) {
      if ( clustering[i].size() > 1 ) {
        candidates.push_back(i);
      }
    }
    // NOTE(review): presumably equal to nonSingletons; the locally counted value
    // is used for sizing and numbering so both always agree with the loops below.
    const int nbrCandidates = static_cast<int>(candidates.size());

#ifdef _OPENMP
    // sets the max number of threads we can use
    omp_set_num_threads(params.jobsNumber);
#endif

    // Shared result buffer: each iteration writes only its own slot.
    std::vector<Node> latentVector(nbrCandidates);

    // Parallelizable section: build and fit every candidate independently.
    // A signed loop index is kept for compatibility with older OpenMP versions.
    #pragma omp parallel for schedule(dynamic)
    for ( int slot = 0; slot < nbrCandidates; ++slot ) {
      auto &cluster = clustering[candidates[slot]];
      RandVar var( "latent-" + std::to_string(verticesNb + slot),
                   plIntegerType(0, cardFunc->compute(cluster) - 1 ) );
      latentVector[slot] = create_latent_node( graph, var, l2gTemp, lab2Idx, cluster );
      MultiEM em(params.nbrRestarts);
      em.run( *graph, latentVector[slot], params.emThres );
    }

    // Non parallelizable section: the graph and the index maps are mutated here,
    // visiting the clusters in their original order.
    int slot = 0;
    for ( int i = 0; i < nbrClusters; ++i ) {
      auto &cluster = clustering[i];
      if ( cluster.size() > 1 ) {
        Node &latentNode = latentVector[slot];
        ++slot;
        if ( accept_latent_variable( *graph, latentNode, params.latentVarQualityThres ) ) {
          nbrGoodClusters++;
          add_latent_node( *graph, latentNode );
          update_index_map( *l2g, l2gTemp, latentNode );
          lab2Idx[ latentNode.getLabel() ] = latentNode.index;
          for ( auto item: cluster ) {
            boost::add_edge( latentNode.index, l2gTemp.at(item), *graph );
          }
          continue;
        }
      }
      // Singleton, or rejected candidate: its items are carried over unchanged.
      update_index_map( *l2g, l2gTemp, cluster );
    }
    verticesNb += nbrCandidates;

    // Two earlier variants of this section (a fully sequential loop, and a
    // parallel loop guarded by `omp critical` that the original note described
    // as slower than the version above) were removed from the comments.

    BOOST_LOG_TRIVIAL(trace) << "nbrGoodClusters: " << nbrGoodClusters;
    if ( nbrGoodClusters == 0 ) {
      BOOST_LOG_TRIVIAL(trace) << "stop due to zero good clusters.";
      return;
    }
    if ( l2g->size() <= 1 ) {
      BOOST_LOG_TRIVIAL(trace) << "stop due to zero or only one cluster.";
      return;
    }
    BOOST_LOG_TRIVIAL(trace) << std::endl << std::endl;
  }
}