Beispiel #1
0
/**
 * Generates a synthetic data set whose variables are organised in clusters.
 *
 * nbrClusters * clustSize integer variables (each ranging over
 * [0, cardinality-1]) are declared, grouped into consecutive clusters of
 * clustSize variables, and a joint distribution is obtained from
 * createClusteringJointDist(). nbrInds samples are then drawn from it, one
 * row per sample and one column per variable.
 *
 * @return the result of Transpose() applied to the sampled matrix.
 */
MatrixPtr GenerateClusteredData::operator()() {
  // One variable per (cluster, item) slot, labelled with its flat index.
  Variables variables;
  const size_t totalVars = nbrClusters * clustSize;
  for (size_t idx = 0; idx < totalVars; ++idx) {
    variables ^= Variable( boost::lexical_cast<std::string>(idx),
                           plIntegerType(0, cardinality-1) );
  }

  // Cluster k receives the consecutive indices k*clustSize ... (k+1)*clustSize - 1.
  Clustering clustering;
  clustering.reserve(nbrClusters);
  size_t nextVar = 0;
  for ( size_t k = 0; k < nbrClusters; ++k ) {
    Cluster members;
    for ( size_t slot = 0; slot < clustSize; ++slot ) {
      members.push_back( nextVar++ );
    }
    clustering.push_back( members );
  }

  plJointDistribution jointDist = createClusteringJointDist( variables, clustering);
  plValues drawn( variables );

  // Each draw fills `drawn`; copy it out as one integer row of the matrix.
  auto samples = std::make_shared<Matrix>();
  samples->reserve(nbrInds);
  const size_t width = variables.size();
  for (size_t ind = 0; ind < nbrInds; ++ind) {
    jointDist.draw(drawn);
    std::vector<int> row(width);
    for (size_t col = 0; col < width; ++col) {
      row[col] = drawn[variables[col]];
    }
    samples->push_back(row);
  }

  return Transpose(*samples);
}
Beispiel #2
0
/**
 * Adds a new vertex to `graph` and initialises its Node properties.
 *
 * @param graph        graph receiving the vertex; its address is stored in the node.
 * @param cardinality  the node's variable is given the integer range [0, cardinality - 1].
 * @param isLeaf       copied into the node's isLeaf flag.
 * @param label        used as the name of the node's variable and as the node label.
 * @param position     physical position on the genome; the default -1 converts
 *                     to the largest unsigned value.
 * @param level        copied into the node's level; same default as `position`.
 * @return the descriptor returned by boost::add_vertex for the new vertex.
 *
 * NOTE(review): cardinality is unsigned, so passing 0 makes `cardinality - 1`
 * wrap around - presumably callers always pass at least 1; confirm.
 */
inline vertex_t createVertex( Graph& graph,
                              const unsigned &cardinality,
                              const bool isLeaf,                              
                              const std::string &label = "",
                              const unsigned &position = -1,
                              const unsigned &level = -1) {
  // add_vertex appends the vertex and hands back its descriptor.
  const vertex_t id = boost::add_vertex(graph);
  Node &node = graph[id];

  // The random variable attached to the node.
  node.variable = RandomVariable(label, plIntegerType(0, cardinality - 1));

  // Identity of the node inside its graph.
  node.graph = &graph;
  node.index = id;
  node.label = label;

  // Remaining attributes supplied by the caller.
  node.level = level;
  node.position = position; // physical position on the genome
  node.isLeaf = isLeaf;

  return id;
}
Beispiel #3
0
/**
 * Runs up to params.nbrSteps rounds of latent-variable construction on `graph`.
 *
 * Each round:
 *   1. (after the first round) rebuilds the clustering criteria from the
 *      current graph and hands them to `clustAlgo`;
 *   2. runs the clustering algorithm and converts its partition to clusters;
 *   3. for every cluster holding more than one element, creates a candidate
 *      latent node and fits it with MultiEM (parallel when OpenMP is enabled);
 *   4. serially tests each candidate with accept_latent_variable(); accepted
 *      nodes are added to the graph, recorded in the label->index map and
 *      linked by an edge to every member of their cluster. Singleton clusters
 *      and rejected candidates are passed to update_index_map() as plain
 *      clusters instead.
 *
 * The function returns early when a round yields no non-singleton cluster, no
 * accepted latent node, or leaves at most one entry in the local-to-global map.
 *
 * @param clustAlgo clustering algorithm; run once per round.
 * @param cardFunc  computes the cardinality given to each latent variable.
 * @param graph     graph that is extended in place.
 *
 * Design note: candidate fitting and graph mutation are deliberately split
 * into two loops. An earlier single-loop variant that mutated the graph inside
 * `omp critical` sections was noted by the original author as slower.
 */
void FLTM::execute( ClustAlgoPtr clustAlgo, CardFuncPtr cardFunc, GraphPtr graph ) {
  auto lab2Idx = create_index_map(*graph);
  Local2GlobalPtr l2g = create_local_to_global_map(*graph);
  auto criteria = clustAlgo->get_criteria();
  // Base number used to label latent variables; advanced at the end of each round.
  int verticesNb = boost::num_vertices(*graph);

  for ( int step = 0; step < params.nbrSteps; ++step) {
    if (step > 0) {
      criteria = create_current_criteria( *graph, *l2g, params.maxDist, step);
      clustAlgo->set_measure( graph, l2g, criteria );
    }

    BOOST_LOG_TRIVIAL(trace) << "FLTM - step[" << step << "] over " << params.nbrSteps;
    BOOST_LOG_TRIVIAL(trace) << "running clustering " << clustAlgo->name()
                             << " on " << l2g->size();
    auto partition = clustAlgo->run();
    auto clustering = partition.to_clustering();
    int nonSingletons = number_non_singletons(clustering);
    BOOST_LOG_TRIVIAL(trace) << "to obtain " << clustering.size() << " clusters with " << nonSingletons << " non-singletons clusters" ;
    if ( nonSingletons == 0 ) {
      BOOST_LOG_TRIVIAL(trace) << "stop due to only singleton.";
      return;
    }

    // Keep a copy of the current mapping, then empty *l2g so that it can be
    // refilled for the next round by the update_index_map() calls below.
    std::vector<int> l2gTemp(*l2g);
    Local2Global().swap(*l2g);
    int nbrGoodClusters = 0;

    #ifdef _OPENMP
    // sets the max number of threads we can use
    omp_set_num_threads(params.jobsNumber);
    #endif

    // One slot per cluster (not per non-singleton cluster): both loops below
    // index this container with the cluster index, so it must span the whole
    // clustering. Slots of singleton clusters simply stay default-constructed.
    const int nbrClusters = static_cast<int>(clustering.size());
    std::vector<Node> latentVector(clustering.size());

    // Parallel section: each iteration writes only to its own slot.
    #pragma omp parallel for schedule(dynamic)
    for ( int i = 0; i < nbrClusters; ++i ) {
      if ( clustering[i].size() > 1 ) {
        RandVar var("latent-" + std::to_string(verticesNb + i),
                    plIntegerType(0, cardFunc->compute(clustering[i]) - 1 ));
        latentVector[i] = create_latent_node( graph, var, l2gTemp, lab2Idx, clustering[i]);
        MultiEM em(params.nbrRestarts);
        em.run( *graph, latentVector[i], params.emThres);
      }
    }

    // Serial section: everything that modifies the graph or the shared maps.
    for ( int i = 0; i < nbrClusters; ++i ) {
      if ( clustering[i].size() > 1
           && accept_latent_variable( *graph, latentVector[i], params.latentVarQualityThres) ) {
        nbrGoodClusters++;
        add_latent_node( *graph, latentVector[i] );
        update_index_map( *l2g, l2gTemp, latentVector[i] );
        lab2Idx[ latentVector[i].getLabel() ] = latentVector[i].index;
        for ( auto item: clustering[i] ) {
          boost::add_edge( latentVector[i].index, l2gTemp.at(item), *graph);
        }
      } else {
        update_index_map( *l2g, l2gTemp, clustering[i]);
      }
    }

    // Labels of this round used offsets 0 .. nbrClusters-1, so the base must
    // move past all of them; advancing by the non-singleton count only would
    // let a later round generate a label that is already taken.
    verticesNb += nbrClusters;

    BOOST_LOG_TRIVIAL(trace) << "nbrGoodClusters: " << nbrGoodClusters;

    if ( nbrGoodClusters == 0 ) {
      BOOST_LOG_TRIVIAL(trace) << "stop due to zero good clusters.";
      return;
    }
    if (l2g->size() <= 1) {
      BOOST_LOG_TRIVIAL(trace) << "stop due to zero or only one cluster.";
      return;
    }
    BOOST_LOG_TRIVIAL(trace) << std::endl  << std::endl;
  }
}