Beispiel #1
0
// Score for pairing node i of xx.tree with node j of yy.tree.
//
// The returned value is the sum of:
//   1. for every combination of one base-pair frequency entry of node i
//      and one of node j, the co_subst_ table entry selected by the four
//      bases, multiplied by both frequencies;
//   2. node_score(yy, j), scaled by the [RNA_GAP] component of
//      xx.seq[first() of node i] and divided by xx.seq.n_seqs();
//   3. the symmetric term: node_score(xx, i), scaled by the [RNA_GAP]
//      component of yy.seq[first() of node j] over yy.seq.n_seqs().
// NOTE(review): terms 2 and 3 presumably account for sequences that have a
// gap at the node position -- confirm against the Seq profile layout.
typename SubstNodeScore<V,D>::value_type
SubstNodeScore<V,D>::
node_score(const Data& xx, const Data& yy,
	   uint i, uint j) const
{
  const std::vector<Node>& nodes_x(xx.tree);
  const std::vector<Node>& nodes_y(yy.tree);
  const Node& node_x = nodes_x[i];
  const Node& node_y = nodes_y[j];

  value_type total = 0.0;

  // Co-substitution part: walk all (x entry, y entry) combinations.
  for (DAG::bp_freq_iterator px = node_x.bp_freq_begin();
       px != node_x.bp_freq_end(); ++px) {
    const rna_t x_left = px->first.first;
    const rna_t x_right = px->first.second;
    const value_type freq_x = px->second;

    for (DAG::bp_freq_iterator py = node_y.bp_freq_begin();
         py != node_y.bp_freq_end(); ++py) {
      const rna_t y_left = py->first.first;
      const rna_t y_right = py->first.second;
      const value_type freq_y = py->second;

      // Keep the multiplication order (table * x * y) so that the
      // floating-point result is unchanged.
      total += co_subst_[x_left][x_right][y_left][y_right]*freq_x*freq_y;
    }
  }

  // RNA_GAP part. The value is read at first(); an alternative reading it
  // at last() existed only as disabled code.
  const Seq& seq_x(xx.seq);
  const Seq& seq_y(yy.seq);

  const value_type gap_x = seq_x[node_x.first()][RNA_GAP];
  total += node_score(yy, j) * gap_x / seq_x.n_seqs();

  const value_type gap_y = seq_y[node_y.first()][RNA_GAP];
  total += node_score(xx, i) * gap_y / seq_y.n_seqs();

  return total;
}
Beispiel #2
0
//-----------------------------------------------------------------------------
int main()
{
  {
    // Generate some data
    plJointDistribution orig = make_model();
    generate_data(orig, "model_asia.csv", 10000);
    plCSVDataDescriptor dataset("model_asia.csv", orig.get_variables());
    plNodeScoreBIC node_score(dataset);
    std::cout << "Original model: " << orig << std::endl
              << "BIC score of the original model on the whole dataset: "
              << node_score(orig) << std::endl;
    save("model_asia", orig);
  }

  plSymbol A("A", PL_BINARY_TYPE); // visit to Asia?
  plSymbol S("S", PL_BINARY_TYPE); // Smoker?
  plSymbol T("T", PL_BINARY_TYPE); // has Tuberculosis
  plSymbol L("L", PL_BINARY_TYPE); // has Lung cancer
  plSymbol B("B", PL_BINARY_TYPE); // has Bronchitis
  plSymbol O("O", PL_BINARY_TYPE); // has tuberculosis Or cancer
  plSymbol X("X", PL_BINARY_TYPE); // positive X-Ray
  plSymbol D("D", PL_BINARY_TYPE); // Dyspnoea?

  plVariablesConjunction variables = A^S^T^L^B^O^X^D;
  plCSVDataDescriptor dataset("model_asia.csv", variables);
  plStructureLearner learner(variables);

  // Learn the dependancy structure between our variables from the
  // dataset, using the Directed Maximum Spanning Tree algorithm.
  unsigned int root_index = 0; // using 'A' as the root node.
  std::vector<plSymbol> order;
  plEdgeScoreBIC edge_score(dataset);
  bool result = learner.DMST(edge_score, order, variables[root_index]);
  plJointDistribution result_dmst = learner.get_joint_distribution(dataset);

  // Apply the GS algorithm with BIC score on the same dataset.
  // Use the output of the DMST algo as a starting point.
  plNodeScoreBIC node_score(dataset);
  learner.GS(node_score);
  plJointDistribution result_gs = learner.get_joint_distribution(dataset);

  std::cout << "DMST-BIC obtained the following model: " << result_dmst
            << std::endl
            << "BIC score of the learned model on the whole dataset: "
            << node_score(result_dmst) << std::endl;

  std::cout << "DMST + GS obtained the following model: " << result_gs
            << std::endl
            << "BIC score of the learned model on the whole dataset: "
            << node_score(result_gs) << std::endl;

  save("dmst_bic", result_dmst);
  save("dmst-gs_bic", result_gs);

  // On Windows (Visual C++, MinGW) only.
#if defined(WIN32) || defined(_WIN32)
  std::cout << "Press any key to terminate..." << std::endl;
  getchar();
#endif

  return 0;
}