typename SubstNodeScore<V,D>::value_type
SubstNodeScore<V,D>::
node_score(const Data& xx, const Data& yy, uint i, uint j) const
{
  // Score for matching node i of xx.tree against node j of yy.tree.
  //
  // Two contributions are accumulated, in this order:
  //   1. for every entry in the bp_freq range of node i combined with every
  //      entry in the bp_freq range of node j, the co_subst_ table value for
  //      the four symbols involved, multiplied by both entry values;
  //   2. a correction based on the RNA_GAP entry of each node's first()
  //      column, which scales the two-argument node_score() of the *other*
  //      side and is divided by the number of sequences.
  const std::vector<Node>& x_nodes = xx.tree;
  const std::vector<Node>& y_nodes = yy.tree;
  const Node& node_x = x_nodes[i];
  const Node& node_y = y_nodes[j];

  value_type score = 0.0;

  // Contribution 1: all pairs of (x entry, y entry).
  for (DAG::bp_freq_iterator px = node_x.bp_freq_begin();
       px != node_x.bp_freq_end(); ++px) {
    const rna_t a = px->first.first;
    const rna_t b = px->first.second;
    const value_type val_x = px->second;

    for (DAG::bp_freq_iterator py = node_y.bp_freq_begin();
         py != node_y.bp_freq_end(); ++py) {
      const rna_t c = py->first.first;
      const rna_t d = py->first.second;
      const value_type val_y = py->second;

      score += co_subst_[a][b][c][d] * val_x * val_y;
    }
  }

  // Contribution 2: gap correction.
  // NOTE(review): the RNA_GAP entry is presumably the number of gapped
  // sequences in that column, which n_seqs() turns into a fraction -- confirm.
  // The original carried a commented-out alternative that read the column at
  // last() instead of first() for both nodes.
  const Seq& x_seq = xx.seq;
  const Seq& y_seq = yy.seq;

  const value_type gap_x = x_seq[node_x.first()][RNA_GAP];
  score += node_score(yy, j) * gap_x / x_seq.n_seqs();

  const value_type gap_y = y_seq[node_y.first()][RNA_GAP];
  score += node_score(xx, i) * gap_y / y_seq.n_seqs();

  return score;
}
//----------------------------------------------------------------------------- int main() { { // Generate some data plJointDistribution orig = make_model(); generate_data(orig, "model_asia.csv", 10000); plCSVDataDescriptor dataset("model_asia.csv", orig.get_variables()); plNodeScoreBIC node_score(dataset); std::cout << "Original model: " << orig << std::endl << "BIC score of the original model on the whole dataset: " << node_score(orig) << std::endl; save("model_asia", orig); } plSymbol A("A", PL_BINARY_TYPE); // visit to Asia? plSymbol S("S", PL_BINARY_TYPE); // Smoker? plSymbol T("T", PL_BINARY_TYPE); // has Tuberculosis plSymbol L("L", PL_BINARY_TYPE); // has Lung cancer plSymbol B("B", PL_BINARY_TYPE); // has Bronchitis plSymbol O("O", PL_BINARY_TYPE); // has tuberculosis Or cancer plSymbol X("X", PL_BINARY_TYPE); // positive X-Ray plSymbol D("D", PL_BINARY_TYPE); // Dyspnoea? plVariablesConjunction variables = A^S^T^L^B^O^X^D; plCSVDataDescriptor dataset("model_asia.csv", variables); plStructureLearner learner(variables); // Learn the dependancy structure between our variables from the // dataset, using the Directed Maximum Spanning Tree algorithm. unsigned int root_index = 0; // using 'A' as the root node. std::vector<plSymbol> order; plEdgeScoreBIC edge_score(dataset); bool result = learner.DMST(edge_score, order, variables[root_index]); plJointDistribution result_dmst = learner.get_joint_distribution(dataset); // Apply the GS algorithm with BIC score on the same dataset. // Use the output of the DMST algo as a starting point. 
plNodeScoreBIC node_score(dataset); learner.GS(node_score); plJointDistribution result_gs = learner.get_joint_distribution(dataset); std::cout << "DMST-BIC obtained the following model: " << result_dmst << std::endl << "BIC score of the learned model on the whole dataset: " << node_score(result_dmst) << std::endl; std::cout << "DMST + GS obtained the following model: " << result_gs << std::endl << "BIC score of the learned model on the whole dataset: " << node_score(result_gs) << std::endl; save("dmst_bic", result_dmst); save("dmst-gs_bic", result_gs); // On Windows (Visual C++, MinGW) only. #if defined(WIN32) || defined(_WIN32) std::cout << "Press any key to terminate..." << std::endl; getchar(); #endif return 0; }