void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[] ) { if(nrhs!=5) mexErrMsgTxt("There should be exactly 6 input parameters"); /* Input parameters */ ConstMatlabMultiArray<double> leaves_part(prhs[0]); ConstMatlabMultiArray<double> mer_seq (prhs[1]); ConstMatlabMultiArray<double> neigh_pairs_min(prhs[2]); ConstMatlabMultiArray<double> neigh_pairs_max(prhs[3]); ConstMatlabMultiArray<double> num_cands_in(prhs[4]); // Number of pairs, triplets, etc. double n_reg_cand; std::vector<double> num_cands; if (num_cands_in.shape()[0]==1) { n_reg_cand = num_cands_in.shape()[1] + 1; num_cands.resize(n_reg_cand); for (std::size_t ii=1; ii<n_reg_cand; ++ii) num_cands[ii] = num_cands_in[0][ii-1]; } else if (num_cands_in.shape()[1]==1) { n_reg_cand = num_cands_in.shape()[0] + 1; num_cands.resize(n_reg_cand); for (std::size_t ii=1; ii<n_reg_cand; ++ii) num_cands[ii] = num_cands_in[ii-1][0]; } else mexErrMsgTxt("Number of candidates should be a vector"); std::size_t sx = leaves_part.shape()[0]; std::size_t sy = leaves_part.shape()[1]; std::size_t n_merges = mer_seq.shape()[0]; std::size_t n_sons_max = mer_seq.shape()[1]-1; std::size_t n_leaves = mer_seq[0][n_sons_max]; // n_merges+1; --> Not valid for non-binary trees std::size_t n_regs = n_leaves+n_merges; std::size_t n_pairs = neigh_pairs_min.shape()[0]; // ********************************************************************************* // Compute the neighbors, descendants, siblings, and ascendants of each region // ********************************************************************************* // Prepare cells to store results std::vector<std::vector<unsigned int> > neighbors(n_regs); std::vector<std::vector<unsigned int> > descendants(n_regs); std::vector<std::vector<unsigned int> > ascendants(n_regs); std::vector<std::vector<unsigned int> > siblings(n_regs); // Start leaves for (std::size_t ii=0; ii<n_leaves; ++ii) descendants[ii].push_back(ii); // Add initial pairs for (std::size_t ii=0; ii<n_pairs; ++ii) { unsigned int min_id = neigh_pairs_min[ii][0]; unsigned int max_id = neigh_pairs_max[ii][0]; neighbors[min_id].push_back(max_id); neighbors[max_id].push_back(min_id); } // Sort the neighbors (for efficiency later) for (std::size_t ii=0; ii<n_regs; ++ii) std::sort(neighbors[ii].begin(),neighbors[ii].end()); // Evolve through merging sequence for (std::size_t ii=0; ii<n_merges; ++ii) { unsigned int parent = mer_seq[ii][n_sons_max]; std::vector<unsigned int> all_neighs; std::vector<unsigned int> tmp_neighs; for (std::size_t jj=0; jj<n_sons_max; ++jj) { if (mer_seq[ii][jj]<0) break; vector_union(all_neighs,neighbors[mer_seq[ii][jj]],tmp_neighs); all_neighs = tmp_neighs; } // Descendants (including itself) descendants[parent].push_back(parent); for (std::size_t jj=0; jj<n_sons_max; ++jj) { if (mer_seq[ii][jj]<0) break; descendants[parent].insert(descendants[parent].begin(), descendants[mer_seq[ii][jj]].begin(), descendants[mer_seq[ii][jj]].end()); } std::sort(descendants[parent].begin(),descendants[parent].end()); vector_difference(all_neighs,descendants[parent],neighbors[parent]); // Update all neighbors for (std::size_t jj=0; jj<neighbors[parent].size(); ++jj) { unsigned int curr_neigh = neighbors[parent][jj]; neighbors[curr_neigh].push_back(parent); } } // Siblings for (std::size_t ii=0; ii<n_merges; ++ii) { // Get all regions that are merged at that step std::vector<unsigned int> all_siblings; for (std::size_t jj=0; jj<n_sons_max; ++jj) { if (mer_seq[ii][jj]<0) break; all_siblings.push_back(mer_seq[ii][jj]); } std::sort(all_siblings.begin(),all_siblings.end()); // For each region, add the rest of siblings for (std::size_t jj=0; jj<n_sons_max; ++jj) { if (mer_seq[ii][jj]<0) break; // siblings[mer_seq[ii][jj]] = all_siblings \ curr_reg std::vector<unsigned int> curr_reg(1,mer_seq[ii][jj]); vector_difference(all_siblings,curr_reg,siblings[mer_seq[ii][jj]]); } } // Ascendants for (std::size_t ii=n_merges; ii>0; --ii) { unsigned int parent = mer_seq[ii-1][n_sons_max]; for (std::size_t jj=0; jj<n_sons_max; ++jj) { if (mer_seq[ii-1][jj]<0) break; ascendants[mer_seq[ii-1][jj]].push_back(parent); std::vector<unsigned int> tmp; vector_union(ascendants[mer_seq[ii-1][jj]], ascendants[parent], tmp); ascendants[mer_seq[ii-1][jj]] = tmp; } } // ********************************************************************* // ********************************************************************* // Top-down computation of pairs, triplets, etc. // ********************************************************************* // All new singletons [0], pairs [1], triplets [2], etc. std::vector<std::list<std::vector<unsigned int> > > cands_list(n_reg_cand); std::vector<std:: set<std::vector<unsigned int> > > cands_set(n_reg_cand); // Set to remove duplicates std::vector<unsigned int> coexistent; unsigned int curr_n_reg_max = n_reg_cand; for (std::size_t ii=0; ii<n_merges; ++ii) { std::size_t curr_id = n_merges-ii-1; // Update coexistent // 1-Remove parent std::vector<unsigned int> tmp_coexistent; unsigned int parent = mer_seq[curr_id][n_sons_max]; std::vector<unsigned int> tmp_parent(1,parent); vector_difference(coexistent,tmp_parent,tmp_coexistent); coexistent = tmp_coexistent; // 2-Add children for (std::size_t jj=0; jj<n_sons_max; ++jj) { double child = mer_seq[curr_id][jj]; if (child<0) break; coexistent.push_back(child); } std::sort(coexistent.begin(),coexistent.end()); // All new singletons [0], pairs [1], and triplets [2] std::vector<std::list<std::vector<unsigned int> > > new_cands_list(n_reg_cand); std::vector<std:: set<std::vector<unsigned int> > > new_cands_set(n_reg_cand); // Add new singletons (children) for (std::size_t jj=0; jj<n_sons_max; ++jj) { double child = mer_seq[curr_id][jj]; if (child<0) break; std::vector<unsigned int> to_put(1,child); new_cands_list[0].push_back(to_put); new_cands_set[0].insert(to_put); cands_list[0].push_back(to_put); cands_set[0].insert(to_put); } // Scan singletons to create pairs and pairs to create triplets (if needed), etc. for (std::size_t n_reg_id=1; n_reg_id<curr_n_reg_max; ++n_reg_id) { std::list<std::vector<unsigned int> >::iterator list_it = new_cands_list[n_reg_id-1].begin(); for ( ; list_it!=new_cands_list[n_reg_id-1].end(); ++list_it) { // Regions forming the current candidate. We add regions to them to get from pairs to triplets or from singletons to pairs. std::vector<unsigned int> curr_regs_vec(*list_it); std::set <unsigned int> curr_regs_set(curr_regs_vec.begin(),curr_regs_vec.end()); /******* Up_neighs: All neighbors minus the descendants of all coexistent, ********/ /******* to keep the order of the candidates in the hierarchy. ********/ /******* We also remove the siblings, since they would create a repeated candidate ********/ std::vector<unsigned int> up_neighs; std::vector<unsigned int> tmp_neighs; // Add all neighbors from all regions (and then remove own and descendants) up_neighs = neighbors[curr_regs_vec[0]]; for (std::size_t kk=1; kk<curr_regs_vec.size(); ++kk) { vector_union(up_neighs,neighbors[curr_regs_vec[kk]],tmp_neighs); up_neighs = tmp_neighs; } std::sort(up_neighs.begin(),up_neighs.end()); // Remove own, descendants, and ascendants for (std::size_t kk=0; kk<curr_regs_vec.size(); ++kk) { vector_difference( up_neighs,descendants[curr_regs_vec[kk]],tmp_neighs); vector_difference(tmp_neighs, ascendants[curr_regs_vec[kk]], up_neighs); } // Scan all coexistent for (std::size_t kk=0; kk<coexistent.size(); ++kk) { double coex = coexistent[kk]; if (curr_regs_set.find(coex)==curr_regs_set.end()) { // Get descendants without own coexistent (to keep it as a neighbor) // curr_descendants = descendants[coex] \ curr_coex std::vector<unsigned int> curr_descendants; std::vector<unsigned int> curr_coex(1,coex); vector_difference(descendants[coex],curr_coex,curr_descendants); // Remove descendants from current coexistent // up_neighs = up_neighs \ curr_descendants std::vector<unsigned int> tmp_neighs; vector_difference(up_neighs,curr_descendants,tmp_neighs); up_neighs = tmp_neighs; } } // Remove siblings (just if they are pairs, otherwise we would be missing some of them) std::vector<unsigned int> curr_siblings; for (std::size_t kk=0; kk<curr_regs_vec.size(); ++kk) if (siblings[curr_regs_vec[kk]].size()==1) curr_siblings.push_back(siblings[curr_regs_vec[kk]][0]); std::sort(curr_siblings.begin(),curr_siblings.end()); std::vector<unsigned int> tmp_neighs2; vector_difference(up_neighs,curr_siblings,tmp_neighs2); up_neighs = tmp_neighs2; /******************** Up_neighs updated ********************/ // Store all up_neighs U current regions for (std::size_t kk=0; kk<up_neighs.size(); ++kk) { // to_put = curr_regs_vec U up_neighs[up_neighs.size()-kk-1] std::vector<unsigned int> to_put(curr_regs_vec); to_put.push_back(up_neighs[up_neighs.size()-kk-1]); std::sort(to_put.begin(),to_put.end()); if (new_cands_set[n_reg_id].find(to_put)==new_cands_set[n_reg_id].end()) { new_cands_list[n_reg_id].push_back(to_put); new_cands_set [n_reg_id].insert(to_put); } if (cands_set[n_reg_id].find(to_put)==cands_set[n_reg_id].end()) { cands_list[n_reg_id].push_back(to_put); cands_set [n_reg_id].insert(to_put); } } } } // Update curr_n_reg_max bool done = true; for (std::size_t n_reg_id=n_reg_cand-1; n_reg_id>0; --n_reg_id) { if (cands_set[n_reg_id].size()<num_cands[n_reg_id]) done = false; else if (done) curr_n_reg_max = n_reg_id; } // Are we done? if (done) break; } // ********************************************************************* // Store at output variable cell plhs[0]=mxCreateCellMatrix(n_reg_cand-1, 1); for (std::size_t kk=1; kk<n_reg_cand; ++kk) { // Allocate the space at each slot of the cell std::size_t max_num_cands = cands_set[kk].size(); double curr_num_cands = std::min((double)max_num_cands,(double)num_cands[kk]); mxArray *curr_cands = mxCreateDoubleMatrix(curr_num_cands,kk+1,mxREAL); MatlabMultiArray<double> cands_out(curr_cands); // Copy result to output std::list<std::vector<unsigned int> >::const_iterator list_it = cands_list[kk].begin(); for (std::size_t ii=0; ii<curr_num_cands; ++ii) { for (std::size_t jj=0; jj<kk+1; ++jj) { cands_out[ii][jj] = (*list_it)[jj] + 1; } ++list_it; } // Set each slot of the cell mxSetCell(plhs[0],kk-1,curr_cands); } }
void process_tree(struct rooted_tree *tree, struct parameters params) { struct llist *descendants; switch (params.mode) { case EXACT: descendants = nodes_from_labels(tree, params.labels); if (NULL == descendants) { perror(NULL); exit(EXIT_FAILURE); } if (0 == descendants->count) { fprintf (stderr, "WARNING: no label matches.\n"); /* I don't consider this a failure: it is just the case * that the tree does not contain the specified labels. * */ exit(EXIT_SUCCESS); } break; case REGEXP: descendants = nodes_from_regexp(tree, params.regexp); if (NULL == descendants) { perror(NULL); exit(EXIT_FAILURE); } if (0 == descendants->count) { fprintf (stderr, "WARNING: no match for regexp /%s/\n", params.regexp_string); exit(EXIT_SUCCESS); /** see above */ } break; default: fprintf (stderr, "Unknown mode %d\n", params.mode); exit(EXIT_FAILURE); } /* We need a copy b/c lca() modifies its arg */ struct llist *desc_clone = shallow_copy(descendants); if (NULL == desc_clone) { perror(NULL); exit(EXIT_FAILURE); } struct rnode *subtree_root = lca(tree, desc_clone); if (NULL == subtree_root) { perror(NULL); exit(EXIT_FAILURE); } free(desc_clone); /* elems freed in lca() */ /* Jump up tree to get context, if any was required ('context' > 0) */ int context; for (context = params.context; context > 0; context--) if (! is_root(subtree_root)) subtree_root = subtree_root->parent; // TODO: could not replace to_newick() by dump_newick() due to side // effects. Investigate. if (NULL != subtree_root) { if ((! params.check_monophyly) || (is_monophyletic(descendants, subtree_root))) { /* monophyly of input labels is verified or not * requested */ char *newick; if (params.siblings) { struct llist *sibs = siblings(subtree_root); if (NULL == sibs) { perror(NULL); exit(EXIT_FAILURE); } struct list_elem *el; for (el=sibs->head;NULL!=el;el=el->next) { struct rnode *sib; sib = el->data; newick = to_newick(sib); printf ("%s\n", newick); free(newick); } destroy_llist(sibs); } else { /* normal operation: print clade defined by * labels. */ newick = to_newick(subtree_root); printf ("%s\n", newick); free(newick); } } } else { fprintf (stderr, "WARNING: LCA not found\n"); } destroy_llist(descendants); }