/**
 * Accumulates the per-branch costs stored in `table` onto the `dist` field
 * of every non-root node below `node`, following the child bases recorded
 * in each visited cell.
 *
 * @param tree   tree being processed; only forwarded to the recursive calls
 * @param node   current node; a node without children ends the recursion
 * @param base   base index selecting which of this node's four cells to use
 * @param table  cost table, addressed as table[matind(4, node->name, base)]
 */
void getParsimonyCost(Tree *tree, Node *node, int base, ParsimonyCell *table)
{
    // nothing to push down from a node without children
    if (node->nchildren <= 0)
        return;

    // the cell chosen for this node; it is only read below
    const ParsimonyCell &cell = table[matind(4, node->name, base)];

    Node *lchild = node->children[0];
    Node *rchild = node->children[1];

    // add this site's cost to the branches leading to each child
    lchild->dist += cell.leftcost;
    rchild->dist += cell.rightcost;

    // descend using the base recorded for each child
    getParsimonyCost(tree, lchild, cell.leftbase, table);
    getParsimonyCost(tree, rchild, cell.rightbase, table);
}
// transition probability P(j | i, t) void HkyModel::getMatrix(float t, float *matrix) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { // convenience variables // NOTE: it is ok to assign pi_ry, because it is only used when // dnatype[i] == dnatype[j] double a_i, pi_ry; switch (dnatype[i]) { case DNA_PURINE: a_i = a_r; pi_ry = pi_r; break; case DNA_PRYMIDINE: a_i = a_y; pi_ry = pi_y; break; default: assert(0); } int delta_ij = int(i == j); int e_ij = int(dnatype[i] == dnatype[j]); // return transition probability double ait = exp(-a_i*t); double ebt = exp(-b*t); matrix[matind(4,i,j)] = ait*ebt * delta_ij + ebt * (1.0 - ait) * (pi[j]*e_ij/pi_ry) + (1.0 - ebt) * pi[j]; } } }
// transition probability P(j | i, t) void HkyModelDeriv::getMatrix(float t, float *matrix) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { // convenience variables // NOTE: it is ok to assign pi_ry, because it is only used when // dnatype[i] == dnatype[j] float a_i, pi_ry; switch (dnatype[i]) { case DNA_PURINE: a_i = a_r; pi_ry = pi_r; break; case DNA_PRYMIDINE: a_i = a_y; pi_ry = pi_y; break; default: assert(0); } int delta_ij = int(i == j); int e_ij = int(dnatype[i] == dnatype[j]); // return transition probability float ab = a_i + b; float eabt = expf(-ab*t); float ebt = expf(-b*t); matrix[matind(4, i, j)] = - delta_ij * ab * eabt + (pi[j] * e_ij / pi_ry) * (- b * ebt + ab * eabt) + pi[j] * b * ebt; } } }
double solve() { const int numcols = vars_.size(); const int numrows = bnd_.size(); int status; lp_ = CPXcreateprob(env_, &status, "PRactIP"); if (lp_==NULL) throw std::runtime_error("failed to create LP"); unsigned int n_nonzero=0; for (unsigned int i=0; i!=m_.size(); ++i) n_nonzero += m_[i].size(); std::vector<int> matbeg(numcols, 0); std::vector<int> matcnt(numcols, 0); std::vector<int> matind(n_nonzero); std::vector<double> matval(n_nonzero); for (unsigned int i=0, k=0; i!=m_.size(); ++i) { matbeg[i] = i==0 ? 0 : matbeg[i-1]+matcnt[i-1]; matcnt[i] = m_[i].size(); for (unsigned int j=0; j!=m_[i].size(); ++j, ++k) { matind[k] = m_[i][j].first; matval[k] = m_[i][j].second; } } m_.clear(); status = CPXcopylp(env_, lp_, numcols, numrows, dir_==IP::MIN ? CPX_MIN : CPX_MAX, &coef_[0], &rhs_[0], &bnd_[0], &matbeg[0], &matcnt[0], &matind[0], &matval[0], &vlb_[0], &vub_[0], &rngval_[0] ); vlb_.clear(); vub_.clear(); status = CPXcopyctype(env_, lp_, &vars_[0]); vars_.clear(); CPXsetintparam(env_, CPXPARAM_MIP_Display, 0); CPXsetintparam(env_, CPXPARAM_Barrier_Display, 0); CPXsetintparam(env_, CPXPARAM_Tune_Display, 0); CPXsetintparam(env_, CPXPARAM_Network_Display, 0); CPXsetintparam(env_, CPXPARAM_Sifting_Display, 0); CPXsetintparam(env_, CPXPARAM_Simplex_Display, 0); status = CPXmipopt(env_, lp_); double objval; status = CPXgetobjval(env_, lp_, &objval); res_cols_.resize(CPXgetnumcols(env_, lp_)); status = CPXgetx(env_, lp_, &res_cols_[0], 0, res_cols_.size()-1); return objval; }
// assume binary tree void parsimony_helper(Tree *tree, int nseqs, char **seqs, ParsimonyCell *table, int *postorder) { for (int ii=nseqs; ii<tree->nnodes; ii++) { int i = postorder[ii]; int left = tree->nodes[i]->children[0]->name; int right = tree->nodes[i]->children[1]->name; // process this node for (int a=0; a<4; a++) { int minleft = 0, minright = 0; float minleftcost = MAX_COST, minrightcost = MAX_COST; float leftsub = 0; float rightsub = 0; //float leftmatch = 2; //float rightmatch = 2; for (int b=0; b<4; b++) { float sub = subcost[a][b]; float leftcost = table[matind(4, left, b)].cost + sub; float rightcost = table[matind(4, right, b)].cost + sub; // find min_b leftcost(b) if (leftcost < minleftcost) // || // (leftcost == minleftcost && // frand() < (1.0/leftmatch))) { //if (leftcost == minleftcost) // leftmatch += 1; //else // leftmatch = 2; minleftcost = leftcost; minleft = b; leftsub = sub; } // find min_b rightcost(b) if (rightcost < minrightcost) // || // (rightcost == minrightcost && // frand() < (1.0/rightmatch))) { //if (rightcost == minrightcost) // rightmatch += 1; //else // rightmatch = 2; minrightcost = rightcost; minright = b; rightsub = sub; } } // save cost and pointers int k = matind(4, i, a); table[k].cost = minleftcost + minrightcost; table[k].leftcost = leftsub; table[k].rightcost = rightsub; table[k].leftbase = minleft; table[k].rightbase = minright; table[k].gap = table[matind(4, left, 0)].gap && \ table[matind(4, right, 0)].gap; } } }
void parsimony(Tree *tree, int nseqs, char **seqs, bool buildAncestral, char **ancetralSeqs) { int seqlen = strlen(seqs[0]); // allocate dynamic table ParsimonyCell *table = new ParsimonyCell [tree->nnodes * 4]; int *gapless = new int [tree->nnodes]; // initalize distances for (int i=0; i<tree->nnodes; i++) { tree->nodes[i]->dist = 0.0; gapless[i] = 0; } // get recursion order ExtendArray<int> postorder(0, tree->nnodes); getPostOrder(tree, &postorder); for (int i=0; i<seqlen; i++) { // initialize leaves // iterate just over the leaves for (int j=0; j<nseqs; j++) { int base = dna2int[(int) (unsigned char) seqs[j][i]]; if (base == -1) { // gap for (int k=0; k<4; k++) { table[matind(4, j, k)].cost = 0; table[matind(4, j, k)].gap = true; } } else { for (int k=0; k<4; k++) { table[matind(4, j, k)].cost = MAX_COST; table[matind(4, j, k)].gap = false; } table[matind(4, j, base)].cost = 0; } } // populate cost table parsimony_helper(tree, nseqs, seqs, table, postorder); // find min cost at root float mincost = MAX_COST; int minbase= 0; int root = tree->root->name; for (int a=0; a<4; a++) { if (table[matind(4, root, a)].cost < mincost) { mincost = table[matind(4, root, a)].cost; minbase = a; } } // add up dist getParsimonyCost(tree, tree->root, minbase, table); // add up ungapped chars for (int j=0; j<tree->nnodes; j++) { gapless[j] += table[matind(4, j, 0)].gap ? 0 : 1; } } // divide subsitutions by number of sites for (int i=0; i<tree->nnodes; i++) if (gapless[i] != 0.0) tree->nodes[i]->dist /= gapless[i]; // place root in middle of top branch Node *rootnode = tree->root; float totlen = rootnode->children[0]->dist + rootnode->children[1]->dist; rootnode->children[0]->dist = totlen / 2.0; rootnode->children[1]->dist = totlen / 2.0; // cleanup delete [] table; delete [] gapless; }