void AlignSubFam(SeqVect &vAll, const Tree &GuideTree, unsigned uNodeIndex, MSA &msaOut) { const unsigned uSeqCount = vAll.GetSeqCount(); const char *InTmp = "asf_in.tmp"; const char *OutTmp = "asf_out.tmp"; unsigned *Leaves = new unsigned[uSeqCount]; unsigned uLeafCount; GetLeaves(GuideTree, uNodeIndex, Leaves, &uLeafCount); SeqVect v; for (unsigned i = 0; i < uLeafCount; ++i) { unsigned uLeafNodeIndex = Leaves[i]; unsigned uId = GuideTree.GetLeafId(uLeafNodeIndex); Seq &s = vAll.GetSeqById(uId); v.AppendSeq(s); } #if TRACE { Log("Align subfam[node=%d, size=%d] ", uNodeIndex, uLeafCount); for (unsigned i = 0; i < uLeafCount; ++i) Log(" %s", v.GetSeqName(i)); Log("\n"); } #endif TextFile fIn(InTmp, true); v.ToFASTAFile(fIn); fIn.Close(); char CmdLine[4096]; sprintf(CmdLine, "probcons %s > %s 2> /dev/null", InTmp, OutTmp); // sprintf(CmdLine, "muscle -in %s -out %s -maxiters 1", InTmp, OutTmp); system(CmdLine); TextFile fOut(OutTmp); msaOut.FromFile(fOut); for (unsigned uSeqIndex = 0; uSeqIndex < uLeafCount; ++uSeqIndex) { const char *Name = msaOut.GetSeqName(uSeqIndex); unsigned uId = vAll.GetSeqIdFromName(Name); msaOut.SetSeqId(uSeqIndex, uId); } unlink(InTmp); unlink(OutTmp); delete[] Leaves; }
static void LogLeafNames(const Tree &tree, unsigned uNodeIndex) { const unsigned uNodeCount = tree.GetNodeCount(); unsigned *Leaves = new unsigned[uNodeCount]; unsigned uLeafCount; GetLeaves(tree, uNodeIndex, Leaves, &uLeafCount); for (unsigned i = 0; i < uLeafCount; ++i) { if (i > 0) Log(","); Log("%s", tree.GetLeafName(Leaves[i])); } delete[] Leaves; }
static void LogSubfams(const Tree &tree, const unsigned Subfams[], unsigned uSubfamCount) { const unsigned uNodeCount = tree.GetNodeCount(); Log("%u subfamilies found\n", uSubfamCount); Log("Subfam Sequence\n"); Log("------ --------\n"); unsigned *Leaves = new unsigned[uNodeCount]; for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex) { unsigned uSubfamNodeIndex = Subfams[uSubfamIndex]; unsigned uLeafCount; GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount); for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex) Log("%6u %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex])); Log("\n"); } delete[] Leaves; }
static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex, const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex, unsigned IdToDiffsLeafNodeIndex[]) { #if TRACE Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n", uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex)); #endif if (bIsDiff[uTreeNodeIndex]) { unsigned uLeafCount = tree.GetLeafCount(); unsigned *Leaves = new unsigned[uLeafCount]; GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount); for (unsigned n = 0; n < uLeafCount; ++n) { const unsigned uLeafNodeIndex = Leaves[n]; const unsigned uId = tree.GetLeafId(uLeafNodeIndex); if (uId >= tree.GetLeafCount()) Quit("BuildDiffs, id out of range"); IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex; #if TRACE Log(" Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex); #endif } delete[] Leaves; return; } if (tree.IsLeaf(uTreeNodeIndex)) Quit("BuildDiffs: should never reach leaf"); const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex); const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex); const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex); const unsigned uDiffsRight = uDiffsLeft + 1; BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex); BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex); }
// Refines an alignment one subfamily at a time.
//
// The tree is cut into subfamilies by GetSubfams (height limit 0.6, at most
// 16 subfamilies). For each subfamily the corresponding rows are extracted
// from msa, all-gap columns are dropped, a fresh tree is estimated from the
// sub-alignment and the sub-alignment is refined with RefineVert (when the
// g_bAnchors parameter is set) or RefineHoriz. Subfamilies with two or fewer
// sequences are extracted but not refined. If any subfamily changed,
// ProgressiveAlignSubfams is called with the refined sub-alignments and msa.
//
//   msa     alignment to refine; passed on to ProgressiveAlignSubfams when
//           something changed
//   tree    guide tree for msa
//   uIters  iteration count handed to RefineVert / RefineHoriz
//
// Returns true if at least one subfamily reported a change, false otherwise
// (including when msa has fewer than 3 sequences, in which case nothing is
// done).
bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
{
    MuscleContext *ctx = getMuscleContext();
    CLUSTER &g_Cluster2 = ctx->params.g_Cluster2;
    DISTANCE &g_Distance2 = ctx->params.g_Distance2;
    ROOT &g_Root2 = ctx->params.g_Root2;
    bool &g_bAnchors = ctx->params.g_bAnchors;

    const unsigned uSeqCount = msa.GetSeqCount();
    if (uSeqCount < 3)
        return false;

    const double dMaxHeight = 0.6;
    const unsigned uMaxSubfamCount = 16;

    //const unsigned uNodeCount = tree.GetNodeCount();

    // Subfams is allocated by GetSubfams and released at the end of this
    // function.
    unsigned *Subfams;
    unsigned uSubfamCount;
    GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
    assert(uSubfamCount <= uSeqCount);

    if (ctx->params.g_bVerbose)
        LogSubfams(tree, Subfams, uSubfamCount);

    MSA *SubfamMSAs = new MSA[uSubfamCount];
    // Scratch buffers reused for every subfamily.
    unsigned *Leaves = new unsigned[uSeqCount];
    unsigned *Ids = new unsigned[uSeqCount];

    bool bAnyChanges = false;
    for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
    {
        unsigned uSubfam = Subfams[uSubfamIndex];
        unsigned uLeafCount;
        GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
        assert(uLeafCount <= uSeqCount);

        LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);

        MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
        MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
        DeleteGappedCols(msaSubfam);

#if TRACE
        Log("Subfam %u MSA=\n", uSubfamIndex);
        msaSubfam.LogMe();
#endif

        // Too small to refine; the extracted sub-alignment is kept as is.
        if (msaSubfam.GetSeqCount() <= 2)
            continue;

        // TODO /////////////////////////////////////////
        // Try using existing tree, may actually hurt to
        // re-estimate, may also be a waste of CPU & mem.
        /////////////////////////////////////////////////
        Tree SubfamTree;
        TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);

        bool bAnyChangesThisSubfam;
        if (g_bAnchors)
            bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
        else
            bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);

#if TRACE
        Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
#endif
        if (bAnyChangesThisSubfam)
            bAnyChanges = true;
    }

    if (bAnyChanges)
        ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);

    delete[] Leaves;
    delete[] Ids;  // was previously never released
    delete[] Subfams;
    delete[] SubfamMSAs;

    return bAnyChanges;
}
void RefineWorker::_run() { unsigned i = 0; #if TRACE algoLog.trace(QString("Worker %1 start. Wait...").arg(QString::number(workerID))); #endif workpool->mainSem.acquire(); #if TRACE algoLog.trace(QString("Worker %1: Stop wait. Start (mainSem %2, childSem %3)").arg(QString::number(workerID)). arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->mainSem.available()))); #endif while(!workpool->isRefineDone()) { MSA msaIn; i = workpool->refineGetJob(&msaIn, workerID); MuscleContext *ctx = workpool->ctx; // unsigned &g_uTreeSplitNode1 = ctx->muscle.g_uTreeSplitNode1; // unsigned &g_uTreeSplitNode2 = ctx->muscle.g_uTreeSplitNode2; // unsigned &g_uRefineHeightSubtree = ctx->refinehoriz.g_uRefineHeightSubtree; // unsigned &g_uRefineHeightSubtreeTotal = ctx->refinehoriz.g_uRefineHeightSubtreeTotal; Tree &tree = workpool->GuideTree; const unsigned uSeqCount = msaIn.GetSeqCount(); // const unsigned uInternalNodeCount = uSeqCount - 1; unsigned *Leaves1 = new unsigned[uSeqCount]; unsigned *Leaves2 = new unsigned[uSeqCount]; const unsigned uRootNodeIndex = tree.GetRootNodeIndex(); while (i != NULL_NEIGHBOR) { const unsigned uInternalNodeIndex = workpool->InternalNodeIndexes[i]; unsigned uNeighborNodeIndex; if (tree.IsRoot(uInternalNodeIndex) && !workpool->bRight) { i = workpool->refineGetNextJob(&msaIn, false, -1, i, workerID); continue; } else if (workpool->bRight) uNeighborNodeIndex = tree.GetRight(uInternalNodeIndex); else uNeighborNodeIndex = tree.GetLeft(uInternalNodeIndex); // g_uTreeSplitNode1 = uInternalNodeIndex; // g_uTreeSplitNode2 = uNeighborNodeIndex; unsigned uCount1; unsigned uCount2; GetLeaves(tree, uNeighborNodeIndex, Leaves1, &uCount1); GetLeavesExcluding(tree, uRootNodeIndex, uNeighborNodeIndex, Leaves2, &uCount2); SCORE scoreBefore; SCORE scoreAfter; bool bAccepted = TryRealign(msaIn, tree, Leaves1, uCount1, Leaves2, uCount2, &scoreBefore, &scoreAfter, workpool->bLockLeft, workpool->bLockRight); SCORE scoreMax = scoreAfter > 
scoreBefore? scoreAfter : scoreBefore; //bool bRepeated = workpool->History->SetScore(workpool->uIter, uInternalNodeIndex, workpool->bRight, scoreMax); i = workpool->refineGetNextJob(&msaIn, bAccepted, scoreMax, i, workerID); } delete[] Leaves1; delete[] Leaves2; #if TRACE algoLog.trace(QString("Worker %1: no job available. Wait... (mainSem %2, childSem %3)").arg(QString::number(workerID)). arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->mainSem.available()))); #endif workpool->childSem.release(); workpool->mainSem.acquire(); #if TRACE algoLog.trace(QString("Worker %1: Stop wait. Start (mainSem %2, childSem %3)").arg(QString::number(workerID)). arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->mainSem.available()))); #endif } #if TRACE algoLog.trace(QString("Worker %1: Refine done. Exit").arg(QString::number(workerID))); #endif }
// Recomputes the value stored in every leaf of `tree` and updates the
// temporary response sums accordingly.
//
// Each sample of the training subsample is first routed through the tree to
// find the leaf it lands in. Then, per leaf, the indices of the samples that
// landed there are collected into a 1 x N CV_32S matrix, find_optimal_value
// is asked for the leaf value, and for each collected index `idx` the entry
// at `idx + _k*cols` of sum_response_tmp is set to the matching entry of
// sum_response plus params.shrinkage times that value. A leaf that received
// no samples gets value 0.
//
//   tree  tree whose leaves are updated
//   _k    selects the block of the response sums, via the offset _k*cols
void CvGBTrees::change_values(CvDTree* tree, const int _k)
{
    // reached[n] is the leaf that subsample element n is predicted into.
    CvDTreeNode** reached = new pCvDTreeNode[get_len(subsample_train)];

    int* sample_ids = sample_idx->data.i;
    int* subsample_ids = subsample_train->data.i;

    // Element stride of sample_idx: 1 for a row vector, otherwise the row
    // step expressed in elements.
    int s_step;
    if (sample_idx->cols > sample_idx->rows)
        s_step = 1;
    else
        s_step = sample_idx->step/CV_ELEM_SIZE(sample_idx->type);

    CvMat sample;
    CvMat sample_mask;

    for (int n = 0; n < get_len(subsample_train); ++n)
    {
        const int idx = sample_ids[subsample_ids[n]*s_step];
        const bool row_samples = (data->tflag == CV_ROW_SAMPLE);

        if (row_samples)
            cvGetRow( data->train_data, &sample, idx);
        else
            cvGetCol( data->train_data, &sample, idx);

        if (!missing)
        {
            reached[n] = tree->predict(&sample);
            continue;
        }

        if (row_samples)
            cvGetRow( missing, &sample_mask, idx);
        else
            cvGetCol( missing, &sample_mask, idx);
        reached[n] = tree->predict(&sample, &sample_mask);
    }

    int leaves_count = 0;
    CvDTreeNode** leaves = GetLeaves( tree, leaves_count);

    for (int li = 0; li < leaves_count; ++li)
    {
        CvDTreeNode* const leaf = leaves[li];

        // How many subsample elements ended up in this leaf?
        int hits = 0;
        for (int n = 0; n < get_len(subsample_train); ++n)
            hits += (leaf == reached[n]) ? 1 : 0;

        if (hits == 0) // It should not be done anyways! but...
        {
            leaf->value = 0.0;
            continue;
        }

        // Collect the sample indices belonging to this leaf.
        CvMat* leaf_idx = cvCreateMat(1, hits, CV_32S);
        int* out = leaf_idx->data.i;
        for (int n = 0; n < get_len(subsample_train); ++n)
        {
            if (leaf != reached[n])
                continue;
            *out++ = sample_ids[subsample_ids[n]*s_step];
        }

        const float value = find_optimal_value(leaf_idx);
        leaf->value = value;

        const int* members = leaf_idx->data.i;
        const int len = sum_response_tmp->cols;
        for (int m = 0; m < get_len(leaf_idx); ++m)
        {
            const int idx = members[m];
            sum_response_tmp->data.fl[idx + _k*len] =
                sum_response->data.fl[idx + _k*len] + params.shrinkage * value;
        }

        cvReleaseMat(&leaf_idx);
    }

    // releasing the memory
    for (int n = get_len(subsample_train) - 1; n >= 0; --n)
        reached[n] = 0;
    delete[] reached;

    for (int li = leaves_count - 1; li >= 0; --li)
        leaves[li] = 0;
    delete[] leaves;
}