Пример #1
0
// Aligns the sequences found under one guide-tree node by shelling out to
// the external "probcons" program and reading its output back.
//
//   vAll        source of the sequences (looked up by id) and of the
//               name -> id mapping applied to the result.
//   GuideTree   tree whose leaves under uNodeIndex select the sequences.
//   uNodeIndex  node whose leaves are aligned.
//   msaOut      receives the alignment read from the tool's output file.
//
// Two fixed-name scratch files ("asf_in.tmp", "asf_out.tmp") are written
// relative to the current directory and unlinked before returning.
// NOTE(review): the result of system() is not inspected, so a failed or
// missing probcons run only shows up when the output file is parsed.
void AlignSubFam(SeqVect &vAll, const Tree &GuideTree, unsigned uNodeIndex,
  MSA &msaOut)
	{
	const char *InputPath = "asf_in.tmp";
	const char *OutputPath = "asf_out.tmp";

	// The leaf buffer is sized by the total number of sequences in vAll.
	const unsigned uTotalSeqs = vAll.GetSeqCount();
	unsigned *LeafNodes = new unsigned[uTotalSeqs];
	unsigned uLeaves;
	GetLeaves(GuideTree, uNodeIndex, LeafNodes, &uLeaves);

	// Collect the selected sequences in the order GetLeaves reported them.
	SeqVect Subset;
	for (unsigned n = 0; n < uLeaves; ++n)
		Subset.AppendSeq(vAll.GetSeqById(GuideTree.GetLeafId(LeafNodes[n])));

#if	TRACE
	{
	Log("Align subfam[node=%d, size=%d] ", uNodeIndex, uLeaves);
	for (unsigned n = 0; n < uLeaves; ++n)
		Log(" %s", Subset.GetSeqName(n));
	Log("\n");
	}
#endif

	// Write the subset as FASTA and close the file before the tool runs.
	TextFile InFile(InputPath, true);
	Subset.ToFASTAFile(InFile);
	InFile.Close();

	// Alternative aligner kept for reference:
	//	muscle -in <InputPath> -out <OutputPath> -maxiters 1
	char Command[4096];
	sprintf(Command, "probcons %s > %s 2> /dev/null", InputPath, OutputPath);
	system(Command);

	TextFile OutFile(OutputPath);
	msaOut.FromFile(OutFile);

	// Re-attach ids to the aligned rows by looking each row's name up in vAll.
	for (unsigned uRow = 0; uRow < uLeaves; ++uRow)
		msaOut.SetSeqId(uRow, vAll.GetSeqIdFromName(msaOut.GetSeqName(uRow)));

	unlink(InputPath);
	unlink(OutputPath);

	delete[] LeafNodes;
	}
Пример #2
0
static void LogLeafNames(const Tree &tree, unsigned uNodeIndex)
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	unsigned *Leaves = new unsigned[uNodeCount];
	unsigned uLeafCount;
	GetLeaves(tree, uNodeIndex, Leaves, &uLeafCount);
	for (unsigned i = 0; i < uLeafCount; ++i)
		{
		if (i > 0)
			Log(",");
		Log("%s", tree.GetLeafName(Leaves[i]));
		}
	delete[] Leaves;
	}
Пример #3
0
static void LogSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount)
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	Log("%u subfamilies found\n", uSubfamCount);
	Log("Subfam  Sequence\n");
	Log("------  --------\n");
	unsigned *Leaves = new unsigned[uNodeCount];
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);
		for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)
			Log("%6u  %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));
		Log("\n");
		}
	delete[] Leaves;
	}
Пример #4
0
static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
  const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
  unsigned IdToDiffsLeafNodeIndex[])
	{
#if	TRACE
	Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
	  uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
#endif
	if (bIsDiff[uTreeNodeIndex])
		{
		unsigned uLeafCount = tree.GetLeafCount();
		unsigned *Leaves = new unsigned[uLeafCount];
		GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
		for (unsigned n = 0; n < uLeafCount; ++n)
			{
			const unsigned uLeafNodeIndex = Leaves[n];
			const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
			if (uId >= tree.GetLeafCount())
				Quit("BuildDiffs, id out of range");
			IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
#if	TRACE
			Log("  Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
#endif
			}
		delete[] Leaves;
		return;
		}

	if (tree.IsLeaf(uTreeNodeIndex))
		Quit("BuildDiffs: should never reach leaf");

	const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
	const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);

	const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
	const unsigned uDiffsRight = uDiffsLeft + 1;

	BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
	BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
	}
Пример #5
0
// Refines each subfamily of an alignment independently and, if any of them
// changed, calls ProgressiveAlignSubfams with the refined pieces and `msa`.
//
//   msa     alignment to refine; subfamily subsets are extracted from it and
//           it is handed to ProgressiveAlignSubfams when something changed.
//   tree    guide tree used to choose subfamilies (GetSubfams) and to list
//           the leaves of each one.
//   uIters  iteration count forwarded to RefineVert / RefineHoriz.
//
// Returns true when refinement reported a change in at least one subfamily.
// Returns false without doing any work when msa has fewer than 3 sequences.
bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
	{
	MuscleContext *ctx = getMuscleContext();
	CLUSTER &g_Cluster2 = ctx->params.g_Cluster2;
	DISTANCE &g_Distance2 = ctx->params.g_Distance2;
	ROOT &g_Root2 = ctx->params.g_Root2;
	bool &g_bAnchors = ctx->params.g_bAnchors;

	const unsigned uSeqCount = msa.GetSeqCount();
	if (uSeqCount < 3)
		return false;

	const double dMaxHeight = 0.6;
	const unsigned uMaxSubfamCount = 16;

	// GetSubfams hands back the Subfams array; it is released with delete[]
	// at the end of this function.
	unsigned *Subfams;
	unsigned uSubfamCount;
	GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
	assert(uSubfamCount <= uSeqCount);

	if (ctx->params.g_bVerbose)
		LogSubfams(tree, Subfams, uSubfamCount);

	// Scratch buffers reused for every subfamily; sized for the worst case
	// of one subfamily containing every sequence.
	MSA *SubfamMSAs = new MSA[uSubfamCount];
	unsigned *Leaves = new unsigned[uSeqCount];
	unsigned *Ids = new unsigned[uSeqCount];

	bool bAnyChanges = false;
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfam = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
		assert(uLeafCount <= uSeqCount);

		LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);

		// Extract this subfamily's rows and drop the gapped columns.
		MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
		MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
		DeleteGappedCols(msaSubfam);

#if	TRACE
		Log("Subfam %u MSA=\n", uSubfamIndex);
		msaSubfam.LogMe();
#endif

		// Subfamilies of one or two sequences are left as extracted.
		if (msaSubfam.GetSeqCount() <= 2)
			continue;

	// TODO /////////////////////////////////////////
	// Try using existing tree, may actually hurt to
	// re-estimate, may also be a waste of CPU & mem.
	/////////////////////////////////////////////////
		Tree SubfamTree;
		TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);

		bool bAnyChangesThisSubfam;
		if (g_bAnchors)
			bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
		else
			bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
#if	TRACE
		Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
#endif
		if (bAnyChangesThisSubfam)
			bAnyChanges = true;
		}

	if (bAnyChanges)
		ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);

	// Release every buffer owned here. Ids was previously never freed and
	// leaked on each call.
	delete[] Ids;
	delete[] Leaves;
	delete[] Subfams;
	delete[] SubfamMSAs;

	return bAnyChanges;
	}
Пример #6
0
    // Worker loop for refinement jobs handed out by `workpool`.
    //
    // The worker first blocks on workpool->mainSem. Then, until
    // workpool->isRefineDone() reports true, it repeatedly:
    //   1. fetches a job index and a working copy of the alignment
    //      (refineGetJob),
    //   2. for each job index, splits the guide tree at the matching internal
    //      node, calls TryRealign on the two leaf sets and reports the outcome
    //      through refineGetNextJob, which also yields the next job index,
    //   3. once no job is left (NULL_NEIGHBOR), releases workpool->childSem
    //      and blocks on workpool->mainSem again.
    void RefineWorker::_run() {

        unsigned i = 0;
#if TRACE
        algoLog.trace(QString("Worker %1 start. Wait...").arg(QString::number(workerID)));
#endif
        workpool->mainSem.acquire();
#if TRACE
        // The third placeholder is labelled childSem, so report childSem's count.
        algoLog.trace(QString("Worker %1: Stop wait. Start (mainSem %2, childSem %3)").arg(QString::number(workerID)).
            arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->childSem.available())));
#endif
        while(!workpool->isRefineDone())
        {
            MSA msaIn;

            i = workpool->refineGetJob(&msaIn, workerID);

            MuscleContext *ctx = workpool->ctx;
//            unsigned &g_uTreeSplitNode1 = ctx->muscle.g_uTreeSplitNode1;
//            unsigned &g_uTreeSplitNode2 = ctx->muscle.g_uTreeSplitNode2;
//            unsigned &g_uRefineHeightSubtree = ctx->refinehoriz.g_uRefineHeightSubtree;
//            unsigned &g_uRefineHeightSubtreeTotal = ctx->refinehoriz.g_uRefineHeightSubtreeTotal;
            Tree &tree = workpool->GuideTree;
            const unsigned uSeqCount = msaIn.GetSeqCount();
//            const unsigned uInternalNodeCount = uSeqCount - 1;
            // Leaf buffers for the two sides of a split, sized by the
            // sequence count of the alignment just fetched.
            unsigned *Leaves1 = new unsigned[uSeqCount];
            unsigned *Leaves2 = new unsigned[uSeqCount];
            const unsigned uRootNodeIndex = tree.GetRootNodeIndex();

            while (i != NULL_NEIGHBOR) {

                const unsigned uInternalNodeIndex = workpool->InternalNodeIndexes[i];
                unsigned uNeighborNodeIndex;
                if (tree.IsRoot(uInternalNodeIndex) && !workpool->bRight) {
                    // The root is skipped on the left pass: report the job as
                    // not accepted with score -1 and move to the next one.
                    i = workpool->refineGetNextJob(&msaIn, false, -1, i, workerID);
                    continue;
                }
                else if (workpool->bRight)
                    uNeighborNodeIndex = tree.GetRight(uInternalNodeIndex);
                else
                    uNeighborNodeIndex = tree.GetLeft(uInternalNodeIndex);

                //            g_uTreeSplitNode1 = uInternalNodeIndex;
                //            g_uTreeSplitNode2 = uNeighborNodeIndex;

                unsigned uCount1;
                unsigned uCount2;

                // Side 1: leaves under the neighbor node. Side 2: leaves
                // collected from the root with that node excluded.
                GetLeaves(tree, uNeighborNodeIndex, Leaves1, &uCount1);
                GetLeavesExcluding(tree, uRootNodeIndex, uNeighborNodeIndex,
                    Leaves2, &uCount2);

                SCORE scoreBefore;
                SCORE scoreAfter;
                bool bAccepted = TryRealign(msaIn, tree, Leaves1, uCount1, Leaves2, uCount2,
                    &scoreBefore, &scoreAfter, workpool->bLockLeft, workpool->bLockRight);

                // Report the better of the two scores together with the verdict.
                SCORE scoreMax = scoreAfter > scoreBefore? scoreAfter : scoreBefore;
                //bool bRepeated = workpool->History->SetScore(workpool->uIter, uInternalNodeIndex, workpool->bRight, scoreMax);
                i = workpool->refineGetNextJob(&msaIn, bAccepted, scoreMax, i, workerID);
            }

            delete[] Leaves1;
            delete[] Leaves2;
#if TRACE
            algoLog.trace(QString("Worker %1: no job available. Wait... (mainSem %2, childSem %3)").arg(QString::number(workerID)).
                arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->childSem.available())));
#endif
            // Signal childSem that this worker is idle, then wait on mainSem.
            workpool->childSem.release();
            workpool->mainSem.acquire();
#if TRACE
            algoLog.trace(QString("Worker %1: Stop wait. Start (mainSem %2, childSem %3)").arg(QString::number(workerID)).
                arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->childSem.available())));
#endif
        }
#if TRACE
        algoLog.trace(QString("Worker %1: Refine done. Exit").arg(QString::number(workerID)));
#endif
    }
Пример #7
0
// Recomputes the value stored in every leaf of `tree` from the current
// subsample and writes the resulting responses into sum_response_tmp.
//
//   tree  decision tree whose leaf values are overwritten.
//   _k    row selector: entries are addressed as idx + _k * cols in both
//         sum_response and sum_response_tmp.
//
// For each leaf, the training indices of the subsample elements that `tree`
// routes to it are collected and passed to find_optimal_value(); the result
// becomes the leaf value, and for each collected index
//   sum_response_tmp = sum_response + params.shrinkage * value.
// A leaf that no subsample element reaches gets the value 0.
void CvGBTrees::change_values(CvDTree* tree, const int _k)
{
    // reached[s] is the leaf that `tree` predicts for subsample element s.
    CvDTreeNode** reached = new pCvDTreeNode[get_len(subsample_train)];

    int* const all_idx = sample_idx->data.i;
    int* const picked = subsample_train->data.i;

    // Element stride inside sample_idx: 1 when it has more columns than
    // rows, otherwise the step converted from bytes to elements.
    int stride;
    if (sample_idx->cols > sample_idx->rows)
        stride = 1;
    else
        stride = sample_idx->step/CV_ELEM_SIZE(sample_idx->type);

    CvMat sample;
    CvMat sample_mask;

    for (int s = 0; s < get_len(subsample_train); ++s)
    {
        const int src = all_idx[picked[s]*stride];
        const bool by_rows = (data->tflag == CV_ROW_SAMPLE);

        if (by_rows)
            cvGetRow( data->train_data, &sample, src);
        else
            cvGetCol( data->train_data, &sample, src);

        if (!missing)
        {
            reached[s] = tree->predict(&sample);
            continue;
        }

        // A missing-value mask exists: take the matching slice of it as well.
        if (by_rows)
            cvGetRow( missing, &sample_mask, src);
        else
            cvGetCol( missing, &sample_mask, src);

        reached[s] = tree->predict(&sample, &sample_mask);
    }

    // GetLeaves returns an array that is released with delete[] below.
    int leaves_count = 0;
    CvDTreeNode** leaves = GetLeaves( tree, leaves_count);

    for (int l = 0; l < leaves_count; ++l)
    {
        CvDTreeNode* const leaf = leaves[l];

        // First pass: count the subsample elements that land in this leaf.
        int hits = 0;
        for (int s = 0; s < get_len(subsample_train); ++s)
        {
            if (reached[s] == leaf)
                ++hits;
        }

        if (hits == 0) // Not expected to happen, handled defensively.
        {
            leaf->value = 0.0;
            continue;
        }

        // Second pass: gather the training indices of those elements.
        CvMat* members = cvCreateMat(1, hits, CV_32S);
        int filled = 0;
        for (int s = 0; s < get_len(subsample_train); ++s)
        {
            if (reached[s] == leaf)
                members->data.i[filled++] = all_idx[picked[s]*stride];
        }

        const float leaf_value = find_optimal_value(members);
        leaf->value = leaf_value;

        const int row_len = sum_response_tmp->cols;
        const int* const member_idx = members->data.i;
        for (int m = 0; m < get_len(members); ++m)
        {
            const int cell = member_idx[m] + _k*row_len;
            sum_response_tmp->data.fl[cell] =
                sum_response->data.fl[cell] + params.shrinkage * leaf_value;
        }

        cvReleaseMat(&members);
    }

    // Release the two pointer arrays. The entries are cleared first; only
    // the arrays themselves are deleted here, not the nodes they point to.
    for (int s = 0; s < get_len(subsample_train); ++s)
        reached[s] = 0;
    delete[] reached;

    for (int l = 0; l < leaves_count; ++l)
        leaves[l] = 0;
    delete[] leaves;

}