stList *chooseMatching_greedy(stList *edges, int64_t nodeNumber) { /* * Greedily picks the edge from the list such that each node has at most one edge. */ //First clone the list.. edges = stList_copy(edges, NULL); stSortedSet *seen = getEmptyNodeOrEdgeSetWithCleanup(); stList *matching = stList_construct(); //Sort the adjacency pairs.. stList_sort(edges, chooseMatching_greedyP); double strength = INT64_MAX; while (stList_length(edges) > 0) { stIntTuple *edge = stList_pop(edges); double d = stIntTuple_get(edge, 2); assert(d <= strength); strength = d; if(!nodeInSet(seen, stIntTuple_get(edge, 0)) && !nodeInSet(seen, stIntTuple_get(edge, 1))) { addNodeToSet(seen, stIntTuple_get(edge, 0)); addNodeToSet(seen, stIntTuple_get(edge, 1)); stList_append(matching,edge); } } assert(stList_length(edges) == 0); stList_destruct(edges); stSortedSet_destruct(seen); return matching; }
void test_stList_pop(CuTest *testCase) { setup(); int64_t i; for(i=stringNumber-1; i>=0; i--) { CuAssertTrue(testCase, stList_pop(list) == strings[i]); CuAssertTrue(testCase, stList_length(list) == i); } teardown(); }
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) { for(int64_t trial=0; trial<100; trial++) { setup(); //Make random number of sequences. stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct); for(int64_t i=0; i<sequenceNumber; i++) { stList_append(sequenceLengths, stIntTuple_construct1( st_randomInt(0, MAX_SEQUENCE_SIZE))); } //Propose random alignment pairs... stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct); int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS); if(sequenceNumber > 0) { for(int64_t i=0; i<maxAlignedPairs; i++) { int64_t seq1 = st_randomInt(0, sequenceNumber); int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0); if(seqLength1 == 0) { continue; } int64_t position1 = st_randomInt(0, seqLength1); int64_t seq2 = st_randomInt(0, sequenceNumber); int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0); if(seqLength2 == 0) { continue; } int64_t position2 = st_randomInt(0, seqLength2); if(seq1 != seq2) { stList_append(pairs, stIntTuple_construct4( seq1, position1, seq2, position2)); if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) { st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2); //For each accepted pair check it doesn't create a cycle. CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2)); } else { st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2); //For each rejected pair check it creates a cycle.. CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber)); CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)); stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle. CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay.. } } } } //Cleanup stList_destruct(sequenceLengths); stList_destruct(pairs); teardown(); st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs); } }
/* clone the root. */ static stTree *subrangeCloneRoot(stTree *srcRoot, struct malnCompCompMap *srcDestCompMap) { // clone root, if deleted, these must only be one child (due to the way // the trees are constructed). stList *pendingSubtrees = stList_construct(); stTree *destRoot = subrangeCloneNode(srcRoot, srcDestCompMap, pendingSubtrees); if (destRoot == NULL) { if (stList_length(pendingSubtrees) > 1) { struct mafTreeNodeCompLink *srcNcLink = getNodeCompLink(srcRoot); errAbort("deleted tree root %s (component: %s:%d-%d/%c)) has more that one child", stTree_getLabel(srcRoot), srcNcLink->comp->seq->orgSeqName, srcNcLink->comp->start, srcNcLink->comp->end, srcNcLink->comp->strand); } else if (stList_length(pendingSubtrees) == 1) { destRoot = stList_pop(pendingSubtrees); } } stList_destruct(pendingSubtrees); return destRoot; }
// copied from cPecanRealign struct PairwiseAlignment *convertAlignedPairsToPairwiseAlignment(char *seqName1, char *seqName2, double score, int64_t length1, int64_t length2, stList *alignedPairs) { //Make pairwise alignment int64_t pX = -1, pY = -1, mL = 0; //Create an end matched pair, which is used to ensure the alignment has the correct end indels. struct List *opList = constructEmptyList(0, (void (*)(void *)) destructAlignmentOperation); stList_append(alignedPairs, stIntTuple_construct2(length1, length2)); for (int64_t i = 0; i < stList_length(alignedPairs); i++) { stIntTuple *alignedPair = stList_get(alignedPairs, i); int64_t x = stIntTuple_get(alignedPair, 0); int64_t y = stIntTuple_get(alignedPair, 1); assert(x - pX > 0); assert(y - pY > 0); if (x - pX > 0 && y - pY > 0) { //This is a hack for filtering if (x - pX > 1) { //There is an indel. if (mL > 0) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0)); mL = 0; } listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_X, x - pX - 1, 0)); } if (y - pY > 1) { if (mL > 0) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0)); mL = 0; } listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_Y, y - pY - 1, 0)); } mL++; pX = x; pY = y; } } //Deal with a trailing match, but exclude the final match if (mL > 1) { listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL - 1, 0)); } stIntTuple_destruct(stList_pop(alignedPairs)); //Construct the alignment struct PairwiseAlignment *pA = constructPairwiseAlignment(seqName1, 0, length1, 1, seqName2, 0, length2, 1, score, opList); return pA; }
stList *getComponents(stList *edges) { /* * Gets a list of connected components, each connected component * being represented as a list of the edges, such that each edge is in exactly one * connected component. Allows for multi-graphs (multiple edges connecting two nodes). */ stHash *nodesToEdges = getNodesToEdgesHash(edges); /* * Traverse the edges greedily */ stList *components = stList_construct3(0, (void(*)(void *)) stList_destruct); stList *nodes = stHash_getKeys(nodesToEdges); while (stList_length(nodes) > 0) { stIntTuple *node = stList_pop(nodes); stList *edges = stHash_search(nodesToEdges, node); if (edges != NULL) { //We have a component to build stSortedSet *component = stSortedSet_construct(); stHash_remove(nodesToEdges, node); for (int64_t i = 0; i < stList_length(edges); i++) { stIntTuple *edge = stList_get(edges, i); getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component); getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component); } stList_append(components, stSortedSet_getList(component)); //Cleanup stSortedSet_destruct(component); stList_destruct(edges); } stIntTuple_destruct(node); } assert(stHash_size(nodesToEdges) == 0); stHash_destruct(nodesToEdges); stList_destruct(nodes); return components; }
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength, bool useProgressiveMerging, float gapGamma, PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) { //Make an alignment of the sequences in the ends //Get the adjacency sequences to be aligned. Cap *cap; End_InstanceIterator *it = end_getInstanceIterator(end); stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct); stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct); stHash *endInstanceNumbers = stHash_construct2(NULL, free); while((cap = end_getNext(it)) != NULL) { if(cap_getSide(cap)) { cap = cap_getReverse(cap); } AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength); stList_append(sequences, adjacencySequence); assert(cap_getAdjacency(cap) != NULL); End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap))); stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd))); //Increase count of seqfrags with a given end. int64_t *c = stHash_search(endInstanceNumbers, otherEnd); if(c == NULL) { c = st_calloc(1, sizeof(int64_t)); assert(*c == 0); stHash_insert(endInstanceNumbers, otherEnd, c); } (*c)++; } end_destructInstanceIterator(it); //Get the alignment. MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters); //Build an array of weights to reweight pairs in the alignment. int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t)); int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t)); //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing //common ends. for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) { stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i); int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1); int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2); assert(seq1 != seq2); SeqFrag *seqFrag1 = stList_get(seqFrags, seq1); SeqFrag *seqFrag2 = stList_get(seqFrags, seq2); int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds; pairwiseAlignmentsPerSequence[seq1]++; pairwiseAlignmentsPerSequence[seq2]++; } //Now calculate score adjustments. double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double)); double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double)); for(int64_t i=0; i<stList_length(seqFrags); i++) { SeqFrag *seqFrag = stList_get(seqFrags, i); End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId); assert(otherEnd != NULL); assert(stHash_search(endInstanceNumbers, otherEnd) != NULL); int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd); int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber; assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0); assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber); assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0); assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber); assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0); //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]); //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i]; if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) { scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i]; assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0); assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber); } else { scoreAdjustmentsNonCommonEnds[i] = INT64_MIN; } if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) { scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i]; assert(scoreAdjustmentsCommonEnds[i] >= 1.0); assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1); } else { scoreAdjustmentsCommonEnds[i] = INT64_MIN; } } //Convert the alignment pairs to an alignment of the caps.. stSortedSet *sortedAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn, (void (*)(void *))alignedPair_destruct); while(stList_length(mA->alignedPairs) > 0) { stIntTuple *alignedPair = stList_pop(mA->alignedPairs); assert(stIntTuple_length(alignedPair) == 5); int64_t seqIndex1 = stIntTuple_get(alignedPair, 1); int64_t seqIndex2 = stIntTuple_get(alignedPair, 3); AdjacencySequence *i = stList_get(sequences, seqIndex1); AdjacencySequence *j = stList_get(sequences, seqIndex2); assert(i != j); int64_t offset1 = stIntTuple_get(alignedPair, 2); int64_t offset2 = stIntTuple_get(alignedPair, 4); int64_t score = stIntTuple_get(alignedPair, 0); if(score <= 0) { //Happens when indel probs are included score = 1; //This is the minimum } assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1); SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1); SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2); assert(seqFrag1 != seqFrag2); double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds; assert(scoreAdjustments[seqIndex1] != INT64_MIN); assert(scoreAdjustments[seqIndex2] != INT64_MIN); AlignedPair *alignedPair2 = alignedPair_construct( i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand, j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand, score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here. assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL); assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL); stSortedSet_insert(sortedAlignment, alignedPair2); stSortedSet_insert(sortedAlignment, alignedPair2->reverse); stIntTuple_destruct(alignedPair); } //Cleanup stList_destruct(seqFrags); stList_destruct(sequences); free(pairwiseAlignmentsPerSequenceNonCommonEnds); free(pairwiseAlignmentsPerSequenceCommonEnds); free(scoreAdjustmentsNonCommonEnds); free(scoreAdjustmentsCommonEnds); multipleAlignment_destruct(mA); stHash_destruct(endInstanceNumbers); return sortedAlignment; }
/* add children from pending list */ static void subrangeAddPendingChildren(stTree *destNode, stList *pendingSubtrees) { while (stList_length(pendingSubtrees) > 0) { stTree *destChild = stList_pop(pendingSubtrees); stTree_setParent(destChild, destNode); } }