/*
 * Greedy matching over a list of edge tuples.
 *
 * Each edge is an stIntTuple read here as (node, node, weight): indices 0 and
 * 1 are the two endpoints and index 2 is the weight. A copy of the input list
 * is sorted with chooseMatching_greedyP and then consumed one edge at a time
 * with stList_pop(); an edge is kept only if neither endpoint has already
 * been claimed by a previously kept edge, so every node ends up with at most
 * one edge.
 *
 * The caller's list object is left alone (only a copy is sorted/consumed).
 * The returned list is newly constructed and holds the selected edge tuples.
 * nodeNumber is not used by this implementation.
 */
stList *chooseMatching_greedy(stList *edges, int64_t nodeNumber) {
    // Work on a copy so the caller's list keeps its order and contents.
    stList *remaining = stList_copy(edges, NULL);
    stList_sort(remaining, chooseMatching_greedyP);

    stSortedSet *usedNodes = getEmptyNodeOrEdgeSetWithCleanup();
    stList *matching = stList_construct();

    // Sanity tracking: weights must come off the list in non-increasing order.
    double previousWeight = INT64_MAX;
    while (stList_length(remaining) > 0) {
        stIntTuple *edge = stList_pop(remaining);

        double weight = stIntTuple_get(edge, 2);
        assert(weight <= previousWeight);
        previousWeight = weight;

        // Skip the edge if either endpoint is already matched.
        if (nodeInSet(usedNodes, stIntTuple_get(edge, 0))
                || nodeInSet(usedNodes, stIntTuple_get(edge, 1))) {
            continue;
        }

        addNodeToSet(usedNodes, stIntTuple_get(edge, 0));
        addNodeToSet(usedNodes, stIntTuple_get(edge, 1));
        stList_append(matching, edge);
    }
    assert(stList_length(remaining) == 0);

    stList_destruct(remaining);
    stSortedSet_destruct(usedNodes);
    return matching;
}
/*
 * Collapses a list of substrings into a (usually shorter) list of merged
 * substrings.
 *
 * The input list is sorted in place with substring_cmp. Walking it in that
 * order, a substring is folded into the current run when it has the same
 * name and starts no further than proximityToMerge past the end of the run
 * (start + length); otherwise it starts a new run.
 *
 * Returns a new list that destroys its elements with substring_destruct.
 * Every element of the result is a clone, so the input substrings are not
 * shared with the output. An empty input yields an empty result.
 */
static stList *mergeSubstrings(stList *substrings, int64_t proximityToMerge) {
    stList *merged = stList_construct3(0, (void (*)(void *)) substring_destruct);
    int64_t count = stList_length(substrings);
    if (count == 0) {
        return merged;
    }

    stList_sort(substrings, (int (*)(const void *, const void *)) substring_cmp);

    // The first substring always opens the first run.
    Substring *current = substring_clone(stList_get(substrings, 0));
    stList_append(merged, current);

    for (int64_t i = 1; i < count; i++) {
        Substring *next = stList_get(substrings, i);

        // Different sequence, or too far beyond the current run: open a new run.
        if (current->name != next->name
                || current->start + current->length + proximityToMerge < next->start) {
            current = substring_clone(next);
            stList_append(merged, current);
            continue;
        }

        // Same run: extend it only if the next substring reaches further right.
        if (current->start + current->length < next->start + next->length) {
            current->length = next->start + next->length - current->start;
        }
    }
    return merged;
}
/*
 * Creates adjacencies between the caps of a flower.
 *
 * Every cap instance of every end in the flower is collected (replaced by its
 * reverse when cap_getStrand() is false), the collection is sorted with
 * addAdjacenciesPP, and then consecutive caps in the sorted order are joined
 * pairwise with cap_makeAdjacent(): (0,1), (2,3), ...
 *
 * The number of collected caps is asserted to be even.
 */
void stCaf_addAdjacencies(Flower *flower) {
    stList *caps = stList_construct();

    // Gather all caps, normalised so that each one reports a true strand.
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    for (End *end = flower_getNextEnd(endIt); end != NULL; end = flower_getNextEnd(endIt)) {
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        for (Cap *cap = end_getNext(capIt); cap != NULL; cap = end_getNext(capIt)) {
            stList_append(caps, cap_getStrand(cap) ? cap : cap_getReverse(cap));
        }
        end_destructInstanceIterator(capIt);
    }
    flower_destructEndIterator(endIt);
    assert(stList_length(caps) % 2 == 0);

    // Order the caps so that adjacent partners end up next to each other.
    stList_sort(caps, (int(*)(const void *, const void *)) addAdjacenciesPP);

    // Join the sorted caps two at a time.
    for (int64_t first = 0; first + 1 < stList_length(caps); first += 2) {
        Cap *left = stList_get(caps, first);
        Cap *right = stList_get(caps, first + 1);
        cap_makeAdjacent(left, right);
    }

    stList_destruct(caps);
}
/*
 * Recursively sorts the tree rooted at root using cmpFn.
 *
 * root->nodes is sorted first, then the function recurses into each child in
 * its new order. The comparator is handed to stList_sort() indirectly: it is
 * stored in sortChildrenCmpFn for sortChildrenListCmpFn to use, and that
 * variable is reset to NULL before recursing (each recursive call sets it
 * again for its own sort).
 *
 * NOTE(review): because the comparator travels through the shared variable
 * sortChildrenCmpFn, concurrent calls would presumably interfere - confirm
 * before using from multiple threads.
 */
void stTree_sortChildren(stTree *root, int cmpFn(stTree *a, stTree *b)) {
    // Publish the comparator for the adapter, sort, then withdraw it.
    sortChildrenCmpFn = cmpFn;
    stList_sort(root->nodes, sortChildrenListCmpFn);
    sortChildrenCmpFn = NULL;

    // Descend into every child subtree.
    int childIndex = 0;
    while (childIndex < stTree_getChildNumber(root)) {
        stTree_sortChildren(stTree_getChild(root, childIndex), cmpFn);
        childIndex++;
    }
}
/*
 * Builds an adjacency switch from the two best candidate edges taken from
 * different components.
 *
 * For each component the edge returned by getLowestScoringEdge() is
 * collected; the candidates are sorted with getBest2EdgeAdjacencySwitchP and
 * the first two are taken as the edges to remove. The two replacement edges
 * are looked up in allAdjacencyEdges: first pairing endpoint 0 with endpoint
 * 0 and endpoint 1 with endpoint 1, and if that pairing is absent, the
 * crossed pairing (0 with 1, 1 with 0).
 *
 * The score passed to adjacencySwitch_construct() is the sum of the two old
 * edges' index-2 values.
 *
 * Requires at least two components (elements 0 and 1 of the candidate list
 * are read unconditionally) and asserts that both replacement edges exist.
 */
static AdjacencySwitch *getBest2EdgeAdjacencySwitch(stList *components, stSortedSet *allAdjacencyEdges) {
    // One candidate per component.
    stList *candidates = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stList_append(candidates, getLowestScoringEdge(stList_get(components, i)));
    }

    // After sorting, the two edges to be replaced are at the front.
    stList_sort(candidates, getBest2EdgeAdjacencySwitchP);
    stIntTuple *oldEdgeA = stList_get(candidates, 0);
    stIntTuple *oldEdgeB = stList_get(candidates, 1);
    assert(oldEdgeA != oldEdgeB);
    stList_destruct(candidates); // Only the list goes away; the edges are still used below.

    // Try the "parallel" re-pairing of endpoints first.
    stIntTuple *newEdgeA = getWeightedEdgeFromSet(
            stIntTuple_get(oldEdgeA, 0), stIntTuple_get(oldEdgeB, 0), allAdjacencyEdges);
    stIntTuple *newEdgeB = getWeightedEdgeFromSet(
            stIntTuple_get(oldEdgeA, 1), stIntTuple_get(oldEdgeB, 1), allAdjacencyEdges);

    // Fall back to the "crossed" re-pairing when the parallel one is missing.
    if (newEdgeA == NULL) {
        assert(newEdgeB == NULL);
        newEdgeA = getWeightedEdgeFromSet(
                stIntTuple_get(oldEdgeA, 0), stIntTuple_get(oldEdgeB, 1), allAdjacencyEdges);
        newEdgeB = getWeightedEdgeFromSet(
                stIntTuple_get(oldEdgeA, 1), stIntTuple_get(oldEdgeB, 0), allAdjacencyEdges);
    }
    assert(newEdgeA != NULL);
    assert(newEdgeB != NULL);

    return adjacencySwitch_construct(oldEdgeA, oldEdgeB, newEdgeA, newEdgeB,
            stIntTuple_get(oldEdgeA, 2) + stIntTuple_get(oldEdgeB, 2));
}
/*
 * Checks that stList_sort() with strcmp as the comparator leaves the fixture
 * list with its original length and with its strings in ascending strcmp
 * order: "five", "four", "one", "three", "two".
 */
void test_stList_sort(CuTest *testCase) {
    static const char *const expectedOrder[] = { "five", "four", "one", "three", "two" };
    const int64_t expectedCount = sizeof expectedOrder / sizeof expectedOrder[0];

    setup();
    stList_sort(list, (int (*)(const void *, const void *))strcmp);

    // Sorting must not add or drop elements.
    CuAssertTrue(testCase, stList_length(list) == stringNumber);

    // Each position must hold the expected string.
    for (int64_t i = 0; i < expectedCount; i++) {
        CuAssertStrEquals(testCase, expectedOrder[i], stList_get(list, i));
    }
    teardown();
}
/* build a list of blocks, sorted by the root components */ static stList *buildRootSorted(struct malnSet *malnSet) { stList *sorted = stList_construct(); struct malnBlkSetIterator *iter = malnBlkSet_getIterator(malnSet->blks); struct malnBlk *blk; while ((blk = malnBlkSetIterator_getNext(iter)) != NULL) { stList_append(sorted, blk); } malnBlkSetIterator_destruct(iter); stList_sort(sorted, blkCmpRootComp); return sorted; }
/* Get a list of components that overlap the specified guide range and are in * blocks not flagged as dying and matches treeLoc filters. Return NULL if no * overlaps. List is sorted by ascending width, which helps the merge * efficiency. */ stList *malnSet_getOverlappingComps(struct malnSet *malnSet, struct Seq *seq, int chromStart, int chromEnd, unsigned treeLocFilter) { if (malnSet->compRangeMap == NULL) { buildRangeTree(malnSet); } stList *overComps = NULL; for (struct range *rng = genomeRangeTreeAllOverlapping(malnSet->compRangeMap, seq->orgSeqName, chromStart, chromEnd); rng != NULL; rng = rng->next) { for (struct slRef *compRef = rng->val; compRef != NULL; compRef = compRef->next) { struct malnComp *comp = compRef->val; if (keepOverlap(comp, seq, chromStart, chromEnd, treeLocFilter)) { if (overComps == NULL) { overComps = stList_construct(); } stList_append(overComps, comp); } } } // sort so tests are reproducible if (overComps != NULL) { stList_sort(overComps, sortCompListCmpFn); } return overComps; }
int main(int argc, char *argv[]) { // Parse arguments if (argc != 3) { usage(argv); return 1; } // You would load a custom HMM here if you wanted using // hmm_getStateMachine (see the realign code) StateMachine *stateMachine = stateMachine5_construct(fiveState); PairwiseAlignmentParameters *parameters = pairwiseAlignmentBandingParameters_construct(); stHash *targetSequences = readFastaFile(argv[1]); stHash *querySequences = readFastaFile(argv[2]); // For each query sequence, align it against all target sequences. stHashIterator *queryIt = stHash_getIterator(querySequences); char *queryHeader; while ((queryHeader = stHash_getNext(queryIt)) != NULL) { char *querySeq = stHash_search(querySequences, queryHeader); stHashIterator *targetIt = stHash_getIterator(targetSequences); char *targetHeader; while ((targetHeader = stHash_getNext(targetIt)) != NULL) { char *targetSeq = stHash_search(targetSequences, targetHeader); // Here we should try both the target sequence and its // reverse-complemented version // Aligns the sequences. // If you have alignment constraints (anchors) you should // replace this with getAlignedPairsUsingAnchors. stList *alignedPairs = getAlignedPairs(stateMachine, targetSeq, querySeq, parameters, true, true); // Takes into account the probability of aligning to a // gap, by transforming the posterior probability into the // AMAP objective function (see Schwartz & Pachter, 2007). alignedPairs = reweightAlignedPairs2(alignedPairs, strlen(targetSeq), strlen(querySeq), parameters->gapGamma); // I think this calculates the optimal ordered set of // alignments from the unordered set of aligned pairs, not // completely sure. alignedPairs = filterPairwiseAlignmentToMakePairsOrdered(alignedPairs, targetSeq, querySeq, // This parameter says that the minimum posterior probability we will accept has to be at least 0.9. 0.9); // After this the "aligned pairs" data structure changes, // which is a little sketchy. 
It's just so that the // alignment can be printed properly. stList_mapReplace(alignedPairs, convertToAnchorPair, NULL); stList_sort(alignedPairs, (int (*)(const void *, const void *)) stIntTuple_cmpFn); struct PairwiseAlignment *alignment = convertAlignedPairsToPairwiseAlignment(targetHeader, queryHeader, 0, strlen(targetSeq), strlen(querySeq), alignedPairs); // Output the cigar string cigarWrite(stdout, alignment, 0); stList_destruct(alignedPairs); destructPairwiseAlignment(alignment); } stHash_destructIterator(targetIt); } stHash_destructIterator(queryIt); // Clean up stHash_destruct(targetSequences); stHash_destruct(querySequences); pairwiseAlignmentBandingParameters_destruct(parameters); stateMachine_destruct(stateMachine); }