stList *chooseMatching_greedy(stList *edges, int64_t nodeNumber) {
    /*
     * Greedy matching: works through a sorted copy of the edge list and keeps an
     * edge only if neither of its two nodes (tuple positions 0 and 1) already
     * belongs to a previously kept edge, so every node ends up with at most one edge.
     *
     * The sorting and popping are done on a copy made with stList_copy, not on the
     * list passed in. The returned list contains the selected edge tuples as they
     * were popped from that copy. nodeNumber is not read by this function.
     */
    stList *pending = stList_copy(edges, NULL);
    stSortedSet *matchedNodes = getEmptyNodeOrEdgeSetWithCleanup();
    stList *matching = stList_construct();

    //Order the copy; the assert in the loop checks the weights then arrive in non-increasing order.
    stList_sort(pending, chooseMatching_greedyP);

    double previousWeight = INT64_MAX;
    while (stList_length(pending) > 0) {
        stIntTuple *edge = stList_pop(pending);
        double weight = stIntTuple_get(edge, 2); //Position 2 holds the edge weight.
        assert(weight <= previousWeight);
        previousWeight = weight;
        if (nodeInSet(matchedNodes, stIntTuple_get(edge, 0)) || nodeInSet(matchedNodes, stIntTuple_get(edge, 1))) {
            continue; //At least one end is already matched, so this edge cannot be used.
        }
        addNodeToSet(matchedNodes, stIntTuple_get(edge, 0));
        addNodeToSet(matchedNodes, stIntTuple_get(edge, 1));
        stList_append(matching, edge);
    }
    assert(stList_length(pending) == 0);

    //Release the working copy and the bookkeeping set; the matching itself is handed to the caller.
    stList_destruct(pending);
    stSortedSet_destruct(matchedNodes);

    return matching;
}
Esempio n. 2
0
static stList *mergeSubstrings(stList *substrings, int64_t proximityToMerge) {
    /*
     * Collapses the given substrings into a new list of merged substrings.
     *
     * The input list is sorted in place with substring_cmp and then scanned once.
     * A substring is folded into the current merged substring when both have the
     * same name and the current one's end (start + length), extended by
     * proximityToMerge, reaches at least the start of the next one. Otherwise a
     * clone of the next substring starts a new merged entry.
     *
     * The returned list is built with substring_destruct as its element destructor
     * and contains only clones; an empty input gives an empty list.
     */
    stList *merged = stList_construct3(0, (void (*)(void *)) substring_destruct);
    int64_t substringCount = stList_length(substrings);
    if (substringCount == 0) {
        return merged;
    }

    stList_sort(substrings, (int (*)(const void *, const void *)) substring_cmp);

    //Seed the output with a clone of the first substring in sorted order.
    Substring *current = substring_clone(stList_get(substrings, 0));
    stList_append(merged, current);

    for (int64_t i = 1; i < substringCount; i++) {
        Substring *next = stList_get(substrings, i);
        if (current->name != next->name
                || current->start + current->length + proximityToMerge < next->start) {
            //Different sequence, or too far away: begin a new merged substring.
            current = substring_clone(next);
            stList_append(merged, current);
            continue;
        }
        //Merge: only grow the current substring if the next one ends beyond it.
        if (current->start + current->length < next->start + next->length) {
            current->length = next->start + next->length - current->start;
        }
    }
    return merged;
}
Esempio n. 3
0
void stCaf_addAdjacencies(Flower *flower) {
    /*
     * Gathers one cap per end instance in the flower, sorts them with
     * addAdjacenciesPP and then makes each consecutive pair in the sorted order
     * (elements 0 and 1, 2 and 3, ...) adjacent.
     *
     * A cap for which cap_getStrand() is false is replaced by the result of
     * cap_getReverse() before it is collected. The total number of caps is
     * asserted to be even.
     */
    stList *caps = stList_construct();

    //Walk every end of the flower and every cap instance of each end.
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    for (End *end = flower_getNextEnd(endIt); end != NULL; end = flower_getNextEnd(endIt)) {
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        for (Cap *cap = end_getNext(capIt); cap != NULL; cap = end_getNext(capIt)) {
            stList_append(caps, cap_getStrand(cap) ? cap : cap_getReverse(cap));
        }
        end_destructInstanceIterator(capIt);
    }
    flower_destructEndIterator(endIt);
    assert(stList_length(caps) % 2 == 0);

    //Put the caps in order so that partners sit next to each other.
    stList_sort(caps, (int(*)(const void *, const void *)) addAdjacenciesPP);

    //Join neighbouring pairs.
    for (int64_t i = 0; i + 1 < stList_length(caps); i += 2) {
        Cap *first = stList_get(caps, i);
        Cap *second = stList_get(caps, i + 1);
        cap_makeAdjacent(first, second);
    }

    //Only the list is released here, not the caps it referenced.
    stList_destruct(caps);
}
Esempio n. 4
0
void stTree_sortChildren(stTree *root, int cmpFn(stTree *a, stTree *b)) {
    /*
     * Sorts root->nodes using cmpFn, then applies the same sort to every child
     * subtree in turn.
     *
     * cmpFn is made available to sortChildrenListCmpFn through the
     * sortChildrenCmpFn variable, which is set just before the sort and reset to
     * NULL straight after it.
     * NOTE(review): because that variable is shared, concurrent calls would
     * presumably interfere with each other - confirm before using from threads.
     */
    sortChildrenCmpFn = cmpFn;
    stList_sort(root->nodes, sortChildrenListCmpFn);
    sortChildrenCmpFn = NULL;

    //Descend into each child once this level is in order.
    int childIndex = 0;
    while (childIndex < stTree_getChildNumber(root)) {
        stTree_sortChildren(stTree_getChild(root, childIndex), cmpFn);
        childIndex++;
    }
}
static AdjacencySwitch *getBest2EdgeAdjacencySwitch(stList *components,
        stSortedSet *allAdjacencyEdges) {
    /*
     * Builds an adjacency switch from two existing edges that come from different
     * components, together with the two replacement edges that reconnect their
     * end points.
     *
     * One edge is taken from each component via getLowestScoringEdge; those
     * candidates are sorted with getBest2EdgeAdjacencySwitchP and the first two
     * are used. Entries 0 and 1 of the candidate list are read unconditionally,
     * so components must contain at least two elements.
     *
     * The replacement edges are looked up in allAdjacencyEdges, first pairing
     * node 0 with node 0 and node 1 with node 1, and, if that pairing is absent,
     * pairing node 0 with node 1 and node 1 with node 0. Both replacements are
     * asserted to exist. The score passed to adjacencySwitch_construct is the sum
     * of position 2 of the two original edges.
     */

    /*
     * Collect one candidate edge per component.
     */
    stList *candidates = stList_construct();
    for (int64_t componentIndex = 0; componentIndex < stList_length(components); componentIndex++) {
        stList_append(candidates,
                getLowestScoringEdge(stList_get(components, componentIndex)));
    }

    /*
     * Keep the two candidates that sort first.
     */
    stList_sort(candidates, getBest2EdgeAdjacencySwitchP);
    stIntTuple *oldEdgeA = stList_get(candidates, 0);
    stIntTuple *oldEdgeB = stList_get(candidates, 1);
    assert(oldEdgeA != oldEdgeB);

    stList_destruct(candidates); //The edges themselves are still used below.

    /*
     * Find the replacement edges, trying the "straight" pairing before the "crossed" one.
     */
    stIntTuple *replacementA = getWeightedEdgeFromSet(
            stIntTuple_get(oldEdgeA, 0),
            stIntTuple_get(oldEdgeB, 0), allAdjacencyEdges);
    stIntTuple *replacementB = getWeightedEdgeFromSet(
            stIntTuple_get(oldEdgeA, 1),
            stIntTuple_get(oldEdgeB, 1), allAdjacencyEdges);
    if (replacementA == NULL) {
        assert(replacementB == NULL);
        replacementA = getWeightedEdgeFromSet(
                stIntTuple_get(oldEdgeA, 0),
                stIntTuple_get(oldEdgeB, 1), allAdjacencyEdges);
        replacementB = getWeightedEdgeFromSet(
                stIntTuple_get(oldEdgeA, 1),
                stIntTuple_get(oldEdgeB, 0), allAdjacencyEdges);
    }
    assert(replacementA != NULL);
    assert(replacementB != NULL);

    return adjacencySwitch_construct(
            oldEdgeA,
            oldEdgeB,
            replacementA,
            replacementB,
            stIntTuple_get(oldEdgeA, 2)
                    + stIntTuple_get(oldEdgeB, 2));
}
Esempio n. 6
0
void test_stList_sort(CuTest *testCase) {
    /*
     * Sorts the fixture list with strcmp as the comparator and checks that the
     * length still equals stringNumber and that the elements come out in the
     * order listed in expectedOrder.
     */
    static const char *const expectedOrder[] = { "five", "four", "one", "three", "two" };
    const int64_t expectedCount = sizeof(expectedOrder) / sizeof(expectedOrder[0]);

    setup();
    stList_sort(list, (int (*)(const void *, const void *))strcmp);
    CuAssertTrue(testCase, stList_length(list) == stringNumber);
    for (int64_t i = 0; i < expectedCount; i++) {
        CuAssertStrEquals(testCase, expectedOrder[i], stList_get(list, i));
    }
    teardown();
}
Esempio n. 7
0
/* build a list of blocks, sorted by the root components */
static stList *buildRootSorted(struct malnSet *malnSet) {
    stList *sorted = stList_construct();
    struct malnBlkSetIterator *iter = malnBlkSet_getIterator(malnSet->blks);
    struct malnBlk *blk;
    while ((blk = malnBlkSetIterator_getNext(iter)) != NULL) {
        stList_append(sorted, blk);
    }
    malnBlkSetIterator_destruct(iter);
    stList_sort(sorted, blkCmpRootComp);
    return sorted;
}
Esempio n. 8
0
/* Collect the components stored in the range map that overlap
 * chromStart..chromEnd on seq and that are accepted by keepOverlap() for the
 * given treeLocFilter.  The range map is built on first use.  Returns NULL when
 * nothing is accepted; otherwise returns a new list sorted with
 * sortCompListCmpFn (the original comments describe this as ascending width,
 * which helps merge efficiency and keeps tests reproducible). */
stList *malnSet_getOverlappingComps(struct malnSet *malnSet, struct Seq *seq, int chromStart, int chromEnd, unsigned treeLocFilter) {
    // build the range map lazily
    if (malnSet->compRangeMap == NULL) {
        buildRangeTree(malnSet);
    }

    stList *matches = NULL;  // only allocated once there is something to return
    struct range *rng = genomeRangeTreeAllOverlapping(malnSet->compRangeMap, seq->orgSeqName, chromStart, chromEnd);
    while (rng != NULL) {
        // each range carries a linked list of component references
        struct slRef *compRef = rng->val;
        while (compRef != NULL) {
            struct malnComp *comp = compRef->val;
            if (keepOverlap(comp, seq, chromStart, chromEnd, treeLocFilter)) {
                if (matches == NULL) {
                    matches = stList_construct();
                }
                stList_append(matches, comp);
            }
            compRef = compRef->next;
        }
        rng = rng->next;
    }

    if (matches == NULL) {
        return NULL;
    }
    // give the result a deterministic order
    stList_sort(matches, sortCompListCmpFn);
    return matches;
}
Esempio n. 9
0
int main(int argc, char *argv[]) {
    // Parse arguments
    if (argc != 3) {
        usage(argv);
        return 1;
    }

    // You would load a custom HMM here if you wanted using
    // hmm_getStateMachine (see the realign code)
    StateMachine *stateMachine  = stateMachine5_construct(fiveState);

    PairwiseAlignmentParameters *parameters = pairwiseAlignmentBandingParameters_construct();

    stHash *targetSequences = readFastaFile(argv[1]);
    stHash *querySequences = readFastaFile(argv[2]);

    // For each query sequence, align it against all target sequences.
    stHashIterator *queryIt = stHash_getIterator(querySequences);
    char *queryHeader;
    while ((queryHeader = stHash_getNext(queryIt)) != NULL) {
        char *querySeq = stHash_search(querySequences, queryHeader);
        stHashIterator *targetIt = stHash_getIterator(targetSequences);
        char *targetHeader;
        while ((targetHeader = stHash_getNext(targetIt)) != NULL) {
            char *targetSeq = stHash_search(targetSequences, targetHeader);
            // Here we should try both the target sequence and its
            // reverse-complemented version


            // Aligns the sequences.
            // If you have alignment constraints (anchors) you should
            // replace this with getAlignedPairsUsingAnchors.
            stList *alignedPairs = getAlignedPairs(stateMachine, targetSeq,
                                                   querySeq, parameters,
                                                   true, true);
            // Takes into account the probability of aligning to a
            // gap, by transforming the posterior probability into the
            // AMAP objective function (see Schwartz & Pachter, 2007).
            alignedPairs = reweightAlignedPairs2(alignedPairs, strlen(targetSeq),
                                                 strlen(querySeq),
                                                 parameters->gapGamma);
            // I think this calculates the optimal ordered set of
            // alignments from the unordered set of aligned pairs, not
            // completely sure.
            alignedPairs = filterPairwiseAlignmentToMakePairsOrdered(alignedPairs,
                                                                     targetSeq,
                                                                     querySeq,
                                                                     // This parameter says that the minimum posterior probability we will accept has to be at least 0.9.
                                                                     0.9);

            // After this the "aligned pairs" data structure changes,
            // which is a little sketchy. It's just so that the
            // alignment can be printed properly.
            stList_mapReplace(alignedPairs, convertToAnchorPair, NULL);
            stList_sort(alignedPairs, (int (*)(const void *, const void *)) stIntTuple_cmpFn);
            struct PairwiseAlignment *alignment = convertAlignedPairsToPairwiseAlignment(targetHeader, queryHeader,
                                                                                  0, strlen(targetSeq), strlen(querySeq), alignedPairs);
            // Output the cigar string
            cigarWrite(stdout, alignment, 0);

            stList_destruct(alignedPairs);
            destructPairwiseAlignment(alignment);
        }
        stHash_destructIterator(targetIt);
    }
    stHash_destructIterator(queryIt);

    // Clean up
    stHash_destruct(targetSequences);
    stHash_destruct(querySequences);

    pairwiseAlignmentBandingParameters_destruct(parameters);
    stateMachine_destruct(stateMachine);
}