Esempio n. 1
0
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);
    //Check all components are smaller than threshold
    stList *components = stHash_getValues(nodesToComponents);
    for (int64_t i = 0; i < stList_length(components); i++) {
        stSortedSet *component = stList_get(components, i);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }
    //Check no edges can be added from those filtered.
    stSortedSet *filteredEdgesSet = stList_getSortedSet(filteredEdges, (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t i = 0; i < stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        if (stSortedSet_search(filteredEdgesSet, edge) == NULL) {
            stIntTuple *node1 = stIntTuple_construct1( stIntTuple_get(edge, 1));
            stIntTuple *node2 = stIntTuple_construct1( stIntTuple_get(edge, 2));
            stSortedSet *component1 = stHash_search(nodesToComponents, node1);
            stSortedSet *component2 = stHash_search(nodesToComponents, node2);
            CuAssertTrue(testCase, component1 != NULL && component2 != NULL);
            CuAssertTrue(testCase, component1 != component2);
            CuAssertTrue(testCase, stSortedSet_size(component1) + stSortedSet_size(component2) > maxComponentSize);
            stIntTuple_destruct(node1);
            stIntTuple_destruct(node2);
        }
    }
    stSortedSet_destruct(filteredEdgesSet);
    //Cleanup the components
    stSortedSet *componentsSet = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(componentsSet, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(componentsSet);
    stHash_destruct(nodesToComponents);
}
static void splitIntoAdjacenciesStubsAndChains(stList *subCycle,
        stList *adjacencyEdges, stList *stubEdges, stList *chainEdges,
        stList **subAdjacencyEdges, stList **subStubEdges,
        stList **subChainEdges) {
    /*
     * Splits run into cycles and chains..
     */
    *subStubEdges = stList_construct();
    *subChainEdges = stList_construct();
    for (int64_t j = 0; j < stList_length(subCycle); j++) {
        stIntTuple *edge = stList_get(subCycle, j);
        if (stList_contains(stubEdges, edge)) {
            stList_append(*subStubEdges, edge);
        } else if (stList_contains(chainEdges, edge)) {
            stList_append(*subChainEdges, edge);
        }
    }
    *subAdjacencyEdges = stList_construct();
    stSortedSet *nodes = getNodeSetOfEdges(subCycle);
    for (int64_t j = 0; j < stList_length(adjacencyEdges); j++) {
        stIntTuple *edge = stList_get(adjacencyEdges, j);
        if (nodeInSet(nodes, stIntTuple_get(edge, 0)) && nodeInSet(
                nodes, stIntTuple_get(edge, 1))) {
            stList_append(*subAdjacencyEdges, edge);
        }
    }
    stSortedSet_destruct(nodes);
}
Esempio n. 3
0
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A kind of stupid reimplementation of the greedy function, done just to trap typos.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);
    for (int64_t i = 0; i < stList_length(nodes); i++) {
        stIntTuple *node = stList_get(nodes, i);
        stSortedSet *component = stSortedSet_construct();
        stSortedSet_insert(component, node);
        stHash_insert(nodesToComponents, node, component);
    }
    for (int64_t i = 0; i < stList_length(filteredEdges); i++) {
        stIntTuple *edge = stList_get(filteredEdges, i);
        stIntTuple *node1 = stIntTuple_construct1( stIntTuple_get(edge, 1));
        stIntTuple *node2 = stIntTuple_construct1( stIntTuple_get(edge, 2));
        stSortedSet *component1 = stHash_search(nodesToComponents, node1);
        stSortedSet *component2 = stHash_search(nodesToComponents, node2);
        assert(component1 != NULL && component2 != NULL);
        if (component1 != component2) {
            stSortedSet *component3 = stSortedSet_getUnion(component1, component2);
            stSortedSetIterator *setIt = stSortedSet_getIterator(component3);
            stIntTuple *node3;
            while ((node3 = stSortedSet_getNext(setIt)) != NULL) {
                stHash_insert(nodesToComponents, node3, component3);
            }
            stSortedSet_destructIterator(setIt);
            stSortedSet_destruct(component1);
            stSortedSet_destruct(component2);
        }
        stIntTuple_destruct(node1);
        stIntTuple_destruct(node2);
    }
    return nodesToComponents;
}
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges,
        stSortedSet *nodes) {
    /*
     * While the the number of edges is less than a perfect matching add random edges.
     */
    stSortedSet *attachedNodes = getNodeSetOfEdges(chosenEdges);
    stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges);
    stIntTuple *pNode = NULL;
    stSortedSetIterator *it = stSortedSet_getIterator(nodes);
    stIntTuple *node;
    while((node = stSortedSet_getNext(it)) != NULL) {
        if (stSortedSet_search(attachedNodes, node) == NULL) {
            if (pNode == NULL) {
                pNode = node;
            } else {
                stList_append(chosenEdges,
                        getEdgeForNodes(stIntTuple_get(pNode, 0), stIntTuple_get(node, 0), nodesToAdjacencyEdges));
                pNode = NULL;
            }
        }
    }
    stSortedSet_destructIterator(it);
    assert(pNode == NULL);
    stSortedSet_destruct(attachedNodes);
    assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes));
    stHash_destruct(nodesToAdjacencyEdges);
}
static AdjacencySwitch *getBest4EdgeAdjacencySwitchP(stIntTuple *oldEdge1,
        int64_t node1, stSortedSet *allAdjacencyEdges,
        stHash *nodesToAllCurrentEdges, stHash *nodesToBridgingAdjacencyEdges) {
    /*
     * Returns the best adjacency switch for the given node and edge that
     * contains 4 existing edges.
     */
    int64_t node4 = getOtherPosition(oldEdge1, node1);
    AdjacencySwitch *minimumCostAdjacencySwitch = NULL;
    stList *validEdges = getItemForNode(node1, nodesToBridgingAdjacencyEdges);
    if (validEdges != NULL) {
        for (int64_t i = 0; i < stList_length(validEdges); i++) {
            stIntTuple *newEdge1 = stList_get(validEdges, i);
            int64_t node2 = getOtherPosition(newEdge1, node1);
            stList *validEdges2 =
                    getItemForNode(node2, nodesToAllCurrentEdges);
            assert(validEdges2 != NULL);
            assert(stList_length(validEdges2) == 1);
            stIntTuple *oldEdge2 = stList_peek(validEdges2);
            int64_t node3 = getOtherPosition(oldEdge2, node2);
            stIntTuple *newEdge2 = getWeightedEdgeFromSet(node3, node4,
                    allAdjacencyEdges);
            assert(newEdge2 != NULL);
            int64_t cost = stIntTuple_get(oldEdge1, 2)
                    + stIntTuple_get(oldEdge2, 2)
                    - stIntTuple_get(newEdge1, 2)
                    - stIntTuple_get(newEdge2, 2);
            minimumCostAdjacencySwitch = adjacencySwitch_update(
                    minimumCostAdjacencySwitch, oldEdge1, oldEdge2, newEdge1,
                    newEdge2, cost);
        }
    }
    return minimumCostAdjacencySwitch;
}
stList *chooseMatching_greedy(stList *edges, int64_t nodeNumber) {
    /*
     * Greedily picks the edge from the list such that each node has at most one edge.
     */
    //First clone the list..
    edges = stList_copy(edges, NULL);

    stSortedSet *seen = getEmptyNodeOrEdgeSetWithCleanup();
    stList *matching = stList_construct();

    //Sort the adjacency pairs..
    stList_sort(edges, chooseMatching_greedyP);

    double strength = INT64_MAX;
    while (stList_length(edges) > 0) {
        stIntTuple *edge = stList_pop(edges);
        double d = stIntTuple_get(edge, 2);
        assert(d <= strength);
        strength = d;
        if(!nodeInSet(seen, stIntTuple_get(edge, 0)) && !nodeInSet(seen, stIntTuple_get(edge, 1))) {
            addNodeToSet(seen, stIntTuple_get(edge, 0));
            addNodeToSet(seen, stIntTuple_get(edge, 1));
            stList_append(matching,edge);
        }
    }
    assert(stList_length(edges) == 0);
    stList_destruct(edges);
    stSortedSet_destruct(seen);

    return matching;
}
Esempio n. 7
0
/*
 * Function does the actual depth first search to detect if the thing has an acyclic ordering.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos,
                   stSortedSet *started, stSortedSet *done) {
    if(stSortedSet_search(started, seqPos) != NULL) {
        if(stSortedSet_search(done, seqPos) == NULL) {
            //We have detected a cycle
            //st_logInfo("I have cycle %" PRIi64 " %" PRIi64 "\n", stIntTuple_getPosition(seqPos, 0), stIntTuple_getPosition(seqPos, 1));
            return 1;
        }
        //We have already explored this area, but no cycle.
        return 0;
    }
    stSortedSet_insert(started, seqPos);

    int64_t cycle =0;

    stIntTuple *nextSeqPos = stIntTuple_construct2( stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *column = stHash_search(adjacencyList, nextSeqPos);
    if(column != NULL) { //It is in the adjacency list, so we can do the recursion
        assert(stSortedSet_search(column, nextSeqPos) != NULL);
        stSortedSetIterator *it = stSortedSet_getIterator(column);
        stIntTuple *seqPos2;
        while((seqPos2 = stSortedSet_getNext(it)) != NULL) {
            cycle = cycle || dfs(adjacencyList, seqPos2, started, done);
        }
        stSortedSet_destructIterator(it);
    }
    stIntTuple_destruct(nextSeqPos);
    stSortedSet_insert(done, seqPos);
    return cycle;
}
Esempio n. 8
0
static void debugScaffoldPathsP(Cap *cap, stList *haplotypePath,
        stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash,
        stHash *segmentToMaximalHaplotypePathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters, bool capDir) {
    int64_t insertLength;
    int64_t deleteLength;
    Cap *otherCap;
    enum CapCode capCode = getCapCode(cap, &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
    if (capCode == SCAFFOLD_GAP || capCode == AMBIGUITY_GAP) {
        Segment *adjacentSegment = getAdjacentCapsSegment(cap);
        assert(adjacentSegment != NULL);
        while (!hasCapInEvents(cap_getEnd(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)), haplotypeEventStrings)) {
            adjacentSegment = getAdjacentCapsSegment(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment));
            assert(adjacentSegment != NULL);
        }
        assert(adjacentSegment != NULL);
        assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(adjacentSegment))));
        stIntTuple *j = stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath);
        (void)j;
        assert(j != NULL);
        stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
        if (adjacentHaplotypePath == NULL) {
            adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash,
                    segment_getReverse(adjacentSegment));
        }
        assert(adjacentHaplotypePath != NULL);
        assert(adjacentHaplotypePath != haplotypePath);
        stIntTuple *k = stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath);
        (void)k;
        assert(k != NULL);
        assert(stIntTuple_get(j, 0) == stIntTuple_get(k, 0));
        assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) ==
                stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath));
    }
}
static stHash *putEdgesInHash(stList *edges) {
    stHash *intsToEdgesHash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey, (int (*)(const void *, const void *))stIntTuple_equalsFn, (void (*)(void *))stIntTuple_destruct, NULL);
    for(int64_t i=0; i<stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        stHash_insert(intsToEdgesHash, constructEdge(stIntTuple_get(edge, 0), stIntTuple_get(edge, 1)), edge);
    }
    return intsToEdgesHash;
}
static AdjacencySwitch *getBest2EdgeAdjacencySwitch(stList *components,
        stSortedSet *allAdjacencyEdges) {
    /*
     * Look for the two lowest value adjacency edges in all current edges that are in a separate component and returns them as an adjacency switch
     * with now new adjacency edges.
     */

    /*
     * Get lowest scoring edge for each component.
     */
    stList *lowestScoringEdgeFromEachComponent = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stList_append(lowestScoringEdgeFromEachComponent,
                getLowestScoringEdge(stList_get(components, i)));
    }

    /*
     * Get two lowest scoring edges.
     */
    stList_sort(lowestScoringEdgeFromEachComponent,
            getBest2EdgeAdjacencySwitchP);
    stIntTuple *lowestScoreEdge1 = stList_get(
            lowestScoringEdgeFromEachComponent, 0);
    stIntTuple *lowestScoreEdge2 = stList_get(
            lowestScoringEdgeFromEachComponent, 1);
    assert(lowestScoreEdge1 != lowestScoreEdge2);

    stList_destruct(lowestScoringEdgeFromEachComponent); //Cleanup

    stIntTuple *newEdge1 = getWeightedEdgeFromSet(
            stIntTuple_get(lowestScoreEdge1, 0),
            stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    stIntTuple *newEdge2 = getWeightedEdgeFromSet(
            stIntTuple_get(lowestScoreEdge1, 1),
            stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
    if (newEdge1 == NULL) {
        assert(newEdge2 == NULL);
        newEdge1 = getWeightedEdgeFromSet(
                stIntTuple_get(lowestScoreEdge1, 0),
                stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
        newEdge2 = getWeightedEdgeFromSet(
                stIntTuple_get(lowestScoreEdge1, 1),
                stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    }
    assert(newEdge1 != NULL);
    assert(newEdge2 != NULL);

    return adjacencySwitch_construct(
            lowestScoreEdge1,
            lowestScoreEdge2,
            newEdge1,
            newEdge2,
            stIntTuple_get(lowestScoreEdge1, 2)
                    + stIntTuple_get(lowestScoreEdge2, 2));
}
Esempio n. 11
0
static void test_stPosetAlignment_addAndIsPossible(CuTest *testCase) {
    for(int64_t trial=0; trial<100; trial++) {
        setup();

        //Make random number of sequences.
        stList *sequenceLengths = stList_construct3(0, (void (*)(void *))stIntTuple_destruct);
        for(int64_t i=0; i<sequenceNumber; i++) {
            stList_append(sequenceLengths, stIntTuple_construct1( st_randomInt(0, MAX_SEQUENCE_SIZE)));
        }

        //Propose random alignment pairs...
        stList *pairs = stList_construct3(0, (void(*)(void *))stIntTuple_destruct);
        int64_t maxAlignedPairs = st_randomInt(0, MAX_ALIGNMENTS);
        if(sequenceNumber > 0) {
            for(int64_t i=0; i<maxAlignedPairs; i++) {
                int64_t seq1 = st_randomInt(0, sequenceNumber);
                int64_t seqLength1 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength1 == 0) {
                    continue;
                }
                int64_t position1 = st_randomInt(0, seqLength1);
                int64_t seq2 = st_randomInt(0, sequenceNumber);
                int64_t seqLength2 = stIntTuple_get(stList_get(sequenceLengths, seq1), 0);
                if(seqLength2 == 0) {
                    continue;
                }
                int64_t position2 = st_randomInt(0, seqLength2);
                if(seq1 != seq2) {
                    stList_append(pairs, stIntTuple_construct4( seq1, position1, seq2, position2));
                    if(stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2)) {
                        st_logInfo("In %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each accepted pair check it doesn't create a cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, stPosetAlignment_add(posetAlignment, seq1, position1, seq2, position2));
                    }
                    else {
                        st_logInfo("Out %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 " \n", seq1, position1, seq2, position2);
                        //For each rejected pair check it creates a cycle..
                        CuAssertTrue(testCase, containsACycle(pairs, sequenceNumber));
                        CuAssertTrue(testCase, !stPosetAlignment_isPossible(posetAlignment, seq1, position1, seq2, position2));
                        stIntTuple_destruct(stList_pop(pairs)); //remove the pair which created the cycle.
                        CuAssertTrue(testCase, !containsACycle(pairs, sequenceNumber)); //Check we're back to being okay..
                    }
                }
            }
        }

        //Cleanup
        stList_destruct(sequenceLengths);
        stList_destruct(pairs);
        teardown();
        st_logInfo("Passed a random ordering test with %" PRIi64 " sequences and %" PRIi64 " aligned pairs\n", sequenceNumber, maxAlignedPairs);
    }
}
Esempio n. 12
0
bool stPosetAlignment_isPossibleP(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1, int64_t sequence2, int64_t position2) {
    stIntTuple *constraint = getConstraint_lessThan(posetAlignment, sequence1, position1, sequence2);
    if(constraint == NULL) {
        return 1;
    }
    if(stIntTuple_get(constraint, 2) && stIntTuple_get(constraint, 0) == position1) { //less than or equals
        return position2 <= stIntTuple_get(constraint, 1);
    }
    else {
        return position2 < stIntTuple_get(constraint, 1);
    }
}
static AdjacencySwitch *getBest4EdgeAdjacencySwitch2(stIntTuple *oldEdge1,
        stSortedSet *allAdjacencyEdges, stHash *nodesToAllCurrentEdgesSet,
        stHash *nodesToBridgingAdjacencyEdges) {
    /*
     * Returns the best 3 or 4 edge switch (one including 3 or 4 edges) for the given existing edge, if they exist, or else NULL.
     */
    return getMinimumCostAdjacencySwitch(
            getBest4EdgeAdjacencySwitchP(oldEdge1,
                    stIntTuple_get(oldEdge1, 0), allAdjacencyEdges,
                    nodesToAllCurrentEdgesSet, nodesToBridgingAdjacencyEdges),
            getBest4EdgeAdjacencySwitchP(oldEdge1,
                    stIntTuple_get(oldEdge1, 1), allAdjacencyEdges,
                    nodesToAllCurrentEdgesSet, nodesToBridgingAdjacencyEdges));
}
Esempio n. 14
0
static void stPosetAlignment_addP2(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t sequence3, int64_t position3, int64_t sequence2, int64_t position2, int64_t lessThanOrEqual) {
    for(int64_t sequence4=0; sequence4<posetAlignment->sequenceNumber; sequence4++) {
        if(sequence4 != sequence1 && sequence4 != sequence2 && sequence4 != sequence3) {
            stIntTuple *constraint = getConstraint_lessThan(posetAlignment, sequence2, position2, sequence4);
            if(constraint != NULL) {
                int64_t position4 = stIntTuple_get(constraint, 1);
                int64_t transLessThanOrEqual = lessThanOrEqual && stIntTuple_get(constraint, 2) && stIntTuple_get(constraint, 0) == position2; //stuff which maintains the less than or equals
                if(lessThanConstraintIsPrime(posetAlignment, sequence3, position3, sequence4, position4, transLessThanOrEqual)) {//We have a new transitive constraint..
                    addConstraint_lessThan(posetAlignment, sequence3, position3, sequence4, position4, transLessThanOrEqual);
                }
            }
        }
    }
}
static stList *getOddToEvenAdjacencyEdges(stSortedSet *oddNodes,
        stList *adjacencyEdges) {
    /*
     * Gets edges that include one node in the set of oddNodes, but not both.
     */
    stList *oddToEvenAdjacencyEdges = stList_construct();
    for (int64_t i = 0; i < stList_length(adjacencyEdges); i++) {
        stIntTuple *edge = stList_get(adjacencyEdges, i);
        if (nodeInSet(oddNodes, stIntTuple_get(edge, 0)) ^ nodeInSet(
                oddNodes, stIntTuple_get(edge, 1))) {
            stList_append(oddToEvenAdjacencyEdges, edge);
        }
    }
    return oddToEvenAdjacencyEdges;
}
Esempio n. 16
0
static int comparePositions(stIntTuple *position1, stIntTuple *position2) {
    if(stIntTuple_get(position1, 0) == INT64_MAX || stIntTuple_get(position2, 0) == INT64_MAX) { //Indicates we should ignore the first position and compare the second.
        assert(stIntTuple_get(position1, 1) != INT64_MAX);
        assert(stIntTuple_get(position2, 1) != INT64_MAX);
        return cmpFn(stIntTuple_get(position1, 1), stIntTuple_get(position2, 1));
    }
    return cmpFn(stIntTuple_get(position1, 0), stIntTuple_get(position2, 0));
}
static void writeGraph(FILE *fileHandle, stList *edges, int64_t nodeNumber) {
    /*
     * Writes out just the adjacencies in the blossom format.
     */
    int64_t edgeNumber = stList_length(edges);
    fprintf(fileHandle, "%" PRIi64 " %" PRIi64 "\n", nodeNumber, edgeNumber);
    for(int64_t i=0; i<stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        int64_t from =  stIntTuple_get(edge, 0);
        int64_t to = stIntTuple_get(edge, 1);
        int64_t weight = stIntTuple_get(edge, 2);
        //All the algorithms are minimisation algorithms, so we invert the sign.
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %" PRIi64 "\n", from, to, weight);
    }
}
Esempio n. 18
0
/*
 * Returns non-zero iff the constraint is prime. The lessThanOrEquals argument, if non-zero specifies the constraint is less than equals.
 */
static bool lessThanConstraintIsPrime(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1, int64_t sequence2, int64_t position2, int64_t lessThanOrEquals) {
    stIntTuple *constraint = getConstraint_lessThan(posetAlignment, sequence1, position1, sequence2);
    if(constraint == NULL) {
        return 1;
    }
    if(position2 < stIntTuple_get(constraint, 1)) { //new constraint is tighter
        return 1;
    }
    if(position2 > stIntTuple_get(constraint, 1)) { //new constraint is looser
        return 0;
    }
    if(position1 == stIntTuple_get(constraint, 0) && stIntTuple_get(constraint, 2) && !lessThanOrEquals) { //converts a less than or equals constraint to a less than constraint
        return 1;
    }
    return 0;
}
stIntTuple *getLowestScoringEdge(stList *edges) {
    /*
     * Returns edge with lowest weight.
     */
    assert(stList_length(edges) > 0);
    stIntTuple *lowestScoringEdge = stList_get(edges, 0);
    int64_t lowestScore = stIntTuple_get(lowestScoringEdge, 2);
    for (int64_t j = 1; j < stList_length(edges); j++) {
        stIntTuple *edge = stList_get(edges, j);
        int64_t k = stIntTuple_get(edge, 2);
        if (k < lowestScore) {
            lowestScore = k;
            lowestScoringEdge = edge;
        }
    }
    return lowestScoringEdge;
}
Esempio n. 20
0
/*
 * Gets the position in sequence2 that the position in sequence1 must be greater than or equal to in the alignment
 */
static stIntTuple *getConstraint_greaterThan(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1, int64_t sequence2) {
    stIntTuple *pos = stIntTuple_construct2(INT64_MAX, position1);
    //Get less than or equal
    stIntTuple *constraint = stSortedSet_searchLessThanOrEqual(getConstraintList(posetAlignment, sequence2, sequence1), pos);
    stIntTuple_destruct(pos);
    assert(constraint == NULL || position1 >= stIntTuple_get(constraint, 1));
    return constraint;
}
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    /*
     * Get set of adjacency edges that bridge between (have a node in two) components.
     */

    stList *bridgingAdjacencyEdges = stList_construct();

    for (int64_t i = 0; i < stList_length(components); i++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(
                stList_get(components, i));
        stSortedSetIterator *it = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(it)) != NULL) {
            stList *edges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges,
                    node);
            if (edges != NULL) {
                for (int64_t j = 0; j < stList_length(edges); j++) {
                    stIntTuple *edge = stList_get(edges, j);
                    stIntTuple *node1 = stIntTuple_construct1(
                            stIntTuple_get(edge, 0));
                    stIntTuple *node2 = stIntTuple_construct1(
                            stIntTuple_get(edge, 1));
                    assert(
                            stSortedSet_search(componentNodes, node1) != NULL
                                    || stSortedSet_search(componentNodes, node2)
                                            != NULL);
                    if (stSortedSet_search(componentNodes, node1) == NULL
                            || stSortedSet_search(componentNodes, node2)
                                    == NULL) {
                        stList_append(bridgingAdjacencyEdges, edge);
                    }
                    stIntTuple_destruct(node1);
                    stIntTuple_destruct(node2);
                }
            }
        }
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(componentNodes);
    }

    return bridgingAdjacencyEdges;
}
Esempio n. 22
0
// copied from cPecanRealign
struct PairwiseAlignment *convertAlignedPairsToPairwiseAlignment(char *seqName1, char *seqName2, double score,
        int64_t length1, int64_t length2, stList *alignedPairs) {
    //Make pairwise alignment
    int64_t pX = -1, pY = -1, mL = 0;
    //Create an end matched pair, which is used to ensure the alignment has the correct end indels.
    struct List *opList = constructEmptyList(0, (void (*)(void *)) destructAlignmentOperation);
    stList_append(alignedPairs, stIntTuple_construct2(length1, length2));
    for (int64_t i = 0; i < stList_length(alignedPairs); i++) {
        stIntTuple *alignedPair = stList_get(alignedPairs, i);
        int64_t x = stIntTuple_get(alignedPair, 0);
        int64_t y = stIntTuple_get(alignedPair, 1);
        assert(x - pX > 0);
        assert(y - pY > 0);
        if (x - pX > 0 && y - pY > 0) { //This is a hack for filtering
            if (x - pX > 1) { //There is an indel.
                if (mL > 0) {
                    listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0));
                    mL = 0;
                }
                listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_X, x - pX - 1, 0));
            }
            if (y - pY > 1) {
                if (mL > 0) {
                    listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL, 0));
                    mL = 0;
                }
                listAppend(opList, constructAlignmentOperation(PAIRWISE_INDEL_Y, y - pY - 1, 0));
            }
            mL++;
            pX = x;
            pY = y;
        }
    }
    //Deal with a trailing match, but exclude the final match
    if (mL > 1) {
        listAppend(opList, constructAlignmentOperation(PAIRWISE_MATCH, mL - 1, 0));
    }
    stIntTuple_destruct(stList_pop(alignedPairs));
    //Construct the alignment
    struct PairwiseAlignment *pA = constructPairwiseAlignment(seqName1, 0, length1, 1, seqName2, 0, length2, 1, score,
            opList);
    return pA;
}
int64_t matchingCardinality(stList *matching) {
    /*
     * Returns number of edges with weight > 0.
     */
    int64_t totalCardinality = 0;
    for (int64_t i = 0; i < stList_length(matching); i++) {
        stIntTuple *edge = stList_get(matching, i);
        totalCardinality += stIntTuple_get(edge, 2) > 0 ? 1 : 0;
    }
    return totalCardinality;
}
int64_t matchingWeight(stList *matching) {
    /*
     * Returns sum of weights.
     */
    int64_t totalWeight = 0;
    for(int64_t i=0; i<stList_length(matching); i++) {
        stIntTuple *edge = stList_get(matching, i);
        totalWeight += stIntTuple_get(edge, 2);
    }
    return totalWeight;
}
stList *getComponents(stList *edges) {
    /*
     * Gets a list of connected components, each connected component
     * being represented as a list of the edges, such that each edge is in exactly one
     * connected component. Allows for multi-graphs (multiple edges connecting two nodes).
     */

    stHash *nodesToEdges = getNodesToEdgesHash(edges);

    /*
     * Traverse the edges greedily
     */
    stList *components =
            stList_construct3(0, (void(*)(void *)) stList_destruct);
    stList *nodes = stHash_getKeys(nodesToEdges);
    while (stList_length(nodes) > 0) {
        stIntTuple *node = stList_pop(nodes);
        stList *edges = stHash_search(nodesToEdges, node);
        if (edges != NULL) { //We have a component to build
            stSortedSet *component = stSortedSet_construct();
            stHash_remove(nodesToEdges, node);
            for (int64_t i = 0; i < stList_length(edges); i++) {
                stIntTuple *edge = stList_get(edges, i);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 0),
                        component);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 1),
                        component);
            }
            stList_append(components, stSortedSet_getList(component));
            //Cleanup
            stSortedSet_destruct(component);
            stList_destruct(edges);
        }
        stIntTuple_destruct(node);
    }
    assert(stHash_size(nodesToEdges) == 0);
    stHash_destruct(nodesToEdges);
    stList_destruct(nodes);

    return components;
}
Esempio n. 26
0
/*
 * This builds an adjacency list structure for the the sequences. Every sequence-position
 * has a column in the hash with which it can be aligned with.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
                                     (int (*)(const void *, const void *))stIntTuple_equalsFn,
                                     (void (*)(void *))stIntTuple_destruct, NULL);
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2( seq, position);
            stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(column, seqPos);
            stHash_insert(hash, seqPos, column);
        }
    }
    stListIterator *it = stList_getIterator(pairs);
    stIntTuple *pair;
    while((pair = stList_getNext(it)) != NULL) {
        stIntTuple *seqPos1 = stIntTuple_construct2( stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *seqPos2 = stIntTuple_construct2( stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *column1 = stHash_search(hash, seqPos1);
        assert(column1 != NULL);
        stSortedSet *column2 = stHash_search(hash, seqPos2);
        assert(column2 != NULL);
        if(column1 != column2) { //Merge the columns
            stSortedSetIterator *it2 = stSortedSet_getIterator(column2);
            stIntTuple *seqPos3;
            while((seqPos3 = stSortedSet_getNext(it2)) != NULL) {
                assert(stSortedSet_search(column1, seqPos3) == NULL);
                stSortedSet_insert(column1, seqPos3);
                assert(stHash_search(hash, seqPos3) == column2);
                stHash_insert(hash, seqPos3, column1);
                assert(stHash_search(hash, seqPos3) == column1);
            }
            stSortedSet_destructIterator(it2);
            stSortedSet_destruct(column2);
        }
        //Cleanup loop.
        stIntTuple_destruct(seqPos1);
        stIntTuple_destruct(seqPos2);
    }
    stList_destructIterator(it);
    return hash;
}
static void getComponentsP(stHash *nodesToEdges, int64_t node,
        stSortedSet *component) {
    stIntTuple *key = stIntTuple_construct1( node);
    stList *edges = stHash_search(nodesToEdges, key);
    if (edges != NULL) {
        stHash_remove(nodesToEdges, key);
        for (int64_t i = 0; i < stList_length(edges); i++) {
            stIntTuple *edge = stList_get(edges, i);
            if (stSortedSet_search(component, edge) == NULL) {
                stSortedSet_insert(component, edge);
            }
            /*
             * Recursion on stack could equal the total number of nodes.
             */
            getComponentsP(nodesToEdges, stIntTuple_get(edge, 0),
                    component);
            getComponentsP(nodesToEdges, stIntTuple_get(edge, 1),
                    component);
        }
        stList_destruct(edges);
    }
    stIntTuple_destruct(key);
}
Esempio n. 28
0
static void stPosetAlignment_addP(stPosetAlignment *posetAlignment, int64_t sequence1, int64_t position1, int64_t sequence2, int64_t position2) {
    //for all pairs do check..
    if(lessThanConstraintIsPrime(posetAlignment, sequence1, position1, sequence2, position2, 1)) {
        addConstraint_lessThan(posetAlignment, sequence1, position1, sequence2, position2, 1);
        for(int64_t sequence3=0; sequence3<posetAlignment->sequenceNumber; sequence3++) {
            if(sequence3 != sequence2) {
                if(sequence3 != sequence1) {
                    stIntTuple *constraint = getConstraint_greaterThan(posetAlignment, sequence1, position1, sequence3);
                    if(constraint != NULL) {
                        int64_t position3 = stIntTuple_get(constraint, 0); //its reversed
                        int64_t lessThanOrEqual = stIntTuple_get(constraint, 2) && stIntTuple_get(constraint, 1) == position1;
                        if(lessThanConstraintIsPrime(posetAlignment, sequence3, position3, sequence2, position2, lessThanOrEqual)) { //new constraint found, so add it to the set..
                            addConstraint_lessThan(posetAlignment, sequence3, position3, sequence2, position2, lessThanOrEqual);
                            stPosetAlignment_addP2(posetAlignment, sequence1, sequence3, position3, sequence2, position2, lessThanOrEqual);
                        }
                    }
                }
                else {
                    stPosetAlignment_addP2(posetAlignment, INT64_MAX, sequence1, position1, sequence2, position2, 1);
                }
            }
        }
    }
}
static void writeCliqueGraph(FILE *fileHandle, stList *edges, int64_t nodeNumber, bool negativeWeights) {
    /*
     * Writes out a representation of the adjacencies and ends as a graph readable by blossom.
     * Writes out additional edges so that every pair of nodes is connected.
     */
    int64_t edgeNumber = ((nodeNumber * nodeNumber)  - nodeNumber) / 2;
    fprintf(fileHandle, "%" PRIi64 " %" PRIi64 "\n", nodeNumber, edgeNumber);
    stSortedSet *seen = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, (void (*)(void *))stIntTuple_destruct);
    int64_t edgesWritten = 0;
    for(int64_t i=0; i<stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        int64_t from =  stIntTuple_get(edge, 0);
        int64_t to = stIntTuple_get(edge, 1);
        assert(from < nodeNumber);
        assert(to < nodeNumber);
        assert(from >= 0);
        assert(to >= 0);
        assert(from != to);
        int64_t weight = stIntTuple_get(edge, 2);
        //If is a minimisation algorithms we invert the sign..
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %" PRIi64 "\n", from, to, negativeWeights ? -weight : weight);
        edgesWritten++;
        addEdgeToSet(seen, from, to);
    }
    for(int64_t i=0; i<nodeNumber; i++) {
        for(int64_t j=i+1; j<nodeNumber; j++) {
            if(!edgeInSet(seen, i, j)) {
                fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " 0\n", i, j);
                edgesWritten++;
            }
        }
    }
    //Cleanup
    stSortedSet_destruct(seen);
    assert(edgeNumber == edgesWritten);
}
static stSortedSet *getOddNodes(stList *cycle) {
    /*
     * Returns alternating nodes in a simple cycle.
     */

    //Set to return
    stSortedSet *nodes = stSortedSet_construct3(
            (int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);

    stHash *nodesToEdges = getNodesToEdgesHash(cycle);
    int64_t node = stIntTuple_get(stList_get(cycle, 0), 0);
    int64_t pNode = -1;
    int64_t counter = 0;
    bool b = 1;
    assert(stList_length(cycle) % 2 == 0);
    while (counter++ < stList_length(cycle)) {
        if (b) { //Make alternating
            addNodeToSet(nodes, node);
            b = 0;
        } else {
            b = 1;
        }
        stList *edges = getItemForNode(node, nodesToEdges);
        assert(stList_length(edges) == 2);
        stIntTuple *edge = stList_get(edges, 0);
        int64_t node2 = getOtherPosition(edge, node);
        if (node2 != pNode) {
            pNode = node;
            node = node2;
            continue;
        }
        edge = stList_get(edges, 1);
        node2 = getOtherPosition(edge, node);
        assert(node2 != pNode);
        pNode = node;
        node = node2;
    }
    stHash_destruct(nodesToEdges);

    assert(stList_length(cycle) / 2 == stSortedSet_size(nodes));

    return nodes;
}