static int64_t setCoordinates(Flower *flower, MetaSequence *metaSequence, Cap *cap, int64_t coordinate) {
    /*
     * Sets the coordinates of the reference thread and sets the bases of the actual sequence
     * that of the consensus.
     */
    Sequence *sequence = flower_getSequence(flower, metaSequence_getName(metaSequence));
    if (sequence == NULL) {
        sequence = sequence_construct(metaSequence, flower);
    }
    while (1) {
        assert(cap_getStrand(cap));
        assert(!cap_getSide(cap));
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(cap_getStrand(adjacentCap));
        assert(cap_getSide(adjacentCap));
        int64_t adjacencyLength = cap_getCoordinate(cap);
        assert(adjacencyLength == cap_getCoordinate(adjacentCap));
        assert(adjacencyLength != INT64_MAX);
        assert(adjacencyLength >= 0);
        cap_setCoordinates(cap, coordinate, 1, sequence);
        coordinate += adjacencyLength + 1;
        cap_setCoordinates(adjacentCap, coordinate, 1, sequence);
        //Traverse any block..
        if ((cap = cap_getOtherSegmentCap(adjacentCap)) == NULL) {
            break;
        }
        coordinate += segment_getLength(cap_getSegment(adjacentCap)) - 1;
    }
    return coordinate;
}
static void setAdjacencyLength(Cap *cap, Cap *adjacentCap, int64_t adjacencyLength) {
    //Set the coordinates of the caps to the adjacency size
    cap_setCoordinates(cap, adjacencyLength, cap_getStrand(cap), NULL);
    cap_setCoordinates(adjacentCap, adjacencyLength, cap_getStrand(adjacentCap),
    NULL);
    assert(cap_getCoordinate(cap) == cap_getCoordinate(adjacentCap));
}
Example #3
0
void testCap_segmentCoordinatesReverseStrand(CuTest* testCase) {
    /*
     * Tests the coordinates of an segment and its 5 and 3 prime caps.
     */
    cactusCapTestSetup();

    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 0, sequence);
    Cap *_5Cap = segment_get5Cap(segment);
    Cap *_3Cap = segment_get3Cap(segment);

    CuAssertTrue(testCase, cap_getSide(_5Cap));
    CuAssertTrue(testCase, !cap_getSide(_3Cap));
    CuAssertTrue(testCase, !cap_getStrand(_5Cap));
    CuAssertTrue(testCase, !cap_getStrand(_3Cap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(_5Cap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(_3Cap));
    CuAssertTrue(testCase, !segment_getStrand(segment));
    CuAssertIntEquals(testCase, 4, segment_getStart(segment));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment));

    CuAssertTrue(testCase, !cap_getSide(cap_getReverse(_5Cap)));
    CuAssertTrue(testCase, cap_getSide(cap_getReverse(_3Cap)));
    CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_5Cap)));
    CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_3Cap)));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(cap_getReverse(_5Cap)));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(cap_getReverse(_3Cap)));
    CuAssertTrue(testCase, segment_getStrand(segment_getReverse(segment)));
    CuAssertIntEquals(testCase, 2, segment_getStart(segment_getReverse(segment)));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment_getReverse(segment)));

    cactusCapTestTeardown();
}
bool capsAreAdjacent(Cap *cap1, Cap *cap2, int64_t *separationDistance) {
    if (cap_getName(cap2) != cap_getName(cap1) && cap_getCoordinate(cap1) != cap_getCoordinate(cap2)) { //This can happen if end1 == end2
        if (sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))) {
            assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader(
                                    cap_getEvent(cap2))) == 0);
            assert(cap_getPositiveOrientation(cap1)
                    != cap_getPositiveOrientation(cap2));
            assert(cap_getName(cap1) != cap_getName(cap2));
            assert(sequence_getMetaSequence(cap_getSequence(cap1))
                                == sequence_getMetaSequence(cap_getSequence(cap2)));

            if (!cap_getStrand(cap1)) {
                cap1 = cap_getReverse(cap1);
            }
            if (!cap_getStrand(cap2)) {
                cap2 = cap_getReverse(cap2);
            }
            assert(cap_getStrand(cap1));
            assert(cap_getStrand(cap2));
            if (cap_getCoordinate(cap1) < cap_getCoordinate(cap2)) {
                if (!cap_getSide(cap1) && cap_getSide(cap2)) {
                    *separationDistance = cap_getCoordinate(cap2) - cap_getCoordinate(cap1) - 1; //The minus 1, to give the length of the sequence between the two caps.
                    return 1;
                }
            } else {
                if (cap_getSide(cap1) && !cap_getSide(cap2)) {
                    *separationDistance = cap_getCoordinate(cap1) - cap_getCoordinate(cap2) - 1;
                    return 1;
                }
            }
        }
    }
    return 0;
}
static void setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(Cap *cap, stList *recoveredCaps) {
    /*
     * Sets the coordinates of the caps to be equal to the length of the adjacency sequence between them.
     * Used to build the reference sequence bottom up.
     *
     * One complexity is that a reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    while (1) {
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(cap_getCoordinate(cap) == INT64_MAX);
        assert(cap_getCoordinate(adjacentCap) == INT64_MAX);
        assert(cap_getStrand(cap) == cap_getStrand(adjacentCap));
        assert(cap_getSide(cap) != cap_getSide(adjacentCap));
        Group *group = end_getGroup(cap_getEnd(cap));
        assert(group != NULL);
        if (!group_isLeaf(group)) { //Adjacency is not terminal, so establish its sequence.
            Flower *nestedFlower = group_getNestedFlower(group);
            Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
            assert(nestedCap != NULL);
            Cap *nestedAdjacentCap = flower_getCap(nestedFlower, cap_getName(adjacentCap));
            assert(nestedAdjacentCap != NULL);
            Cap *breakerCap;
            int64_t adjacencyLength = traceThreadLength(nestedCap, &breakerCap);
            assert(cap_getOrientation(nestedAdjacentCap));
            if (cap_getPositiveOrientation(breakerCap) != nestedAdjacentCap) { //The thread is broken at the lower level.
                //Copy cap into higher level graph.
                breakerCap = copyCapToParent(breakerCap, recoveredCaps);
                assert(cap_getSide(breakerCap));
                cap_makeAdjacent(cap, breakerCap);
                setAdjacencyLength(cap, breakerCap, adjacencyLength);
                adjacencyLength = traceThreadLength(nestedAdjacentCap, &breakerCap);
                assert(cap_getPositiveOrientation(breakerCap) != cap);
                breakerCap = copyCapToParent(breakerCap, recoveredCaps);
                assert(!cap_getSide(breakerCap));
                cap_makeAdjacent(breakerCap, adjacentCap);
                setAdjacencyLength(adjacentCap, breakerCap, adjacencyLength);
            } else { //The thread is not broken at the lower level
                setAdjacencyLength(cap, adjacentCap, adjacencyLength);
            }
        } else {
            //Set the coordinates of the caps to the adjacency size
            setAdjacencyLength(cap, adjacentCap, 0);
        }
        if ((cap = cap_getOtherSegmentCap(adjacentCap)) == NULL) {
            break;
        }
    }
}
Example #6
0
void testCap_setCoordinate(CuTest* testCase) {
    cactusCapTestSetup();
    CuAssertTrue(testCase, cap_getCoordinate(rootCap) == INT64_MAX);
    CuAssertTrue(testCase, cap_getStrand(rootCap));
    CuAssertTrue(testCase, cap_getSequence(rootCap) == NULL);
    cap_setCoordinates(rootCap, 5, 0, NULL);
    CuAssertTrue(testCase, cap_getCoordinate(rootCap) == 5);
    CuAssertTrue(testCase, !cap_getStrand(rootCap));
    CuAssertTrue(testCase, cap_getSequence(rootCap) == NULL);
    cap_setCoordinates(rootCap, INT64_MAX, 1, NULL);
    CuAssertTrue(testCase, cap_getCoordinate(rootCap) == INT64_MAX);
    CuAssertTrue(testCase, cap_getStrand(rootCap));
    CuAssertTrue(testCase, cap_getSequence(rootCap) == NULL);
    cactusCapTestTeardown();
}
static void recoverBrokenAdjacencies(Flower *flower, stList *recoveredCaps, Name referenceEventName) {
    /*
     * Find reference intervals that are book-ended by stubs created in a child flower.
     */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while((group = flower_getNextGroup(groupIt)) != NULL) {
        Flower *nestedFlower;
        if((nestedFlower = group_getNestedFlower(group)) != NULL) {
            Flower_EndIterator *endIt = flower_getEndIterator(nestedFlower);
            End *childEnd;
            while((childEnd = flower_getNextEnd(endIt)) != NULL) {
                if(end_isStubEnd(childEnd) && flower_getEnd(flower, end_getName(childEnd)) == NULL) { //We have a thread we need to promote
                    Cap *childCap = getCapForReferenceEvent(childEnd, referenceEventName); //The cap in the reference
                    assert(childCap != NULL);
                    assert(!end_isAttached(childEnd));
                    childCap = cap_getStrand(childCap) ? childCap : cap_getReverse(childCap);
                    if (!cap_getSide(childCap)) {
                        Cap *adjacentChildCap = NULL;
                        int64_t adjacencyLength = traceThreadLength(childCap, &adjacentChildCap);
                        Cap *cap = copyCapToParent(childCap, recoveredCaps);
                        assert(adjacentChildCap != NULL);
                        assert(!end_isAttached(cap_getEnd(adjacentChildCap)));
                        assert(!cap_getSide(cap));
                        Cap *adjacentCap = copyCapToParent(adjacentChildCap, recoveredCaps);
                        cap_makeAdjacent(cap, adjacentCap);
                        setAdjacencyLength(cap, adjacentCap, adjacencyLength);
                    }
                }
            }
            flower_destructEndIterator(endIt);
        }
    }
    flower_destructGroupIterator(groupIt);
}
Example #8
0
void stCaf_addAdjacencies(Flower *flower) {
    //Build a list of caps.
    stList *list = stList_construct();
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        End_InstanceIterator *instanceIterator = end_getInstanceIterator(end);
        Cap *cap;
        while ((cap = end_getNext(instanceIterator)) != NULL) {
            if (!cap_getStrand(cap)) {
                cap = cap_getReverse(cap);
            }
            stList_append(list, cap);
        }
        end_destructInstanceIterator(instanceIterator);
    }
    flower_destructEndIterator(endIterator);
    assert(stList_length(list) % 2 == 0);
    //Sort the list of caps.
    stList_sort(list, (int(*)(const void *, const void *)) addAdjacenciesPP);
    //Now make the adjacencies.
    for (int64_t i = 1; i < stList_length(list); i += 2) {
        Cap *cap = stList_get(list, i - 1);
        Cap *cap2 = stList_get(list, i);
        cap_makeAdjacent(cap, cap2);
    }
    //Clean up.
    stList_destruct(list);
}
Example #9
0
static int addAdjacenciesPP(Cap *cap1, Cap *cap2) {
    assert(cap_getStrand(cap1) && cap_getStrand(cap2));
    Sequence *sequence1 = cap_getSequence(cap1);
    Sequence *sequence2 = cap_getSequence(cap2);
    int64_t i = cactusMisc_nameCompare(sequence_getName(sequence1), sequence_getName(sequence2));
    if (i == 0) {
        int64_t j = cap_getCoordinate(cap1);
        int64_t k = cap_getCoordinate(cap2);
        i = j > k ? 1 : (j < k ? -1 : 0);
        if (i == 0) {
            assert(cap_getSegment(cap1) == cap_getSegment(cap2));
            j = cap_getSide(cap1);
            k = cap_getSide(cap2);
            assert((j && !k) || (!j && k));
            i = j ? -1 : 1;
        }
    }
    return i;
}
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName,
              bool isTop, stMatrix *(*generateSubstitutionMatrix)(double)) {
    /*
     * A reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    stList *caps = getCaps(flowers, referenceEventName);
    for (int64_t i = stList_length(caps) - 1; i >= 0; i--) { //Start from end, as we add to this list.
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, i), caps);
    }
    for(int64_t i=0; i<stList_length(flowers); i++) {
        recoverBrokenAdjacencies(stList_get(flowers, i), caps, referenceEventName);
    }

    //Build the phylogenetic event trees for base calling.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL, (void (*)(void *))cleanupPhylogeneticTree);
    for(int64_t i=0; i<stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower, getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (isTop) {
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        int64_t nonTrivialSeqIndex = 0, trivialSeqIndex = stList_length(threadStrings); //These are used as indices for the names of trivial and non-trivial sequences.
        for (int64_t i = 0; i < stList_length(threadStrings); i++) {
            Cap *cap = stList_get(caps, i);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *flower = end_getFlower(cap_getEnd(cap));
            char *threadString = stList_get(threadStrings, i);
            bool trivialString = isTrivialString(&threadString); //This alters the original string
            MetaSequence *metaSequence = addMetaSequence(flower, cap, trivialString ? trivialSeqIndex++ : nonTrivialSeqIndex++,
                    threadString, trivialString);
            free(threadString);
            int64_t endCoordinate = setCoordinates(flower, metaSequence, cap, metaSequence_getStart(metaSequence) - 1);
            (void) endCoordinate;
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }
        stList_setDestructor(threadStrings, NULL); //The strings are already cleaned up by the above loop
        stList_destruct(threadStrings);
    } else {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    }
    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}
Example #11
0
int64_t flower_getTotalBaseLength(Flower *flower) {
    /*
     * The implementation of this function is very like that in group_getTotalBaseLength, with a few differences. Consider merging them.
     */
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    int64_t totalLength = 0;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        if (!end_isBlockEnd(end)) {
            End_InstanceIterator *instanceIterator = end_getInstanceIterator(end);
            Cap *cap;
            while ((cap = end_getNext(instanceIterator)) != NULL) {
                cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                if (!cap_getSide(cap) && cap_getSequence(cap) != NULL) {
                    Cap *cap2 = cap_getAdjacency(cap);
                    assert(cap2 != NULL);
                    while (end_isBlockEnd(cap_getEnd(cap2))) {
                        Segment *segment = cap_getSegment(cap2);
                        assert(segment != NULL);
                        assert(segment_get5Cap(segment) == cap2);
                        cap2 = cap_getAdjacency(segment_get3Cap(segment));
                        assert(cap2 != NULL);
                        assert(cap_getStrand(cap2));
                        assert(cap_getSide(cap2));
                    }
                    assert(cap_getStrand(cap2));
                    assert(cap_getSide(cap2));
                    int64_t length = cap_getCoordinate(cap2) - cap_getCoordinate(cap) - 1;
                    assert(length >= 0);
                    totalLength += length;
                }
            }
            end_destructInstanceIterator(instanceIterator);
        }
    }
    flower_destructEndIterator(endIterator);
    return totalLength;
}
void topDown(Flower *flower, Name referenceEventName) {
    /*
     * Run on each flower, top down. Sets the coordinates of each reference cap to the correct
     * sequence, and sets the bases of the reference sequence to be consensus bases.
     */
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
        if (cap != NULL) {
            cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
            if (!cap_getSide(cap)) {
                assert(cap_getCoordinate(cap) != INT64_MAX);
                Sequence *sequence = cap_getSequence(cap);
                assert(sequence != NULL);
                Group *group = end_getGroup(end);
                if (!group_isLeaf(group)) {
                    Flower *nestedFlower = group_getNestedFlower(group);
                    Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
                    assert(nestedCap != NULL);
                    nestedCap = cap_getStrand(nestedCap) ? nestedCap : cap_getReverse(nestedCap);
                    assert(cap_getStrand(nestedCap));
                    assert(!cap_getSide(nestedCap));
                    int64_t endCoordinate = setCoordinates(nestedFlower, sequence_getMetaSequence(sequence),
                                                           nestedCap, cap_getCoordinate(cap));
                    (void) endCoordinate;
                    assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(cap)));
                    assert(endCoordinate
                           == cap_getCoordinate(
                               flower_getCap(nestedFlower, cap_getName(cap_getAdjacency(cap)))));
                }
            }
        }
    }
    flower_destructEndIterator(endIt);
}
Example #13
0
void testCap_getStrand(CuTest* testCase) {
    cactusCapTestSetup();
    CuAssertTrue(testCase, cap_getStrand(rootCap));
    CuAssertTrue(testCase, !cap_getStrand(cap_getReverse(rootCap)));
    CuAssertTrue(testCase, cap_getStrand(leaf1Cap));
    CuAssertTrue(testCase, !cap_getStrand(cap_getReverse(leaf1Cap)));
    CuAssertTrue(testCase, !cap_getStrand(leaf2Cap));
    CuAssertTrue(testCase, cap_getStrand(cap_getReverse(leaf2Cap)));
    cactusCapTestTeardown();
}
static Cap *copyCapToParent(Cap *cap, stList *recoveredCaps) {
    /*
     * Get the adjacent stub end by looking at the reference adjacency in the parent.
     */
    End *end = cap_getEnd(cap);
    assert(end != NULL);
    Group *parentGroup = flower_getParentGroup(end_getFlower(end));
    assert(parentGroup != NULL);
    End *copiedEnd = end_copyConstruct(end, group_getFlower(parentGroup));
    end_setGroup(copiedEnd, parentGroup); //Set group
    Cap *copiedCap = end_getInstance(copiedEnd, cap_getName(cap));
    assert(copiedCap != NULL);
    copiedCap = cap_getStrand(copiedCap) ? copiedCap : cap_getReverse(copiedCap);
    if (!cap_getSide(copiedCap)) {
        stList_append(recoveredCaps, copiedCap);
    }
    return copiedCap;
}
char *getTerminalAdjacencySubString(Cap *cap) {
    if(getTerminalAdjacencyLength_ignoreAdjacencies) {
        return stString_copy("");
    }
    cap = getTerminalCap(cap);
    cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); //This ensures the asserts are as expected.
    Cap *adjacentCap = cap_getAdjacency(cap);
    int64_t i = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
    assert(i != 0);
    if (i > 0) {
        assert(cap_getSide(cap));
        assert(!cap_getSide(adjacentCap));
        return sequence_getString(cap_getSequence(cap),
                cap_getCoordinate(adjacentCap) + 1, i - 1, 1);
    } else {
        assert(cap_getSide(adjacentCap));
        assert(!cap_getSide(cap));
        return sequence_getString(cap_getSequence(cap), cap_getCoordinate(cap) + 1, -i - 1, 1);
    }
}
static stList *getCaps(stList *flowers, Name referenceEventName) {
    stList *caps = stList_construct();
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        //Get list of caps
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (end_isStubEnd(end)) {
                Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
                if(cap != NULL) {
                    cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                    if (!cap_getSide(cap)) {
                        stList_append(caps, cap);
                    }
                }
            }
        }
        flower_destructEndIterator(endIt);
    }
    return caps;
}
Example #17
0
static stList *getSubstringsForFlowers(stList *flowers) {
    /*
     * Get the set of substrings for sequence intervals in the given set of flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (end_isStubEnd(end)) {
                End_InstanceIterator *instanceIt = end_getInstanceIterator(end);
                Cap *cap;
                while ((cap = end_getNext(instanceIt)) != NULL) {
                    Sequence *sequence;
                    if ((sequence = cap_getSequence(cap)) != NULL) {
                        cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                        if (!cap_getSide(cap)) { //We have a sequence interval of interest
                            Cap *adjacentCap = cap_getAdjacency(cap);
                            assert(adjacentCap != NULL);
                            int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1;
                            assert(length >= 0);
                            if (length > 0) {
                                stList_append(substrings,
                                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                                cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length));
                            }
                        }
                    }
                }
                end_destructInstanceIterator(instanceIt);
            }
        }
        flower_destructEndIterator(endIt);
    }
    return substrings;
}