Пример #1
0
void testCap_segmentCoordinatesReverseStrand(CuTest* testCase) {
    /*
     * Tests the coordinates of an segment and its 5 and 3 prime caps.
     */
    cactusCapTestSetup();

    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 0, sequence);
    Cap *_5Cap = segment_get5Cap(segment);
    Cap *_3Cap = segment_get3Cap(segment);

    CuAssertTrue(testCase, cap_getSide(_5Cap));
    CuAssertTrue(testCase, !cap_getSide(_3Cap));
    CuAssertTrue(testCase, !cap_getStrand(_5Cap));
    CuAssertTrue(testCase, !cap_getStrand(_3Cap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(_5Cap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(_3Cap));
    CuAssertTrue(testCase, !segment_getStrand(segment));
    CuAssertIntEquals(testCase, 4, segment_getStart(segment));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment));

    CuAssertTrue(testCase, !cap_getSide(cap_getReverse(_5Cap)));
    CuAssertTrue(testCase, cap_getSide(cap_getReverse(_3Cap)));
    CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_5Cap)));
    CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_3Cap)));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(cap_getReverse(_5Cap)));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(cap_getReverse(_3Cap)));
    CuAssertTrue(testCase, segment_getStrand(segment_getReverse(segment)));
    CuAssertIntEquals(testCase, 2, segment_getStart(segment_getReverse(segment)));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment_getReverse(segment)));

    cactusCapTestTeardown();
}
Пример #2
0
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        //Traversing from 5' end..
        Segment *_5Segment = stList_get(haplotypePath, 0);
        Segment *_3Segment = stList_get(haplotypePath, stList_length(haplotypePath) - 1);
        assert(segment_getStrand(_5Segment) == segment_getStrand(_3Segment));
        if (!segment_getStrand(_5Segment)) {
            Segment *j = _5Segment;
            _5Segment = segment_getReverse(_3Segment);
            _3Segment = segment_getReverse(j);
        }
        assert(segment_getStrand(_5Segment));
        assert(segment_getStrand(_3Segment));
        Cap *_5Cap = segment_get5Cap(_5Segment);
        Cap *_3Cap = segment_get3Cap(_3Segment);
        if (getAdjacentCapsSegment(_5Cap) != NULL) {
            assert(!trueAdjacency(_5Cap, haplotypeEventStrings));
        }
        if (getAdjacentCapsSegment(_3Cap) != NULL) {
            assert(!trueAdjacency(_3Cap, haplotypeEventStrings));
        }
        debugScaffoldPathsP(_5Cap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 1);
        debugScaffoldPathsP(_3Cap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 0);
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
}
Пример #3
0
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference,
        const char *eventString, bool *aligned) {
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = 0;
    if (segment_getStart(segmentX) < segment_getStart(segmentY)) {
        Block *blockX = segment_getBlock(segmentX);
        Block *blockY = segment_getBlock(segmentY);
        Block_InstanceIterator *instanceItX = block_getInstanceIterator(blockX);
        Segment *segmentX2;
        while ((segmentX2 = block_getNext(instanceItX)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(segmentX2)),
                    eventString) == 0) {
                Block_InstanceIterator *instanceItY =
                        block_getInstanceIterator(blockY);
                Segment *segmentY2;
                while ((segmentY2 = block_getNext(instanceItY)) != NULL) {
                    if (strcmp(event_getHeader(segment_getEvent(segmentY2)),
                            eventString) == 0) {
                        *aligned = 1;
                        if (sequence_getMetaSequence(
                                segment_getSequence(segmentX2))
                                == sequence_getMetaSequence(
                                        segment_getSequence(segmentY2))) { //Have the same assembly sequence
                            //Now check if the two segments are connected by a path of adjacency from the 3' end of segmentX to the 5' end of segmentY.
                            int64_t separationDistance;
                            if (capsAreAdjacent(segment_get3Cap(segmentX2),
                                    segment_get5Cap(segmentY2),
                                    &separationDistance)) {
                                //if(difference < 10000 || (separationDistance <=  difference * 1.5 && difference <= separationDistance * 1.5)) {
                                block_destructInstanceIterator(instanceItX);
                                block_destructInstanceIterator(instanceItY);
                                return 1;
                                //}
                            }
                        }
                    }
                }
                block_destructInstanceIterator(instanceItY);
            }
        }
        block_destructInstanceIterator(instanceItX);
    } else {
        assert(segmentX == segmentY);
        if(hasCapInEvent(block_get5End(segment_getBlock(segmentX)),
                eventString)) {
            *aligned = 1;
            return 1;
        }
    }
    return 0;
}
Пример #4
0
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    /*
     * Get the set of substrings representing the strings in the segments of the given flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        Block *block;
        while ((block = flower_getNextBlock(blockIt)) != NULL) {
            Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
            Segment *segment;
            while ((segment = block_getNext(instanceIt)) != NULL) {
                Sequence *sequence;
                if ((sequence = segment_getSequence(segment)) != NULL) {
                    segment = segment_getStrand(segment) ? segment : segment_getReverse(segment);
                    assert(segment_getLength(segment) > 0);
                    stList_append(substrings,
                            substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                    segment_getStart(segment) - sequence_getStart(sequence),
                                    segment_getLength(segment)));
                }
            }
            block_destructInstanceIterator(instanceIt);
        }
        flower_destructBlockIterator(blockIt);
    }
    return substrings;
}
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    for (int64_t i = 0; i < stList_length(positions); i++) {
        SegmentHolder *segmentHolder = stList_get(positions, i);
        int64_t j = segment_getStart(segmentHolder->segment);
        if (segment_getStrand(segmentHolder->segment)) {
            j += segmentHolder->offset;
            assert(
                    cap_getCoordinate(segment_get5Cap(segmentHolder->segment)) == segment_getStart(
                            segmentHolder->segment));
            assert(
                    segment_getStart(segmentHolder->segment) + segment_getLength(segmentHolder->segment) - 1
                            == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        } else {
            j -= segmentHolder->offset;
            assert(
                    segment_getStart(segmentHolder->segment) - segment_getLength(segmentHolder->segment) + 1
                            == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n", substitutionType,
                event_getHeader(segment_getEvent(segmentHolder->segment)),
                sequence_getLength(segment_getSequence(segmentHolder->segment)), j,
                segmentHolder->base1, segmentHolder->base2, segmentHolder->base3);
        getMAFBlock(segment_getBlock(segmentHolder->segment), fileHandle);
    }
}
Пример #6
0
static int segmentCompareFn(const void *segment1, const void *segment2) {
    Name name1 = segment1 == &segmentCompareFn_coordinate ? segmentCompareFn_metaSequence : metaSequence_getName(sequence_getMetaSequence(segment_getSequence((Segment *)segment1)));

    Name name2 = segment2 == &segmentCompareFn_coordinate ? segmentCompareFn_metaSequence : metaSequence_getName(sequence_getMetaSequence(segment_getSequence((Segment *)segment2)));
    int i = cactusMisc_nameCompare(name1, name2);
    if(i == 0) {
        int64_t
                x =
                        segment1 == &segmentCompareFn_coordinate ? segmentCompareFn_coordinate
                                : (int64_t) segment_getStart((void *) segment1);
        int64_t
                y =
                        segment2 == &segmentCompareFn_coordinate ? segmentCompareFn_coordinate
                                : (int64_t) segment_getStart((void *) segment2);
        assert(
                segment1 == &segmentCompareFn_coordinate || segment_getStrand(
                        (void *) segment1));
        assert(
                segment2 == &segmentCompareFn_coordinate || segment_getStrand(
                        (void *) segment2));
        return x > y ? 1 : (x < y ? -1 : 0); //i > 0 ? 1 : i < 0 ? -1 : 0; //This was because of an overflow
    }
    return i;
}
Пример #7
0
Segment *block_splitP(Segment *segment,
		Block *leftBlock, Block *rightBlock) {
	Segment *leftSegment = segment_getSequence(segment) != NULL
				? segment_construct2(leftBlock, segment_getStart(segment), segment_getStrand(segment), segment_getSequence(segment))
				: segment_construct(leftBlock, segment_getEvent(segment));
				Segment *rightSegment = segment_getSequence(segment) != NULL
				? segment_construct2(rightBlock, segment_getStart(segment) + block_getLength(leftBlock), segment_getStrand(segment), segment_getSequence(segment))
						: segment_construct(rightBlock, segment_getEvent(segment));
	//link together.
	cap_makeAdjacent(segment_get3Cap(leftSegment), segment_get5Cap(rightSegment));
	//update adjacencies.
	Cap *_5Cap = segment_get5Cap(segment);
	Cap *new5Cap = segment_get5Cap(leftSegment);
	Cap *_3Cap = segment_get3Cap(segment);
	Cap *new3Cap = segment_get3Cap(rightSegment);
	if(cap_getAdjacency(_5Cap) != NULL) {
		cap_makeAdjacent(cap_getAdjacency(_5Cap), new5Cap);
	}
	if(cap_getAdjacency(_3Cap) != NULL) {
		cap_makeAdjacent(cap_getAdjacency(_3Cap), new3Cap);
	}
	return leftSegment;
}
Пример #8
0
static void getOrderedSegmentsP(Flower *flower,
        stSortedSet *segments) {
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
            if (!segment_getStrand(segment)) {
                segment = segment_getReverse(segment);
            }
            assert(stSortedSet_search(segments, segment) == NULL);
            stSortedSet_insert(segments, segment);
    }
    flower_destructSegmentIterator(segmentIt);
    //Recurse over the flowers
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) != NULL) {
            getOrderedSegmentsP(group_getNestedFlower(group),
                    segments);
        }
    }
    flower_destructGroupIterator(groupIt);
}
Пример #9
0
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}