void testCap_segmentCoordinatesReverseStrand(CuTest* testCase) {
    /*
     * Builds a segment with strand 0 at start 2 in a block of length 3 and
     * checks the side, strand and coordinate reported by its 5' and 3' caps,
     * by the segment itself, and by the reverse of each of them.
     */
    cactusCapTestSetup();
    Block *block = block_construct(3, flower);
    Segment *negativeSegment = segment_construct2(block, 2, 0, sequence);
    Cap *fivePrimeCap = segment_get5Cap(negativeSegment);
    Cap *threePrimeCap = segment_get3Cap(negativeSegment);

    //The caps as returned by the segment.
    CuAssertTrue(testCase, cap_getSide(fivePrimeCap));
    CuAssertTrue(testCase, !cap_getSide(threePrimeCap));
    CuAssertTrue(testCase, !cap_getStrand(fivePrimeCap));
    CuAssertTrue(testCase, !cap_getStrand(threePrimeCap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(fivePrimeCap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(threePrimeCap));

    //The segment as constructed.
    CuAssertTrue(testCase, !segment_getStrand(negativeSegment));
    CuAssertIntEquals(testCase, 4, segment_getStart(negativeSegment));
    CuAssertIntEquals(testCase, 3, segment_getLength(negativeSegment));

    //The reversed caps: side and strand flip, the coordinate does not.
    Cap *reversedFivePrimeCap = cap_getReverse(fivePrimeCap);
    Cap *reversedThreePrimeCap = cap_getReverse(threePrimeCap);
    CuAssertTrue(testCase, !cap_getSide(reversedFivePrimeCap));
    CuAssertTrue(testCase, cap_getSide(reversedThreePrimeCap));
    CuAssertTrue(testCase, cap_getStrand(reversedFivePrimeCap));
    CuAssertTrue(testCase, cap_getStrand(reversedThreePrimeCap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(reversedFivePrimeCap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(reversedThreePrimeCap));

    //The reversed segment.
    Segment *positiveSegment = segment_getReverse(negativeSegment);
    CuAssertTrue(testCase, segment_getStrand(positiveSegment));
    CuAssertIntEquals(testCase, 2, segment_getStart(positiveSegment));
    CuAssertIntEquals(testCase, 3, segment_getLength(positiveSegment));

    cactusCapTestTeardown();
}
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); //Traversing from 5' end.. Segment *_5Segment = stList_get(haplotypePath, 0); Segment *_3Segment = stList_get(haplotypePath, stList_length(haplotypePath) - 1); assert(segment_getStrand(_5Segment) == segment_getStrand(_3Segment)); if (!segment_getStrand(_5Segment)) { Segment *j = _5Segment; _5Segment = segment_getReverse(_3Segment); _3Segment = segment_getReverse(j); } assert(segment_getStrand(_5Segment)); assert(segment_getStrand(_3Segment)); Cap *_5Cap = segment_get5Cap(_5Segment); Cap *_3Cap = segment_get3Cap(_3Segment); if (getAdjacentCapsSegment(_5Cap) != NULL) { assert(!trueAdjacency(_5Cap, haplotypeEventStrings)); } if (getAdjacentCapsSegment(_3Cap) != NULL) { assert(!trueAdjacency(_3Cap, haplotypeEventStrings)); } debugScaffoldPathsP(_5Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 1); debugScaffoldPathsP(_3Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 0); } stHash_destruct(segmentToMaximalHaplotypePathHash); }
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference, const char *eventString, bool *aligned) {
    /*
     * Both segments must be on the positive strand (asserted).
     *
     * When segmentX starts before segmentY: *aligned is set to 1 if the block of
     * segmentX and the block of segmentY each contain a segment whose event
     * header equals eventString. The function returns 1 if such a pair also
     * shares a meta-sequence and capsAreAdjacent() accepts the 3' cap of the
     * first and the 5' cap of the second.
     *
     * Otherwise the two segments are asserted to be the same segment, and both
     * *aligned and the return value are 1 if the 5' end of its block has a cap
     * in the event.
     *
     * In every other case 0 is returned. The difference argument is not read;
     * the only check that used it (a tolerance on the separation distance) is
     * disabled.
     */
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = 0;

    if (segment_getStart(segmentX) >= segment_getStart(segmentY)) {
        assert(segmentX == segmentY);
        if (hasCapInEvent(block_get5End(segment_getBlock(segmentX)), eventString)) {
            *aligned = 1;
            return 1;
        }
        return 0;
    }

    Block *blockX = segment_getBlock(segmentX);
    Block *blockY = segment_getBlock(segmentY);
    Block_InstanceIterator *itX = block_getInstanceIterator(blockX);
    Segment *candidateX;
    while ((candidateX = block_getNext(itX)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(candidateX)), eventString) != 0) {
            continue;
        }
        Block_InstanceIterator *itY = block_getInstanceIterator(blockY);
        Segment *candidateY;
        while ((candidateY = block_getNext(itY)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(candidateY)), eventString) != 0) {
                continue;
            }
            //Both blocks have a segment in the event.
            *aligned = 1;
            if (sequence_getMetaSequence(segment_getSequence(candidateX))
                    != sequence_getMetaSequence(segment_getSequence(candidateY))) {
                continue; //Not on the same assembly sequence
            }
            //Check if the two segments are connected by a path of adjacency from
            //the 3' end of the X segment to the 5' end of the Y segment.
            int64_t separationDistance;
            if (capsAreAdjacent(segment_get3Cap(candidateX), segment_get5Cap(candidateY), &separationDistance)) {
                block_destructInstanceIterator(itX);
                block_destructInstanceIterator(itY);
                return 1;
            }
        }
        block_destructInstanceIterator(itY);
    }
    block_destructInstanceIterator(itX);
    return 0;
}
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    /*
     * Returns a list holding one substring for every segment that has a
     * sequence, taken over all the blocks of the given flowers. Each substring
     * is built from the positive strand orientation of the segment, with its
     * start expressed relative to the start of the sequence. The list is
     * constructed with substring_destruct as its element destructor.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);

    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        Block *block;
        while ((block = flower_getNextBlock(blockIt)) != NULL) {
            Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
            Segment *segment;
            while ((segment = block_getNext(instanceIt)) != NULL) {
                Sequence *sequence = segment_getSequence(segment);
                if (sequence == NULL) {
                    continue; //Nothing to extract for a segment without a sequence
                }
                if (!segment_getStrand(segment)) {
                    segment = segment_getReverse(segment);
                }
                assert(segment_getLength(segment) > 0);
                stList_append(substrings,
                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                segment_getStart(segment) - sequence_getStart(sequence),
                                segment_getLength(segment)));
            }
            block_destructInstanceIterator(instanceIt);
        }
        flower_destructBlockIterator(blockIt);
    }

    return substrings;
}
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    /*
     * Writes one line to fileHandle for each SegmentHolder in positions: the
     * substitution type, the event header and sequence length of the holder's
     * segment, a position and the three bases of the holder. The MAF block of
     * the segment's block is written after it via getMAFBlock().
     *
     * The position is the segment start plus the holder's offset on the
     * positive strand, and the segment start minus the offset otherwise.
     */
    for (int64_t index = 0; index < stList_length(positions); index++) {
        SegmentHolder *holder = stList_get(positions, index);
        int64_t position = segment_getStart(holder->segment);

        if (!segment_getStrand(holder->segment)) {
            //Negative strand: coordinates decrease going from the 5' to the 3' cap.
            position -= holder->offset;
            assert(cap_getCoordinate(segment_get3Cap(holder->segment))
                    == segment_getStart(holder->segment) - segment_getLength(holder->segment) + 1);
        } else {
            position += holder->offset;
            assert(segment_getStart(holder->segment)
                    == cap_getCoordinate(segment_get5Cap(holder->segment)));
            assert(cap_getCoordinate(segment_get3Cap(holder->segment))
                    == segment_getStart(holder->segment) + segment_getLength(holder->segment) - 1);
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n",
                substitutionType,
                event_getHeader(segment_getEvent(holder->segment)),
                sequence_getLength(segment_getSequence(holder->segment)),
                position,
                holder->base1, holder->base2, holder->base3);
        getMAFBlock(segment_getBlock(holder->segment), fileHandle);
    }
}
static int segmentCompareFn(const void *segment1, const void *segment2) { Name name1 = segment1 == &segmentCompareFn_coordinate ? segmentCompareFn_metaSequence : metaSequence_getName(sequence_getMetaSequence(segment_getSequence((Segment *)segment1))); Name name2 = segment2 == &segmentCompareFn_coordinate ? segmentCompareFn_metaSequence : metaSequence_getName(sequence_getMetaSequence(segment_getSequence((Segment *)segment2))); int i = cactusMisc_nameCompare(name1, name2); if(i == 0) { int64_t x = segment1 == &segmentCompareFn_coordinate ? segmentCompareFn_coordinate : (int64_t) segment_getStart((void *) segment1); int64_t y = segment2 == &segmentCompareFn_coordinate ? segmentCompareFn_coordinate : (int64_t) segment_getStart((void *) segment2); assert( segment1 == &segmentCompareFn_coordinate || segment_getStrand( (void *) segment1)); assert( segment2 == &segmentCompareFn_coordinate || segment_getStrand( (void *) segment2)); return x > y ? 1 : (x < y ? -1 : 0); //i > 0 ? 1 : i < 0 ? -1 : 0; //This was because of an overflow } return i; }
Segment *block_splitP(Segment *segment, Block *leftBlock, Block *rightBlock) {
    /*
     * Creates a segment in leftBlock and a segment in rightBlock to stand in
     * for the given segment, makes the 3' cap of the left one adjacent to the
     * 5' cap of the right one, and moves any adjacency held by the caps of the
     * given segment onto the outer caps of the new pair. Returns the left
     * segment.
     *
     * If the given segment has a sequence, the new segments are placed on it,
     * the right one starting block_getLength(leftBlock) after the left one.
     * Otherwise they are created from the segment's event only.
     */
    Segment *leftSegment;
    Segment *rightSegment;
    if (segment_getSequence(segment) != NULL) {
        leftSegment = segment_construct2(leftBlock, segment_getStart(segment),
                segment_getStrand(segment), segment_getSequence(segment));
        rightSegment = segment_construct2(rightBlock, segment_getStart(segment) + block_getLength(leftBlock),
                segment_getStrand(segment), segment_getSequence(segment));
    } else {
        leftSegment = segment_construct(leftBlock, segment_getEvent(segment));
        rightSegment = segment_construct(rightBlock, segment_getEvent(segment));
    }

    //Join the two halves.
    cap_makeAdjacent(segment_get3Cap(leftSegment), segment_get5Cap(rightSegment));

    //Hand the outer adjacencies over to the new caps. The 3' adjacency is only
    //looked up after the 5' one has been moved, as in the original sequence of calls.
    Cap *adjacentTo5Cap = cap_getAdjacency(segment_get5Cap(segment));
    if (adjacentTo5Cap != NULL) {
        cap_makeAdjacent(adjacentTo5Cap, segment_get5Cap(leftSegment));
    }
    Cap *adjacentTo3Cap = cap_getAdjacency(segment_get3Cap(segment));
    if (adjacentTo3Cap != NULL) {
        cap_makeAdjacent(adjacentTo3Cap, segment_get3Cap(rightSegment));
    }

    return leftSegment;
}
static void getOrderedSegmentsP(Flower *flower, stSortedSet *segments) {
    /*
     * Inserts the positive strand orientation of every segment of the flower
     * into the sorted set, then does the same for the nested flower of each of
     * the flower's groups that has one. Each segment is asserted not to be in
     * the set before it is inserted.
     */
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    for (Segment *segment = flower_getNextSegment(segmentIt); segment != NULL;
            segment = flower_getNextSegment(segmentIt)) {
        Segment *positiveSegment = segment_getStrand(segment) ? segment : segment_getReverse(segment);
        assert(stSortedSet_search(segments, positiveSegment) == NULL);
        stSortedSet_insert(segments, positiveSegment);
    }
    flower_destructSegmentIterator(segmentIt);

    //Descend into the nested flowers.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    for (Group *group = flower_getNextGroup(groupIt); group != NULL;
            group = flower_getNextGroup(groupIt)) {
        Flower *nestedFlower = group_getNestedFlower(group);
        if (nestedFlower != NULL) {
            getOrderedSegmentsP(nestedFlower, segments);
        }
    }
    flower_destructGroupIterator(groupIt);
}
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths); stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stSortedSet *bucket = stSortedSet_construct(); stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket); stSortedSet_insert(bucket, stList_get(haplotypePaths, i)); } for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); Segment *_5Segment = stList_get(haplotypePath, 0); if (!segment_getStrand(_5Segment)) { _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1)); } assert(segment_getStrand(_5Segment)); if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) { assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings)); } int64_t insertLength; int64_t deleteLength; Cap *otherCap; enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters); if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) { assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL); int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0); Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment)); assert(adjacentSegment != NULL); while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment)); assert(adjacentSegment != NULL); 
} assert(adjacentSegment != NULL); assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment); if (adjacentHaplotypePath == NULL) { adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse( adjacentSegment)); } assert(adjacentHaplotypePath != NULL); assert(adjacentHaplotypePath != haplotypePath); assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL); int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0); //Now merge the buckets and make new int tuples.. stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath); stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath); assert(bucket1 != NULL); assert(bucket2 != NULL); assert(bucket1 != bucket2); stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2); stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3); stList *l; while ((l = stSortedSet_getNext(bucketIt)) != NULL) { //Do the bucket first assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2); stHash_remove(haplotypePathToScaffoldPathHash, l); stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3); //Now the length stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l); assert(m != NULL); assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k); stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k)); stIntTuple_destruct(m); } assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3); assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3); stSortedSet_destructIterator(bucketIt); } } 
stHash_destruct(segmentToMaximalHaplotypePathHash); return haplotypeToMaximalHaplotypeLengthHash; }