static void debugScaffoldPathsP(Cap *cap, stList *haplotypePath, stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash, stHash *segmentToMaximalHaplotypePathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters, bool capDir) { int64_t insertLength; int64_t deleteLength; Cap *otherCap; enum CapCode capCode = getCapCode(cap, &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters); if (capCode == SCAFFOLD_GAP || capCode == AMBIGUITY_GAP) { Segment *adjacentSegment = getAdjacentCapsSegment(cap); assert(adjacentSegment != NULL); while (!hasCapInEvents(cap_getEnd(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)), haplotypeEventStrings)) { adjacentSegment = getAdjacentCapsSegment(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)); assert(adjacentSegment != NULL); } assert(adjacentSegment != NULL); assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(adjacentSegment)))); stIntTuple *j = stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath); (void)j; assert(j != NULL); stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment); if (adjacentHaplotypePath == NULL) { adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(adjacentSegment)); } assert(adjacentHaplotypePath != NULL); assert(adjacentHaplotypePath != haplotypePath); stIntTuple *k = stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath); (void)k; assert(k != NULL); assert(stIntTuple_get(j, 0) == stIntTuple_get(k, 0)); assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath)); } }
void testCap_getSegment(CuTest* testCase) {
    /*
     * Checks that both caps of a freshly constructed segment report that segment and its
     * orientation, and that the reversed caps report the reversed segment.
     */
    cactusCapTestSetup();
    Block *block = block_construct(2, flower);
    Segment *segment = segment_construct(block, rootEvent);
    Cap *fivePrimeCap = segment_get5Cap(segment);
    Cap *threePrimeCap = segment_get3Cap(segment);
    Segment *reverseSegment = segment_getReverse(segment);

    CuAssertTrue(testCase, cap_getSegment(fivePrimeCap) == segment);
    CuAssertTrue(testCase, cap_getSegment(threePrimeCap) == segment);

    CuAssertTrue(testCase, cap_getOrientation(fivePrimeCap) == segment_getOrientation(segment));
    CuAssertTrue(testCase, cap_getOrientation(threePrimeCap) == segment_getOrientation(segment));

    CuAssertTrue(testCase, cap_getSegment(cap_getReverse(fivePrimeCap)) == reverseSegment);
    CuAssertTrue(testCase, cap_getSegment(cap_getReverse(threePrimeCap)) == reverseSegment);
    cactusCapTestTeardown();
}
void testCap_segmentCoordinatesReverseStrand(CuTest* testCase) {
    /*
     * Tests the coordinates of a reverse-strand segment and of its 5 and 3 prime caps,
     * together with the coordinates reported by their reversed counterparts.
     */
    cactusCapTestSetup();
    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 0, sequence);
    Cap *fivePrimeCap = segment_get5Cap(segment);
    Cap *threePrimeCap = segment_get3Cap(segment);

    //The segment as constructed.
    CuAssertTrue(testCase, cap_getSide(fivePrimeCap));
    CuAssertTrue(testCase, !cap_getSide(threePrimeCap));
    CuAssertTrue(testCase, !cap_getStrand(fivePrimeCap));
    CuAssertTrue(testCase, !cap_getStrand(threePrimeCap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(fivePrimeCap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(threePrimeCap));
    CuAssertTrue(testCase, !segment_getStrand(segment));
    CuAssertIntEquals(testCase, 4, segment_getStart(segment));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment));

    //The reversed caps and the reversed segment.
    Cap *reversedFivePrimeCap = cap_getReverse(fivePrimeCap);
    Cap *reversedThreePrimeCap = cap_getReverse(threePrimeCap);
    CuAssertTrue(testCase, !cap_getSide(reversedFivePrimeCap));
    CuAssertTrue(testCase, cap_getSide(reversedThreePrimeCap));
    CuAssertTrue(testCase, cap_getStrand(reversedFivePrimeCap));
    CuAssertTrue(testCase, cap_getStrand(reversedThreePrimeCap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(reversedFivePrimeCap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(reversedThreePrimeCap));

    Segment *reversedSegment = segment_getReverse(segment);
    CuAssertTrue(testCase, segment_getStrand(reversedSegment));
    CuAssertIntEquals(testCase, 2, segment_getStart(reversedSegment));
    CuAssertIntEquals(testCase, 3, segment_getLength(reversedSegment));
    cactusCapTestTeardown();
}
void block_setRootInstance(Block *block, Segment *segment) {
    /*
     * Makes the given segment the root instance of the block by setting the root instance of
     * the block's 5' and 3' ends to the segment's 5' and 3' caps. Both arguments are first
     * converted to their positive orientation; the segment must be an instance of the block.
     */
    Block *positiveBlock = block_getPositiveOrientation(block);
    Segment *positiveSegment = segment_getPositiveOrientation(segment);
    assert(block_getInstance(positiveBlock, segment_getName(positiveSegment)) == positiveSegment);

    End *fivePrimeEnd = block_get5End(positiveBlock);
    end_setRootInstance(fivePrimeEnd, segment_get5Cap(positiveSegment));

    End *threePrimeEnd = block_get3End(positiveBlock);
    end_setRootInstance(threePrimeEnd, segment_get3Cap(positiveSegment));
}
static void getMaximalHaplotypePathsP2(Segment *segment, stList *maximalHaplotypePath, stSortedSet *segmentSet,
        stList *eventStrings) {
    /*
     * Step backwards over 5' adjacencies, for as long as trueAdjacency accepts the adjacency
     * and an adjacent segment exists, to reach one end of the contig. The segment reached is
     * then handed to getMaximalHaplotypePathsP3, which starts the traversal that defines the
     * maximal haplotype path.
     */
    for (;;) {
        Cap *fivePrimeCap = segment_get5Cap(segment);
        assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));

        if (!trueAdjacency(fivePrimeCap, eventStrings)) {
            break; //The adjacency is not supported by a haplotype path.
        }

        Segment *previousSegment = getAdjacentCapsSegment(fivePrimeCap);
        assert(segment != previousSegment);
        assert(segment_getReverse(segment) != previousSegment);
        if (previousSegment == NULL) {
            break; //No further segment, so the recursion starts from here.
        }

        assert(stSortedSet_search(segmentSet, previousSegment) == NULL);
        assert(stSortedSet_search(segmentSet, segment_getReverse(previousSegment)) == NULL);
        assert(hasCapInEvents(cap_getEnd(segment_get3Cap(previousSegment)), eventStrings));
        segment = previousSegment;
    }
    getMaximalHaplotypePathsP3(segment, maximalHaplotypePath, segmentSet, eventStrings);
}
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    /*
     * Writes one line per SegmentHolder in positions to fileHandle, followed by the MAF block
     * of the holder's segment. The line carries the substitution type, the event header, the
     * sequence length, the position (segment start shifted by the holder's offset, added on
     * the positive strand and subtracted otherwise) and the holder's three bases.
     */
    for (int64_t i = 0; i < stList_length(positions); i++) {
        SegmentHolder *holder = stList_get(positions, i);
        Segment *segment = holder->segment;
        int64_t position = segment_getStart(segment);

        if (segment_getStrand(segment)) {
            position += holder->offset;
            assert(cap_getCoordinate(segment_get5Cap(segment)) == segment_getStart(segment));
            assert(segment_getStart(segment) + segment_getLength(segment) - 1
                    == cap_getCoordinate(segment_get3Cap(segment)));
        } else {
            position -= holder->offset;
            assert(segment_getStart(segment) - segment_getLength(segment) + 1
                    == cap_getCoordinate(segment_get3Cap(segment)));
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n", substitutionType,
                event_getHeader(segment_getEvent(segment)),
                sequence_getLength(segment_getSequence(segment)), position,
                holder->base1, holder->base2, holder->base3);
        getMAFBlock(segment_getBlock(segment), fileHandle);
    }
}
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); //Traversing from 5' end.. Segment *_5Segment = stList_get(haplotypePath, 0); Segment *_3Segment = stList_get(haplotypePath, stList_length(haplotypePath) - 1); assert(segment_getStrand(_5Segment) == segment_getStrand(_3Segment)); if (!segment_getStrand(_5Segment)) { Segment *j = _5Segment; _5Segment = segment_getReverse(_3Segment); _3Segment = segment_getReverse(j); } assert(segment_getStrand(_5Segment)); assert(segment_getStrand(_3Segment)); Cap *_5Cap = segment_get5Cap(_5Segment); Cap *_3Cap = segment_get3Cap(_3Segment); if (getAdjacentCapsSegment(_5Cap) != NULL) { assert(!trueAdjacency(_5Cap, haplotypeEventStrings)); } if (getAdjacentCapsSegment(_3Cap) != NULL) { assert(!trueAdjacency(_3Cap, haplotypeEventStrings)); } debugScaffoldPathsP(_5Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 1); debugScaffoldPathsP(_3Cap, haplotypePath, haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash, segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 0); } stHash_destruct(segmentToMaximalHaplotypePathHash); }
static void getMaximalHaplotypePathsCheck(Flower *flower, stSortedSet *segmentSet, const char *eventString,
        stList *eventStrings) {
    /*
     * Debug check that the haplotype paths are well formed: every segment of the event named
     * eventString whose 5' end has a cap in eventStrings must be present in segmentSet, in
     * one orientation or the other. Applied to this flower and, recursively, to all nested
     * flowers.
     */
    Flower_SegmentIterator *segmentIterator = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIterator)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) != 0) {
            continue; //Belongs to a different event.
        }
        if (!hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) {
            continue;
        }
        assert(stSortedSet_search(segmentSet, segment) != NULL
                || stSortedSet_search(segmentSet, segment_getReverse(segment)) != NULL);
    }
    flower_destructSegmentIterator(segmentIterator);

    //Repeat the check on the nested flowers.
    Flower_GroupIterator *groupIterator = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIterator)) != NULL) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMaximalHaplotypePathsCheck(group_getNestedFlower(group), segmentSet, eventString, eventStrings);
    }
    flower_destructGroupIterator(groupIterator);
}
bool getCapGetAtEndOfPath(Cap *cap, Cap **pathEndCap, int64_t *pathLength, int64_t *nCount,
        stList *haplotypeEventStrings, stList *contaminationEventStrings) {
    /*
     * Follows the path leaving cap, alternating adjacencies and segments, adding the
     * adjacency/segment lengths to *pathLength and the counts of Ns to *nCount as it goes.
     *
     * Returns 1, with *pathEndCap set to the cap reached, when the path arrives at a segment
     * whose end has a cap in contaminationEventStrings or haplotypeEventStrings. Returns 0,
     * with *pathEndCap set to the adjacency of the terminal cap, when the adjacency leads to
     * no segment.
     */
    for (;;) {
        //Account for the length of the adjacency.
        *pathLength += getTerminalAdjacencyLength(cap);
        *nCount += getNumberOfNsInAdjacency(cap);

        Segment *segment = getAdjacentCapsSegment(cap);
        if (segment == NULL) {
            *pathEndCap = cap_getAdjacency(getTerminalCap(cap));
            assert(*pathEndCap != NULL);
            return 0;
        }

        Cap *adjacentCap;
        if (cap_getSide(cap)) {
            adjacentCap = segment_get3Cap(segment);
        } else {
            adjacentCap = segment_get5Cap(segment);
        }
        assert(cap_getName(adjacentCap) == cap_getName(cap_getAdjacency(getTerminalCap(cap))));

        End *adjacentEnd = cap_getEnd(adjacentCap);
        if (hasCapInEvents(adjacentEnd, contaminationEventStrings)
                || hasCapInEvents(adjacentEnd, haplotypeEventStrings)) {
            *pathEndCap = adjacentCap;
            return 1;
        }

        //Cross the segment and carry on from its other cap.
        *pathLength += segment_getLength(segment);
        *nCount += getNumberOfNsInSegment(segment);
        cap = cap_getOtherSegmentCap(adjacentCap);
    }
}
void testBlock_getSetRootInstance(CuTest *testCase) {
    /*
     * Checks that a newly constructed block has no root instance, and that the fixture block
     * reports the reverse of rootSegment as its root instance, with matching root caps on
     * its two ends.
     */
    cactusBlockTestSetup();

    Block *emptyBlock = block_construct(1, flower);
    CuAssertTrue(testCase, block_getRootInstance(emptyBlock) == NULL);
    block_destruct(emptyBlock);

    //Per the original note, block_setRootInstance(block, segment_getReverse(rootSegment)) is
    //called in the constructor code of the test rather than here.
    Segment *expectedRoot = segment_getReverse(rootSegment);
    CuAssertTrue(testCase, block_getRootInstance(block) == expectedRoot);
    CuAssertTrue(testCase, end_getRootInstance(block_get5End(block)) == segment_get5Cap(expectedRoot));
    CuAssertTrue(testCase, end_getRootInstance(block_get3End(block)) == segment_get3Cap(expectedRoot));

    cactusBlockTestTeardown();
}
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference, const char *eventString, bool *aligned) {
    /*
     * Decides whether the blocks of segmentX and segmentY are linked in the event named
     * eventString. Both segments must be on the positive strand.
     *
     * When segmentX starts before segmentY, every pair of instances of the two blocks that
     * belong to the event is examined; *aligned is set to 1 as soon as one such pair exists,
     * and 1 is returned if a pair shares a meta sequence and capsAreAdjacent reports the
     * 3' cap of the X instance adjacent to the 5' cap of the Y instance. Otherwise the two
     * segments must be the same segment, and the result (and *aligned) is 1 if the 5' end of
     * its block has a cap in the event.
     *
     * The difference parameter is not used by the current code.
     */
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = 0;

    if (!(segment_getStart(segmentX) < segment_getStart(segmentY))) {
        assert(segmentX == segmentY);
        if (hasCapInEvent(block_get5End(segment_getBlock(segmentX)), eventString)) {
            *aligned = 1;
            return 1;
        }
        return 0;
    }

    Block *blockX = segment_getBlock(segmentX);
    Block *blockY = segment_getBlock(segmentY);
    Block_InstanceIterator *iteratorX = block_getInstanceIterator(blockX);
    Segment *instanceX;
    while ((instanceX = block_getNext(iteratorX)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(instanceX)), eventString) != 0) {
            continue;
        }
        Block_InstanceIterator *iteratorY = block_getInstanceIterator(blockY);
        Segment *instanceY;
        while ((instanceY = block_getNext(iteratorY)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(instanceY)), eventString) != 0) {
                continue;
            }
            *aligned = 1;
            if (sequence_getMetaSequence(segment_getSequence(instanceX))
                    != sequence_getMetaSequence(segment_getSequence(instanceY))) {
                continue; //Not the same assembly sequence.
            }
            //Check if the two instances are connected by a path of adjacency from the
            //3' end of the X instance to the 5' end of the Y instance.
            int64_t separationDistance;
            if (capsAreAdjacent(segment_get3Cap(instanceX), segment_get5Cap(instanceY), &separationDistance)) {
                //A disabled filter used to be applied here:
                //if(difference < 10000 || (separationDistance <= difference * 1.5 && difference <= separationDistance * 1.5))
                block_destructInstanceIterator(iteratorX);
                block_destructInstanceIterator(iteratorY);
                return 1;
            }
        }
        block_destructInstanceIterator(iteratorY);
    }
    block_destructInstanceIterator(iteratorX);
    return 0;
}
Segment *block_splitP(Segment *segment, Block *leftBlock, Block *rightBlock) {
    /*
     * Builds the two segments that replace the given segment when its block is split into
     * leftBlock and rightBlock, joins them to each other, and moves the outer adjacencies of
     * the original segment's caps onto the new outer caps. Returns the new left segment.
     *
     * A segment with a sequence gets coordinate-bearing replacements; one without a sequence
     * gets replacements built from its event only.
     *
     * NOTE(review): the right segment is started at segment_getStart(segment) plus the left
     * block's length; confirm that this is also the intended start for a reverse-strand
     * segment.
     */
    Sequence *sequence = segment_getSequence(segment);
    Segment *leftSegment;
    Segment *rightSegment;
    if (sequence != NULL) {
        leftSegment = segment_construct2(leftBlock, segment_getStart(segment), segment_getStrand(segment),
                sequence);
        rightSegment = segment_construct2(rightBlock, segment_getStart(segment) + block_getLength(leftBlock),
                segment_getStrand(segment), sequence);
    } else {
        leftSegment = segment_construct(leftBlock, segment_getEvent(segment));
        rightSegment = segment_construct(rightBlock, segment_getEvent(segment));
    }

    //Link the two halves together.
    cap_makeAdjacent(segment_get3Cap(leftSegment), segment_get5Cap(rightSegment));

    //Update the adjacencies of the original caps to point at the new outer caps.
    Cap *old5Cap = segment_get5Cap(segment);
    Cap *new5Cap = segment_get5Cap(leftSegment);
    Cap *old3Cap = segment_get3Cap(segment);
    Cap *new3Cap = segment_get3Cap(rightSegment);
    if (cap_getAdjacency(old5Cap) != NULL) {
        cap_makeAdjacent(cap_getAdjacency(old5Cap), new5Cap);
    }
    if (cap_getAdjacency(old3Cap) != NULL) {
        cap_makeAdjacent(cap_getAdjacency(old3Cap), new3Cap);
    }
    return leftSegment;
}
void testCap_getOtherSegmentCap(CuTest *testCase) {
    /*
     * Checks cap_getOtherSegmentCap: it gives NULL for leaf1Cap (in both orientations), and
     * maps each cap of a segment to the segment's opposite cap, including for reversed caps.
     */
    cactusCapTestSetup();
    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 1, sequence);
    Cap *fivePrimeCap = segment_get5Cap(segment);
    Cap *threePrimeCap = segment_get3Cap(segment);

    CuAssertTrue(testCase, cap_getOtherSegmentCap(leaf1Cap) == NULL);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(cap_getReverse(leaf1Cap)) == NULL);

    CuAssertTrue(testCase, cap_getOtherSegmentCap(fivePrimeCap) == threePrimeCap);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(threePrimeCap) == fivePrimeCap);

    CuAssertTrue(testCase, cap_getOtherSegmentCap(cap_getReverse(fivePrimeCap)) == cap_getReverse(threePrimeCap));
    CuAssertTrue(testCase, cap_getOtherSegmentCap(cap_getReverse(threePrimeCap)) == cap_getReverse(fivePrimeCap));

    cactusCapTestTeardown();
}
static int64_t getBoundingNs(Cap *cap) {
    /*
     * Returns getBoundingNsP for the segment that getCapsSegment associates with cap,
     * oriented so that cap corresponds to the segment's 5' terminal cap; the reversed
     * segment is used when cap corresponds to the 3' terminal cap instead. Returns 0 when
     * the cap has no such segment.
     */
    assert(cap != NULL);
    Segment *segment = getCapsSegment(cap);
    if (segment == NULL) {
        return 0;
    }

    Cap *fivePrimeTerminalCap = getTerminalCap(segment_get5Cap(segment));
    Cap *threePrimeTerminalCap = getTerminalCap(segment_get3Cap(segment));
    (void) threePrimeTerminalCap; //Only read inside asserts.
    assert(fivePrimeTerminalCap != NULL);
    assert(threePrimeTerminalCap != NULL);

    Segment *orientedSegment = segment;
    if (cap_getName(fivePrimeTerminalCap) != cap_getName(cap)) {
        assert(cap_getName(threePrimeTerminalCap) == cap_getName(cap));
        orientedSegment = segment_getReverse(segment);
    }
    return getBoundingNsP(orientedSegment);
}
static void getMaximalHaplotypePathsP(Flower *flower, stList *maximalHaplotypePaths, stSortedSet *segmentSet,
        const char *eventString, stList *eventStrings) {
    /*
     * Iterates through the segments in this flower. For each segment of the event named
     * eventString that is not yet in segmentSet (in either orientation) and whose 5' end has
     * a cap in eventStrings, a new path list is appended to maximalHaplotypePaths and filled
     * in by getMaximalHaplotypePathsP2. The nested flowers are then processed recursively.
     */
    Flower_SegmentIterator *segmentIterator = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIterator)) != NULL) {
        //Skip segments we have already seen.
        if (stSortedSet_search(segmentSet, segment) != NULL
                || stSortedSet_search(segmentSet, segment_getReverse(segment)) != NULL) {
            continue;
        }
        //Skip segments that are not in the assembly.
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) != 0) {
            continue;
        }
        if (!hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) {
            //Not in a haplotype block, so the 3' end must agree.
            assert(!hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));
            continue;
        }
        //Is a block in a haplotype segment.
        assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));
        stList *newPath = stList_construct();
        stList_append(maximalHaplotypePaths, newPath);
        getMaximalHaplotypePathsP2(segment, newPath, segmentSet, eventStrings);
    }
    flower_destructSegmentIterator(segmentIterator);

    /*
     * Now recurse on the contained flowers.
     */
    Flower_GroupIterator *groupIterator = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIterator)) != NULL) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMaximalHaplotypePathsP(group_getNestedFlower(group), maximalHaplotypePaths, segmentSet, eventString,
                eventStrings);
    }
    flower_destructGroupIterator(groupIterator);
}
int64_t flower_getTotalBaseLength(Flower *flower) {
    /*
     * Sums, over the threads that start at the non-block ends of the flower, the number of
     * coordinates lying strictly between the starting cap and the cap at which the thread
     * next reaches a non-block end.
     *
     * The implementation of this function is very like that in group_getTotalBaseLength,
     * with a few differences. Consider merging them.
     */
    int64_t totalLength = 0;
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        if (end_isBlockEnd(end)) {
            continue;
        }
        End_InstanceIterator *capIterator = end_getInstanceIterator(end);
        Cap *cap;
        while ((cap = end_getNext(capIterator)) != NULL) {
            if (!cap_getStrand(cap)) {
                cap = cap_getReverse(cap); //Always work with the positive strand cap.
            }
            if (cap_getSide(cap) || cap_getSequence(cap) == NULL) {
                continue;
            }
            Cap *farCap = cap_getAdjacency(cap);
            assert(farCap != NULL);
            //Step through the intervening segments until a non-block end is reached.
            while (end_isBlockEnd(cap_getEnd(farCap))) {
                Segment *segment = cap_getSegment(farCap);
                assert(segment != NULL);
                assert(segment_get5Cap(segment) == farCap);
                farCap = cap_getAdjacency(segment_get3Cap(segment));
                assert(farCap != NULL);
                assert(cap_getStrand(farCap));
                assert(cap_getSide(farCap));
            }
            assert(cap_getStrand(farCap));
            assert(cap_getSide(farCap));
            int64_t threadLength = cap_getCoordinate(farCap) - cap_getCoordinate(cap) - 1;
            assert(threadLength >= 0);
            totalLength += threadLength;
        }
        end_destructInstanceIterator(capIterator);
    }
    flower_destructEndIterator(endIterator);
    return totalLength;
}
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    /*
     * Collects the maximal haplotype paths of the event named eventString in the flower
     * (via getMaximalHaplotypePathsP), runs debug checks on them and returns them.
     *
     * The returned list is constructed with stList_destruct as the destructor of its
     * elements, each of which is a path list.
     */
    stList *paths = stList_construct3(0, (void(*)(void *)) stList_destruct);
    stSortedSet *segmentSet = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, paths, segmentSet, eventString, eventStrings);

    //Do some debug checks..
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(paths));
    getMaximalHaplotypePathsCheck(flower, segmentSet, eventString, eventStrings);

    for (int64_t pathIndex = 0; pathIndex < stList_length(paths); pathIndex++) {
        stList *path = stList_get(paths, pathIndex);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n", stList_length(path));
        assert(stList_length(path) > 0);

        //The two outer caps of the path must not be true adjacencies.
        Segment *firstSegment = stList_get(path, 0);
        Segment *lastSegment = stList_get(path, stList_length(path) - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(firstSegment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(firstSegment), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(lastSegment)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(lastSegment), eventStrings));
        }

        //Every consecutive pair of segments must be joined by a true adjacency.
        for (int64_t pairIndex = 0; pairIndex < stList_length(path) - 1; pairIndex++) {
            Segment *upstream = stList_get(path, pairIndex);
            Segment *downstream = stList_get(path, pairIndex + 1);
            (void) upstream; //Only read inside asserts.
            (void) downstream; //Only read inside asserts.
            assert(trueAdjacency(segment_get3Cap(upstream), eventStrings));
            assert(trueAdjacency(segment_get5Cap(downstream), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(upstream)))
                    == getTerminalCap(segment_get5Cap(downstream)));
            assert(strcmp(event_getHeader(segment_getEvent(upstream)), eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(downstream)), eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(upstream)), eventStrings));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(downstream)), eventStrings));
        }
    }

    stSortedSet_destruct(segmentSet);
    return paths;
}
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) { stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths); stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths); for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stSortedSet *bucket = stSortedSet_construct(); stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket); stSortedSet_insert(bucket, stList_get(haplotypePaths, i)); } for (int64_t i = 0; i < stList_length(haplotypePaths); i++) { stList *haplotypePath = stList_get(haplotypePaths, i); assert(stList_length(haplotypePath) > 0); Segment *_5Segment = stList_get(haplotypePath, 0); if (!segment_getStrand(_5Segment)) { _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1)); } assert(segment_getStrand(_5Segment)); if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) { assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings)); } int64_t insertLength; int64_t deleteLength; Cap *otherCap; enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters); if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) { assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL); int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0); Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment)); assert(adjacentSegment != NULL); while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment)); assert(adjacentSegment != NULL); 
} assert(adjacentSegment != NULL); assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment); if (adjacentHaplotypePath == NULL) { adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse( adjacentSegment)); } assert(adjacentHaplotypePath != NULL); assert(adjacentHaplotypePath != haplotypePath); assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL); int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0); //Now merge the buckets and make new int tuples.. stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath); stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath); assert(bucket1 != NULL); assert(bucket2 != NULL); assert(bucket1 != bucket2); stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2); stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3); stList *l; while ((l = stSortedSet_getNext(bucketIt)) != NULL) { //Do the bucket first assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2); stHash_remove(haplotypePathToScaffoldPathHash, l); stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3); //Now the length stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l); assert(m != NULL); assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k); stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k)); stIntTuple_destruct(m); } assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3); assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3); stSortedSet_destructIterator(bucketIt); } } 
stHash_destruct(segmentToMaximalHaplotypePathHash); return haplotypeToMaximalHaplotypeLengthHash; }