/*
 * Reports whether two caps bracket an uninterrupted interval of the same
 * meta-sequence.
 *
 * Returns 1 and writes the number of bases lying strictly between the two caps
 * to *separationDistance when they do; returns 0 (leaving *separationDistance
 * untouched) otherwise.
 */
bool capsAreAdjacent(Cap *cap1, Cap *cap2, int64_t *separationDistance) {
    //Equal names or equal coordinates can happen if end1 == end2.
    if (cap_getName(cap2) == cap_getName(cap1) || cap_getCoordinate(cap1) == cap_getCoordinate(cap2)) {
        return 0;
    }
    //Caps on different meta-sequences can never be adjacent.
    if (sequence_getMetaSequence(cap_getSequence(cap1)) != sequence_getMetaSequence(cap_getSequence(cap2))) {
        return 0;
    }

    assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader(cap_getEvent(cap2))) == 0);
    assert(cap_getPositiveOrientation(cap1) != cap_getPositiveOrientation(cap2));
    assert(cap_getName(cap1) != cap_getName(cap2));
    assert(sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2)));

    //Compare both caps in the orientation for which cap_getStrand() is true.
    Cap *strandCap1 = cap_getStrand(cap1) ? cap1 : cap_getReverse(cap1);
    Cap *strandCap2 = cap_getStrand(cap2) ? cap2 : cap_getReverse(cap2);
    assert(cap_getStrand(strandCap1));
    assert(cap_getStrand(strandCap2));

    int64_t coordinate1 = cap_getCoordinate(strandCap1);
    int64_t coordinate2 = cap_getCoordinate(strandCap2);

    if (coordinate1 < coordinate2) {
        //cap1 is the lower cap: its side must be unset and cap2's side set.
        if (!cap_getSide(strandCap1) && cap_getSide(strandCap2)) {
            //The minus 1 gives the length of the sequence between the two caps.
            *separationDistance = coordinate2 - coordinate1 - 1;
            return 1;
        }
        return 0;
    }

    //cap2 is the lower cap: the mirrored side requirement applies.
    if (cap_getSide(strandCap1) && !cap_getSide(strandCap2)) {
        *separationDistance = coordinate1 - coordinate2 - 1;
        return 1;
    }
    return 0;
}
/*
 * Checks that end_getInstance() finds each fixture cap by name and returns it
 * in the orientation the fixture expects for "end".
 */
void testEnd_getInstance(CuTest* testCase) {
    cactusEndTestSetup();

    //The root cap is expected back reversed.
    Cap *expectedRoot = cap_getReverse(rootCap);
    CuAssertTrue(testCase, end_getInstance(end, cap_getName(rootCap)) == expectedRoot);

    //So is the first leaf cap.
    Cap *expectedLeaf1 = cap_getReverse(leaf1Cap);
    CuAssertTrue(testCase, end_getInstance(end, cap_getName(leaf1Cap)) == expectedLeaf1);

    //The second leaf cap is expected back unchanged.
    CuAssertTrue(testCase, end_getInstance(end, cap_getName(leaf2Cap)) == leaf2Cap);

    cactusEndTestTeardown();
}
/*
 * Checks cap_getCoordinate() on the fixture caps, in both orientations.
 */
void testCap_getCoordinate(CuTest* testCase) {
    cactusCapTestSetup();

    //The root cap reports INT64_MAX, whichever orientation is queried.
    Cap *reversedRoot = cap_getReverse(rootCap);
    CuAssertTrue(testCase, cap_getCoordinate(rootCap) == INT64_MAX);
    CuAssertTrue(testCase, cap_getCoordinate(reversedRoot) == INT64_MAX);

    //The first leaf cap reports coordinate 4, whichever orientation is queried.
    Cap *reversedLeaf1 = cap_getReverse(leaf1Cap);
    CuAssertTrue(testCase, cap_getCoordinate(leaf1Cap) == 4);
    CuAssertTrue(testCase, cap_getCoordinate(reversedLeaf1) == 4);

    cactusCapTestTeardown();
}
/*
 * Checks cap_getSequence() on the fixture caps, in both orientations.
 */
void testCap_getSequence(CuTest* testCase) {
    cactusCapTestSetup();

    //The root cap has no sequence in either orientation.
    Cap *reversedRoot = cap_getReverse(rootCap);
    CuAssertTrue(testCase, cap_getSequence(rootCap) == NULL);
    CuAssertTrue(testCase, cap_getSequence(reversedRoot) == NULL);

    //The first leaf cap reports the fixture sequence in either orientation.
    Cap *reversedLeaf1 = cap_getReverse(leaf1Cap);
    CuAssertTrue(testCase, cap_getSequence(leaf1Cap) == sequence);
    CuAssertTrue(testCase, cap_getSequence(reversedLeaf1) == sequence);

    cactusCapTestTeardown();
}
/*
 * Checks that cap_getEnd() returns the end in the orientation matching the
 * orientation of the queried cap.
 */
void testCap_getEnd(CuTest* testCase) {
    cactusCapTestSetup();

    End *reversedEnd = end_getReverse(end);

    //rootCap belongs to the reversed end; its reverse belongs to the end itself.
    CuAssertTrue(testCase, cap_getEnd(rootCap) == reversedEnd);
    CuAssertTrue(testCase, cap_getEnd(cap_getReverse(rootCap)) == end);

    //leaf2Cap is the other way around.
    CuAssertTrue(testCase, cap_getEnd(leaf2Cap) == end);
    CuAssertTrue(testCase, cap_getEnd(cap_getReverse(leaf2Cap)) == end_getReverse(end));

    cactusCapTestTeardown();
}
/*
 * Checks cap_getSide() on the fixture caps and that reversing a cap flips the
 * reported side.
 */
void testCap_getSide(CuTest* testCase) {
    cactusCapTestSetup();

    //Root cap: side unset, set once reversed.
    Cap *reversedRoot = cap_getReverse(rootCap);
    CuAssertTrue(testCase, !cap_getSide(rootCap));
    CuAssertTrue(testCase, cap_getSide(reversedRoot));

    //First leaf cap: same pattern as the root.
    Cap *reversedLeaf1 = cap_getReverse(leaf1Cap);
    CuAssertTrue(testCase, !cap_getSide(leaf1Cap));
    CuAssertTrue(testCase, cap_getSide(reversedLeaf1));

    //Second leaf cap: opposite pattern.
    Cap *reversedLeaf2 = cap_getReverse(leaf2Cap);
    CuAssertTrue(testCase, cap_getSide(leaf2Cap));
    CuAssertTrue(testCase, !cap_getSide(reversedLeaf2));

    cactusCapTestTeardown();
}
/*
 * Checks that cap_makeAdjacent() links two caps and that cap_getAdjacency()
 * reports the link consistently from all four cap orientations.
 */
void testCap_adjacent(CuTest* testCase) {
    cactusCapTestSetup();

    //No adjacency exists before the caps are joined.
    CuAssertTrue(testCase, cap_getAdjacency(leaf1Cap) == NULL);
    CuAssertTrue(testCase, cap_getAdjacency(leaf3Cap) == NULL);

    cap_makeAdjacent(leaf1Cap, leaf3Cap);

    Cap *reversedLeaf1 = cap_getReverse(leaf1Cap);
    Cap *reversedLeaf3 = cap_getReverse(leaf3Cap);

    //Each cap points at the reverse of its partner...
    CuAssertTrue(testCase, cap_getAdjacency(leaf1Cap) == reversedLeaf3);
    CuAssertTrue(testCase, cap_getAdjacency(leaf3Cap) == reversedLeaf1);
    //...and each reversed cap points at the partner itself.
    CuAssertTrue(testCase, cap_getAdjacency(reversedLeaf1) == leaf3Cap);
    CuAssertTrue(testCase, cap_getAdjacency(reversedLeaf3) == leaf1Cap);

    cactusCapTestTeardown();
}
/*
 * Checks that the caps of a freshly constructed segment report that segment
 * (and its orientation), and that their reverses report the reversed segment.
 */
void testCap_getSegment(CuTest* testCase) {
    cactusCapTestSetup();

    Block *block = block_construct(2, flower);
    Segment *segment = segment_construct(block, rootEvent);
    Cap *fivePrimeCap = segment_get5Cap(segment);
    Cap *threePrimeCap = segment_get3Cap(segment);

    //Both caps belong to the segment.
    CuAssertTrue(testCase, cap_getSegment(fivePrimeCap) == segment);
    CuAssertTrue(testCase, cap_getSegment(threePrimeCap) == segment);

    //Both caps share the segment's orientation.
    CuAssertTrue(testCase, cap_getOrientation(fivePrimeCap) == segment_getOrientation(segment));
    CuAssertTrue(testCase, cap_getOrientation(threePrimeCap) == segment_getOrientation(segment));

    //The reversed caps belong to the reversed segment.
    CuAssertTrue(testCase, cap_getSegment(cap_getReverse(fivePrimeCap)) == segment_getReverse(segment));
    CuAssertTrue(testCase, cap_getSegment(cap_getReverse(threePrimeCap)) == segment_getReverse(segment));

    cactusCapTestTeardown();
}
/*
 * Checks cap_getChild() on the root cap of the fixture.
 */
void testCap_getChild(CuTest* testCase) {
    cactusCapTestSetup();

    CuAssertTrue(testCase, cap_getChild(rootCap, 0) == leaf1Cap);

    //leaf2Cap is at the end of the child list (index 2 rather than 1) when
    //it's been serialized, deleted, and unserialized.
    int64_t leaf2Index = nestedTest ? 2 : 1;
    CuAssertTrue(testCase, cap_getChild(rootCap, leaf2Index) == cap_getReverse(leaf2Cap));

    cactusCapTestTeardown();
}
/*
 * Checks that a cap's orientation always agrees with the orientation of its
 * end, for the fixture caps and for their reverses.
 */
void testCap_getOrientation(CuTest* testCase) {
    cactusCapTestSetup();

    Cap *fixtureCaps[] = { rootCap, leaf1Cap, leaf2Cap };
    const int64_t fixtureCapNumber = 3;

    //Forward caps agree with their own end.
    for (int64_t i = 0; i < fixtureCapNumber; i++) {
        Cap *cap = fixtureCaps[i];
        CuAssertTrue(testCase, cap_getOrientation(cap) == end_getOrientation(cap_getEnd(cap)));
    }

    //Reversed caps agree with the reverse of the forward cap's end.
    for (int64_t i = 0; i < fixtureCapNumber; i++) {
        Cap *cap = fixtureCaps[i];
        CuAssertTrue(testCase, cap_getOrientation(cap_getReverse(cap))
                == end_getOrientation(end_getReverse(cap_getEnd(cap))));
    }

    //leaf1Cap is oriented like the root, leaf2Cap the opposite way.
    CuAssertTrue(testCase, cap_getOrientation(leaf1Cap) == cap_getOrientation(rootCap));
    CuAssertTrue(testCase, cap_getOrientation(leaf1Cap) != cap_getOrientation(leaf2Cap));

    cactusCapTestTeardown();
}
/*
 * Find reference intervals that are book-ended by stubs created in a child flower.
 *
 * For every stub end of a nested flower that has no counterpart in "flower",
 * the reference-event cap and the cap found by traceThreadLength() are copied
 * up into the parent, made adjacent there, and given the traced adjacency
 * length. Copies are recorded in recoveredCaps by copyCapToParent().
 */
static void recoverBrokenAdjacencies(Flower *flower, stList *recoveredCaps, Name referenceEventName) {
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        Flower *nestedFlower = group_getNestedFlower(group);
        if (nestedFlower == NULL) {
            continue; //Nothing nested below this group.
        }

        Flower_EndIterator *endIt = flower_getEndIterator(nestedFlower);
        End *childEnd;
        while ((childEnd = flower_getNextEnd(endIt)) != NULL) {
            //Only stub ends that are missing from the parent flower need promoting.
            if (!end_isStubEnd(childEnd) || flower_getEnd(flower, end_getName(childEnd)) != NULL) {
                continue;
            }

            //We have a thread we need to promote.
            Cap *childCap = getCapForReferenceEvent(childEnd, referenceEventName); //The cap in the reference
            assert(childCap != NULL);
            assert(!end_isAttached(childEnd));
            if (!cap_getStrand(childCap)) {
                childCap = cap_getReverse(childCap);
            }

            //Handle each thread once, from the cap whose side is unset.
            if (cap_getSide(childCap)) {
                continue;
            }

            Cap *adjacentChildCap = NULL;
            int64_t adjacencyLength = traceThreadLength(childCap, &adjacentChildCap);

            Cap *cap = copyCapToParent(childCap, recoveredCaps);
            assert(adjacentChildCap != NULL);
            assert(!end_isAttached(cap_getEnd(adjacentChildCap)));
            assert(!cap_getSide(cap));

            Cap *adjacentCap = copyCapToParent(adjacentChildCap, recoveredCaps);
            cap_makeAdjacent(cap, adjacentCap);
            setAdjacencyLength(cap, adjacentCap, adjacencyLength);
        }
        flower_destructEndIterator(endIt);
    }
    flower_destructGroupIterator(groupIt);
}
/*
 * Creates adjacencies between the caps of a flower: all caps are collected in
 * their cap_getStrand()-true orientation, sorted with addAdjacenciesPP, and
 * then joined pairwise in sorted order.
 */
void stCaf_addAdjacencies(Flower *flower) {
    //Gather every cap of every end, normalised to the orientation whose strand is set.
    stList *caps = stList_construct();
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        Cap *cap;
        while ((cap = end_getNext(capIt)) != NULL) {
            stList_append(caps, cap_getStrand(cap) ? cap : cap_getReverse(cap));
        }
        end_destructInstanceIterator(capIt);
    }
    flower_destructEndIterator(endIt);
    assert(stList_length(caps) % 2 == 0);

    //Order the caps so that partners end up next to each other.
    stList_sort(caps, (int(*)(const void *, const void *)) addAdjacenciesPP);

    //Join consecutive pairs: (0, 1), (2, 3), ...
    int64_t capNumber = stList_length(caps);
    for (int64_t i = 0; i + 1 < capNumber; i += 2) {
        Cap *left = stList_get(caps, i);
        Cap *right = stList_get(caps, i + 1);
        cap_makeAdjacent(left, right);
    }

    stList_destruct(caps);
}
/*
 * Checks cap_getParent() on the fixture's root and leaf caps.
 */
void testCap_getParent(CuTest* testCase) {
    cactusCapTestSetup();

    //The root has no parent.
    Cap *parentOfRoot = cap_getParent(rootCap);
    CuAssertTrue(testCase, parentOfRoot == NULL);

    //leaf1Cap hangs off the root as is.
    Cap *parentOfLeaf1 = cap_getParent(leaf1Cap);
    CuAssertTrue(testCase, parentOfLeaf1 == rootCap);

    //leaf2Cap hangs off the reversed root.
    Cap *parentOfLeaf2 = cap_getParent(leaf2Cap);
    CuAssertTrue(testCase, parentOfLeaf2 == cap_getReverse(rootCap));

    cactusCapTestTeardown();
}
/*
 * Checks that a face set with cap_setTopFace() is returned by
 * cap_getTopFace() for both orientations of the cap.
 */
void testCap_getTopFace(CuTest* testCase) {
    cactusCapTestSetup();

    Face *topFace = face_construct(flower);
    cap_setTopFace(rootCap, topFace);

    Cap *reversedRoot = cap_getReverse(rootCap);
    CuAssertTrue(testCase, cap_getTopFace(rootCap) == topFace);
    CuAssertTrue(testCase, cap_getTopFace(reversedRoot) == topFace);

    cactusCapTestTeardown();
}
/*
 * Follows a cap down through nested flowers until it reaches a group that has
 * no nested flower, and returns the copy of the cap found there.
 *
 * At each level the nested cap is looked up by name; it is used as returned by
 * flower_getCap() when the current cap's orientation is set, and reversed
 * otherwise.
 */
Cap *getTerminalCap(Cap *cap) {
    for (;;) {
        Flower *nestedFlower = group_getNestedFlower(end_getGroup(cap_getEnd(cap)));
        if (nestedFlower == NULL) {
            return cap; //Reached the bottom of the hierarchy.
        }
        Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
        assert(nestedCap != NULL);
        cap = cap_getOrientation(cap) ? nestedCap : cap_getReverse(nestedCap);
    }
}
/*
 * Checks cap_getOtherSegmentCap(): NULL for a cap that is not part of a
 * segment, otherwise the cap at the opposite end of the segment, in the
 * matching orientation.
 */
void testCap_getOtherSegmentCap(CuTest *testCase) {
    cactusCapTestSetup();

    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 1, sequence);
    Cap *fivePrimeCap = segment_get5Cap(segment);
    Cap *threePrimeCap = segment_get3Cap(segment);

    //leaf1Cap is not a segment cap, in either orientation.
    CuAssertTrue(testCase, cap_getOtherSegmentCap(leaf1Cap) == NULL);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(cap_getReverse(leaf1Cap)) == NULL);

    //The two segment caps are each other's partner.
    CuAssertTrue(testCase, cap_getOtherSegmentCap(fivePrimeCap) == threePrimeCap);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(threePrimeCap) == fivePrimeCap);

    //The same holds for their reverses.
    Cap *reversedFivePrimeCap = cap_getReverse(fivePrimeCap);
    Cap *reversedThreePrimeCap = cap_getReverse(threePrimeCap);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(reversedFivePrimeCap) == reversedThreePrimeCap);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(reversedThreePrimeCap) == reversedFivePrimeCap);

    cactusCapTestTeardown();
}
/*
 * Checks end_getCapForEvent() for the root event, the leaf event and the
 * null name.
 */
void testEnd_getCapForEvent(CuTest* testCase) {
    cactusEndTestSetup();

    //The reversed end yields rootCap for the root event.
    CuAssertPtrEquals(testCase, end_getCapForEvent(end_getReverse(end), event_getName(rootEvent)), rootCap);

    //Any of the three leaf caps (in the end's orientation) is acceptable for the leaf event.
    Cap *cap = end_getCapForEvent(end, event_getName(leafEvent));
    Cap *acceptable[] = { cap_getReverse(leaf1Cap), leaf2Cap, cap_getReverse(leaf3Cap) };
    bool matched = 0;
    for (int64_t i = 0; i < 3 && !matched; i++) {
        matched = (cap == acceptable[i]);
    }
    CuAssertTrue(testCase, matched);

    //No cap exists for the null event name.
    CuAssertTrue(testCase, end_getCapForEvent(end, NULL_NAME) == NULL);

    cactusEndTestTeardown();
}
/*
 * Checks end_getRootInstance() on the fixture end and on a freshly constructed
 * end that has no root instance.
 */
void testEnd_getSetRootInstance(CuTest* testCase) {
    cactusEndTestSetup();

    //The fixture end reports the reversed root cap; its reverse reports rootCap.
    Cap *reversedRoot = cap_getReverse(rootCap);
    CuAssertTrue(testCase, end_getRootInstance(end) == reversedRoot);
    CuAssertTrue(testCase, end_getRootInstance(end_getReverse(end)) == rootCap);

    //A new end has no root instance in either orientation.
    End *emptyEnd = end_construct(0, flower);
    CuAssertTrue(testCase, end_getRootInstance(emptyEnd) == NULL);
    CuAssertTrue(testCase, end_getRootInstance(end_getReverse(emptyEnd)) == NULL);

    cactusEndTestTeardown();
}
/*
 * Builds an adjacency sequence from the reverse of cap2 with a maximum length
 * of 0 and checks every field of the result.
 */
static void testAdjacencySequence_4(CuTest *testCase) {
    setup();

    Cap *reversedCap2 = cap_getReverse(cap2);
    AdjacencySequence *adjacencySequence = adjacencySequence_construct(reversedCap2, 0);

    //The subsequence is identified by the name of cap1.
    CuAssertTrue(testCase, adjacencySequence->subsequenceIdentifier == cap_getName(cap1));
    CuAssertIntEquals(testCase, adjacencySequence->start, 4);
    CuAssertIntEquals(testCase, adjacencySequence->strand, 0);
    //With a zero length limit the sequence is empty.
    CuAssertIntEquals(testCase, adjacencySequence->length, 0);
    CuAssertStrEquals(testCase, "", adjacencySequence->string);

    adjacencySequence_destruct(adjacencySequence);
    teardown();
}
/*
 * Round-trips the fixture end through its binary representation, rebinds the
 * fixture cap globals to the reloaded caps, and then re-runs the other end
 * tests against the reloaded end with nestedTest set.
 */
void testEnd_serialisation(CuTest* testCase) {
    cactusEndTestSetup();

    //Remember the cap names so the caps can be found again after reloading.
    Name rootName = cap_getName(rootCap);
    Name leaf1Name = cap_getName(leaf1Cap);
    Name leaf2Name = cap_getName(leaf2Cap);
    Name leaf3Name = cap_getName(leaf3Cap);

    //Serialise the end, then destroy the in-memory copy.
    int64_t byteNumber;
    void *buffer = binaryRepresentation_makeBinaryRepresentation(end,
            (void(*)(void *, void(*)(const void *, size_t, size_t))) end_writeBinaryRepresentation, &byteNumber);
    CuAssertTrue(testCase, byteNumber > 0);
    end_destruct(end);

    //Reload from a cursor so that "buffer" itself can still be freed afterwards.
    void *cursor = buffer;
    end = end_loadFromBinaryRepresentation(&cursor, flower);
    rootCap = cap_getReverse(end_getInstance(end, rootName));
    leaf1Cap = cap_getReverse(end_getInstance(end, leaf1Name));
    leaf2Cap = end_getInstance(end, leaf2Name);
    leaf3Cap = cap_getReverse(end_getInstance(end, leaf3Name));
    CuAssertTrue(testCase, leaf3Cap != NULL);
    free(buffer);

    //Re-run the remaining end tests on the reloaded end.
    nestedTest = 1;
    testEnd_copyConstruct(testCase);
    testEnd_getName(testCase);
    testEnd_getOrientation(testCase);
    testEnd_getReverse(testCase);
    testEnd_getSide(testCase);
    testEnd_getFlower(testCase);
    testEnd_getBlock(testCase);
    testEnd_getOtherBlockEnd(testCase);
    testEnd_getGroup(testCase);
    testEnd_setGroup(testCase);
    testEnd_getInstanceNumber(testCase);
    testEnd_getInstance(testCase);
    testEnd_getFirst(testCase);
    testEnd_getSetRootInstance(testCase);
    testEnd_instanceIterator(testCase);
    testEnd_isBlockOrStubEnd(testCase);
    testEnd_isAttachedOrFree(testCase);
    testEnd_getCapForEvent(testCase);
    nestedTest = 0;

    cactusEndTestTeardown();
}
/*
 * Tests the coordinates of a segment and its 5 and 3 prime caps when the
 * segment is constructed with its strand argument set to 0.
 */
void testCap_segmentCoordinatesReverseStrand(CuTest* testCase) {
    cactusCapTestSetup();

    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 0, sequence);
    Cap *fivePrimeCap = segment_get5Cap(segment);
    Cap *threePrimeCap = segment_get3Cap(segment);

    //The caps as constructed.
    CuAssertTrue(testCase, cap_getSide(fivePrimeCap));
    CuAssertTrue(testCase, !cap_getSide(threePrimeCap));
    CuAssertTrue(testCase, !cap_getStrand(fivePrimeCap));
    CuAssertTrue(testCase, !cap_getStrand(threePrimeCap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(fivePrimeCap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(threePrimeCap));

    //The segment as constructed.
    CuAssertTrue(testCase, !segment_getStrand(segment));
    CuAssertIntEquals(testCase, 4, segment_getStart(segment));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment));

    //Reversing the caps flips side and strand but keeps the coordinates.
    Cap *reversedFivePrimeCap = cap_getReverse(fivePrimeCap);
    Cap *reversedThreePrimeCap = cap_getReverse(threePrimeCap);
    CuAssertTrue(testCase, !cap_getSide(reversedFivePrimeCap));
    CuAssertTrue(testCase, cap_getSide(reversedThreePrimeCap));
    CuAssertTrue(testCase, cap_getStrand(reversedFivePrimeCap));
    CuAssertTrue(testCase, cap_getStrand(reversedThreePrimeCap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(reversedFivePrimeCap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(reversedThreePrimeCap));

    //Reversing the segment flips the strand and moves the start; the length is unchanged.
    Segment *reversedSegment = segment_getReverse(segment);
    CuAssertTrue(testCase, segment_getStrand(reversedSegment));
    CuAssertIntEquals(testCase, 2, segment_getStart(reversedSegment));
    CuAssertIntEquals(testCase, 3, segment_getLength(reversedSegment));

    cactusCapTestTeardown();
}
/*
 * Run on each flower, top down. Sets the coordinates of each reference cap to
 * the correct sequence, and sets the bases of the reference sequence to be
 * consensus bases.
 *
 * For every reference-event cap whose end sits in a non-leaf group, the
 * matching cap in the nested flower is located and handed to setCoordinates()
 * together with the parent cap's coordinate.
 */
void topDown(Flower *flower, Name referenceEventName) {
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
        if (cap == NULL) {
            continue;
        }
        if (!cap_getStrand(cap)) {
            cap = cap_getReverse(cap);
        }
        //Only start from the cap whose side is unset.
        if (cap_getSide(cap)) {
            continue;
        }
        assert(cap_getCoordinate(cap) != INT64_MAX);
        Sequence *sequence = cap_getSequence(cap);
        assert(sequence != NULL);

        Group *group = end_getGroup(end);
        if (group_isLeaf(group)) {
            continue; //Nothing nested to propagate coordinates into.
        }

        Flower *nestedFlower = group_getNestedFlower(group);
        Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
        assert(nestedCap != NULL);
        if (!cap_getStrand(nestedCap)) {
            nestedCap = cap_getReverse(nestedCap);
        }
        assert(cap_getStrand(nestedCap));
        assert(!cap_getSide(nestedCap));

        int64_t endCoordinate = setCoordinates(nestedFlower, sequence_getMetaSequence(sequence), nestedCap,
                cap_getCoordinate(cap));
        (void) endCoordinate; //Only read by the asserts below.
        assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(cap)));
        assert(endCoordinate == cap_getCoordinate(
                flower_getCap(nestedFlower, cap_getName(cap_getAdjacency(cap)))));
    }
    flower_destructEndIterator(endIt);
}
/*
 * Checks cap_getTopCap() on a small cap tree while adjacencies are added step
 * by step: first none, then one at the leaf level, then at the intermediate
 * and root levels.
 */
void testCap_getTopCap(CuTest* testCase) {
    cactusCapTestSetup();

    End *end1 = end_construct(0, flower);
    End *end2 = end_construct(0, flower);
    End *end3 = end_construct(0, flower);
    Event *intermediateEvent = event_construct4(NULL, 0.0, rootEvent, leafEvent, eventTree);

    //Tree on end1: root -> intermediate -> two leaves.
    Cap *cap1T = cap_construct(end1, rootEvent);
    Cap *cap1I = cap_construct(end1, intermediateEvent);
    Cap *cap1L1 = cap_construct(end1, leafEvent);
    Cap *cap1L2 = cap_construct(end1, leafEvent);
    cap_makeParentAndChild(cap1I, cap1L1);
    cap_makeParentAndChild(cap1I, cap1L2);
    cap_makeParentAndChild(cap1T, cap1I);
    end_setRootInstance(end1, cap1T);
    assert(end_getRootInstance(end1) == cap1T);

    //Without any adjacency nothing has a top cap.
    CuAssertTrue(testCase, cap_getTopCap(cap1L1) == NULL);
    CuAssertTrue(testCase, cap_getTopCap(cap_getReverse(cap1L1)) == NULL);
    CuAssertTrue(testCase, cap_getTopCap(cap1L2) == NULL);
    CuAssertTrue(testCase, cap_getTopCap(cap1I) == NULL);

    //Tree on end2: root -> leaf, with the leaf adjacent to cap1L1.
    Cap *cap2T = cap_construct(end2, rootEvent);
    Cap *cap2L = cap_construct(end2, leafEvent);
    cap_makeParentAndChild(cap2T, cap2L);
    end_setRootInstance(end2, cap2T);
    cap_makeAdjacent(cap1L1, cap2L);

    //The leaf's top cap is now the root; the intermediate still has none.
    CuAssertTrue(testCase, cap_getTopCap(cap1L1) == cap1T);
    CuAssertTrue(testCase, cap_getTopCap(cap_getReverse(cap1L1)) == cap_getReverse(cap1T));
    CuAssertTrue(testCase, cap_getTopCap(cap1I) == NULL);

    //Tree on end3: root -> intermediate, adjacent to end1's tree at both levels.
    Cap *cap3T = cap_construct(end3, rootEvent);
    Cap *cap3I = cap_construct(end3, intermediateEvent);
    cap_makeParentAndChild(cap3T, cap3I);
    end_setRootInstance(end3, cap3T);
    cap_makeAdjacent(cap1I, cap3I);
    cap_makeAdjacent(cap1T, cap3T);

    //The leaf's top cap drops to the intermediate, whose own top cap is the root.
    CuAssertTrue(testCase, cap_getTopCap(cap1L1) == cap1I);
    CuAssertTrue(testCase, cap_getTopCap(cap_getReverse(cap1L1)) == cap_getReverse(cap1I));
    CuAssertTrue(testCase, cap_getTopCap(cap1I) == cap1T);
    CuAssertTrue(testCase, cap_getTopCap(cap_getReverse(cap1I)) == cap_getReverse(cap1T));
    CuAssertTrue(testCase, cap_getTopCap(cap1T) == NULL);

    cactusCapTestTeardown();
}
/*
 * Copies the end of the given cap into the flower that contains the parent
 * group of the cap's flower, assigns the copy to that parent group, and
 * returns the copied cap with the same name in the orientation whose strand is
 * set.
 *
 * The returned cap is also appended to recoveredCaps when its side is unset.
 */
static Cap *copyCapToParent(Cap *cap, stList *recoveredCaps) {
    End *childEnd = cap_getEnd(cap);
    assert(childEnd != NULL);
    Group *parentGroup = flower_getParentGroup(end_getFlower(childEnd));
    assert(parentGroup != NULL);

    //Duplicate the whole end one level up and attach it to the parent group.
    End *parentEnd = end_copyConstruct(childEnd, group_getFlower(parentGroup));
    end_setGroup(parentEnd, parentGroup);

    //Find the duplicate of our cap inside the copied end.
    Cap *parentCap = end_getInstance(parentEnd, cap_getName(cap));
    assert(parentCap != NULL);
    if (!cap_getStrand(parentCap)) {
        parentCap = cap_getReverse(parentCap);
    }

    if (!cap_getSide(parentCap)) {
        stList_append(recoveredCaps, parentCap);
    }
    return parentCap;
}
/*
 * Returns the string of bases lying strictly between the terminal copy of the
 * given cap and the cap it is adjacent to.
 *
 * When getTerminalAdjacencyLength_ignoreAdjacencies is set, a copy of the
 * empty string is returned instead.
 */
char *getTerminalAdjacencySubString(Cap *cap) {
    if (getTerminalAdjacencyLength_ignoreAdjacencies) {
        return stString_copy("");
    }

    cap = getTerminalCap(cap);
    if (!cap_getStrand(cap)) { //This ensures the asserts are as expected.
        cap = cap_getReverse(cap);
    }
    Cap *adjacentCap = cap_getAdjacency(cap);

    int64_t offset = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
    assert(offset != 0);

    //Work out which of the two caps has the lower coordinate.
    Cap *lowerCap;
    Cap *upperCap;
    if (offset > 0) {
        lowerCap = adjacentCap;
        upperCap = cap;
    } else {
        lowerCap = cap;
        upperCap = adjacentCap;
    }
    assert(cap_getSide(upperCap));
    assert(!cap_getSide(lowerCap));

    //The interval starts one past the lower cap and stops one short of the upper cap.
    int64_t length = (offset > 0 ? offset : -offset) - 1;
    return sequence_getString(cap_getSequence(cap), cap_getCoordinate(lowerCap) + 1, length, 1);
}
/*
 * Collects, over all the given flowers, the reference-event cap of every stub
 * end, keeping only caps whose side is unset once they are in the orientation
 * whose strand is set. The caller owns the returned list.
 */
static stList *getCaps(stList *flowers, Name referenceEventName) {
    stList *caps = stList_construct();
    int64_t flowerNumber = stList_length(flowers);
    for (int64_t i = 0; i < flowerNumber; i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (!end_isStubEnd(end)) {
                continue;
            }
            Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
            if (cap == NULL) {
                continue;
            }
            if (!cap_getStrand(cap)) {
                cap = cap_getReverse(cap);
            }
            if (!cap_getSide(cap)) {
                stList_append(caps, cap);
            }
        }
        flower_destructEndIterator(endIt);
    }
    return caps;
}
/*
 * Sums, over every thread that starts at a non-block end of the flower, the
 * number of bases between the starting cap and the non-block cap that closes
 * the thread.
 *
 * The implementation of this function is very like that in
 * group_getTotalBaseLength, with a few differences. Consider merging them.
 */
int64_t flower_getTotalBaseLength(Flower *flower) {
    int64_t totalLength = 0;
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        if (end_isBlockEnd(end)) {
            continue; //Threads are only started from non-block ends.
        }
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        Cap *startCap;
        while ((startCap = end_getNext(capIt)) != NULL) {
            if (!cap_getStrand(startCap)) {
                startCap = cap_getReverse(startCap);
            }
            //Count each thread once, from the cap whose side is unset, and only if it has a sequence.
            if (cap_getSide(startCap) || cap_getSequence(startCap) == NULL) {
                continue;
            }

            //Walk along the thread, hopping over segments, until a non-block end is reached.
            Cap *stopCap = cap_getAdjacency(startCap);
            assert(stopCap != NULL);
            while (end_isBlockEnd(cap_getEnd(stopCap))) {
                Segment *segment = cap_getSegment(stopCap);
                assert(segment != NULL);
                assert(segment_get5Cap(segment) == stopCap);
                stopCap = cap_getAdjacency(segment_get3Cap(segment));
                assert(stopCap != NULL);
                assert(cap_getStrand(stopCap));
                assert(cap_getSide(stopCap));
            }
            assert(cap_getStrand(stopCap));
            assert(cap_getSide(stopCap));

            //Bases strictly between the two bounding caps.
            int64_t length = cap_getCoordinate(stopCap) - cap_getCoordinate(startCap) - 1;
            assert(length >= 0);
            totalLength += length;
        }
        end_destructInstanceIterator(capIt);
    }
    flower_destructEndIterator(endIt);
    return totalLength;
}
/*
 * Returns the segment attached to the given cap. If the cap has none, the cap
 * with the same name is looked up in successive parent flowers until one with
 * a segment is found.
 *
 * Returns NULL when the top of the flower hierarchy is reached (or the cap is
 * missing from the parent) without finding a segment.
 */
Segment *getCapsSegment(Cap *cap) {
    for (;;) {
        Segment *segment = cap_getSegment(cap);
        if (segment != NULL) {
            return segment;
        }
        assert(!end_isBlockEnd(cap_getEnd(cap)));
        assert(end_isStubEnd(cap_getEnd(cap)));

        //Walk up to get the next adjacency.
        Group *parentGroup = flower_getParentGroup(end_getFlower(cap_getEnd(cap)));
        if (parentGroup == NULL) {
            return NULL;
        }
        Cap *parentCap = flower_getCap(group_getFlower(parentGroup), cap_getName(cap));
        if (parentCap == NULL) {
            //Cap must be a free stub end.
            assert(0); //Not in the current alignments.
            assert(end_isFree(cap_getEnd(cap)));
            return NULL;
        }
        assert(cap_getOrientation(parentCap));

        //Continue from the parent's copy, in the same orientation as the current cap.
        cap = cap_getOrientation(cap) ? parentCap : cap_getReverse(parentCap);
    }
}
/*
 * Get the set of substrings for sequence intervals in the given set of flowers.
 *
 * One substring is created for every non-empty interval that lies between a
 * stub-end cap (with a sequence and with its side unset) and its adjacent cap.
 * The returned list destroys its substrings with substring_destruct.
 */
static stList *getSubstringsForFlowers(stList *flowers) {
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    int64_t flowerNumber = stList_length(flowers);
    for (int64_t i = 0; i < flowerNumber; i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (!end_isStubEnd(end)) {
                continue;
            }
            End_InstanceIterator *capIt = end_getInstanceIterator(end);
            Cap *cap;
            while ((cap = end_getNext(capIt)) != NULL) {
                Sequence *sequence = cap_getSequence(cap);
                if (sequence == NULL) {
                    continue;
                }
                if (!cap_getStrand(cap)) {
                    cap = cap_getReverse(cap);
                }
                if (cap_getSide(cap)) {
                    continue;
                }

                //We have a sequence interval of interest.
                Cap *adjacentCap = cap_getAdjacency(cap);
                assert(adjacentCap != NULL);
                int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1;
                assert(length >= 0);
                if (length > 0) {
                    //The start is expressed relative to the start of the sequence.
                    stList_append(substrings,
                            substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                    cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length));
                }
            }
            end_destructInstanceIterator(capIt);
        }
        flower_destructEndIterator(endIt);
    }
    return substrings;
}
/*
 * Makes an alignment of the adjacency sequences of the caps in the given end.
 *
 * The sequences are aligned with makeAlignment(); each aligned pair is then
 * converted to a pair of AlignedPair records (forward and reverse) whose
 * scores are reweighted per sequence, separately for pairs of sequences that
 * share the same end on the far side of the adjacency ("common ends") and for
 * pairs that do not.
 *
 * Returns a sorted set of AlignedPair, ordered by alignedPair_cmpFn, that
 * destroys its members with alignedPair_destruct.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Get the adjacency sequences to be aligned.
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free); //Other end -> number of seqFrags leading to it.

    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    Cap *cap;
    while ((cap = end_getNext(capIt)) != NULL) {
        if (cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);

        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));

        //Increase count of seqFrags with a given end.
        int64_t *instanceCount = stHash_search(endInstanceNumbers, otherEnd);
        if (instanceCount == NULL) {
            instanceCount = st_calloc(1, sizeof(int64_t));
            assert(*instanceCount == 0);
            stHash_insert(endInstanceNumbers, otherEnd, instanceCount);
        }
        (*instanceCount)++;
    }
    end_destructInstanceIterator(capIt);

    //Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma,
            pairwiseAlignmentBandingParameters);

    //Build arrays counting the pairwise alignments each sequence takes part in,
    //distinguishing alignments between sequences sharing common ends.
    int64_t seqFragNumber = stList_length(seqFrags);
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(seqFragNumber, sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(seqFragNumber, sizeof(int64_t));
    for (int64_t k = 0; k < stList_length(mA->chosenPairwiseAlignments); k++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, k);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        if (seqFrag1->rightEndId == seqFrag2->rightEndId) {
            pairwiseAlignmentsPerSequenceCommonEnds[seq1]++;
            pairwiseAlignmentsPerSequenceCommonEnds[seq2]++;
        } else {
            pairwiseAlignmentsPerSequenceNonCommonEnds[seq1]++;
            pairwiseAlignmentsPerSequenceNonCommonEnds[seq2]++;
        }
    }

    //Now calculate score adjustments. INT64_MIN marks "no adjustment available".
    double *scoreAdjustmentsNonCommonEnds = st_malloc(seqFragNumber * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(seqFragNumber * sizeof(double));
    for (int64_t k = 0; k < seqFragNumber; k++) {
        SeqFrag *seqFrag = stList_get(seqFrags, k);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = seqFragNumber - commonInstanceNumber;
        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[k] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[k] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[k] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[k] >= 0);

        //Possible partners with a different far end, per alignment actually chosen.
        if (pairwiseAlignmentsPerSequenceNonCommonEnds[k] > 0) {
            scoreAdjustmentsNonCommonEnds[k] = ((double)nonCommonInstanceNumber)
                    / pairwiseAlignmentsPerSequenceNonCommonEnds[k];
            assert(scoreAdjustmentsNonCommonEnds[k] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[k] <= nonCommonInstanceNumber);
        } else {
            scoreAdjustmentsNonCommonEnds[k] = INT64_MIN;
        }

        //Possible partners with the same far end (excluding the sequence itself), per alignment chosen.
        if (pairwiseAlignmentsPerSequenceCommonEnds[k] > 0) {
            scoreAdjustmentsCommonEnds[k] = ((double)commonInstanceNumber - 1)
                    / pairwiseAlignmentsPerSequenceCommonEnds[k];
            assert(scoreAdjustmentsCommonEnds[k] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[k] <= commonInstanceNumber - 1);
        } else {
            scoreAdjustmentsCommonEnds[k] = INT64_MIN;
        }
    }

    //Convert the alignment pairs to an alignment of the caps.
    stSortedSet *sortedAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
            (void (*)(void *))alignedPair_destruct);
    while (stList_length(mA->alignedPairs) > 0) {
        //Tuple layout: (score, sequence index 1, offset 1, sequence index 2, offset 2).
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *adjacencySequence1 = stList_get(sequences, seqIndex1);
        AdjacencySequence *adjacencySequence2 = stList_get(sequences, seqIndex2);
        assert(adjacencySequence1 != adjacencySequence2);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);

        int64_t score = stIntTuple_get(alignedPair, 0);
        if (score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);

        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments;
        if (seqFrag1->rightEndId == seqFrag2->rightEndId) {
            scoreAdjustments = scoreAdjustmentsCommonEnds;
        } else {
            scoreAdjustments = scoreAdjustmentsNonCommonEnds;
        }
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);

        //Offsets are added or subtracted according to each sequence's strand.
        //Do the reweighting here.
        AlignedPair *reweightedPair = alignedPair_construct(
                adjacencySequence1->subsequenceIdentifier,
                adjacencySequence1->start + (adjacencySequence1->strand ? offset1 : -offset1),
                adjacencySequence1->strand,
                adjacencySequence2->subsequenceIdentifier,
                adjacencySequence2->start + (adjacencySequence2->strand ? offset2 : -offset2),
                adjacencySequence2->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]);
        assert(stSortedSet_search(sortedAlignment, reweightedPair) == NULL);
        assert(stSortedSet_search(sortedAlignment, reweightedPair->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, reweightedPair);
        stSortedSet_insert(sortedAlignment, reweightedPair->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);
    return sortedAlignment;
}