/*
 * Decides whether segmentX and segmentY are linked within the event whose
 * header equals eventString.
 *
 * Both segments must be on the positive strand (asserted).
 *
 * If segmentX starts before segmentY, every pair of instances (one from each
 * segment's block) whose events both carry the header eventString is examined:
 *   - *aligned is set as soon as one such pair exists;
 *   - the function returns true for the first pair that shares a meta-sequence
 *     and for which capsAreAdjacent() accepts the 3' cap of the X instance and
 *     the 5' cap of the Y instance.
 *
 * Otherwise the two segments must be the same segment (asserted); the result
 * and *aligned are then both true exactly when hasCapInEvent() accepts the
 * 5' end of the segment's block for eventString.
 *
 * difference is currently unused: the filter that compared it against the
 * separation distance reported by capsAreAdjacent() is disabled.
 *
 * aligned is an out-parameter and is always written.
 */
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference, const char *eventString, bool *aligned) {
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = false;

    if (segment_getStart(segmentX) >= segment_getStart(segmentY)) {
        // Not strictly ordered: only the "same segment" case is permitted.
        assert(segmentX == segmentY);
        if (hasCapInEvent(block_get5End(segment_getBlock(segmentX)), eventString)) {
            *aligned = true;
            return true;
        }
        return false;
    }

    Block *blockX = segment_getBlock(segmentX);
    Block *blockY = segment_getBlock(segmentY);
    bool isLinked = false;

    Block_InstanceIterator *instanceItX = block_getInstanceIterator(blockX);
    Segment *candidateX;
    while (!isLinked && (candidateX = block_getNext(instanceItX)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(candidateX)), eventString) != 0) {
            continue; // Instance belongs to a different event.
        }

        Block_InstanceIterator *instanceItY = block_getInstanceIterator(blockY);
        Segment *candidateY;
        while (!isLinked && (candidateY = block_getNext(instanceItY)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(candidateY)), eventString) != 0) {
                continue;
            }

            // Both blocks have an instance in the requested event.
            *aligned = true;

            if (sequence_getMetaSequence(segment_getSequence(candidateX))
                    != sequence_getMetaSequence(segment_getSequence(candidateY))) {
                continue; // Instances lie on different assembly sequences.
            }

            // Same assembly sequence: ask whether the 3' cap of the X instance
            // and the 5' cap of the Y instance are adjacent.
            int64_t separationDistance;
            if (capsAreAdjacent(segment_get3Cap(candidateX), segment_get5Cap(candidateY), &separationDistance)) {
                isLinked = true;
            }
        }
        block_destructInstanceIterator(instanceItY);
    }
    block_destructInstanceIterator(instanceItX);

    return isLinked;
}
/*
 * Builds a list holding one substring record for every segment, in every block
 * of the given flowers, that has an associated sequence.
 *
 * Each record is created by substring_construct() from the name of the
 * sequence's meta-sequence, the offset of the positive-strand segment relative
 * to the start of its sequence, and the segment length.
 *
 * The returned list is constructed with substring_destruct as its element
 * destructor; the caller is responsible for the list itself.
 */
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);

    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);

        // NOTE(review): the iterator is declared as Flower_EndIterator although it
        // walks blocks; kept as-is - confirm the two iterator types are compatible.
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        for (Block *block = flower_getNextBlock(blockIt); block != NULL; block = flower_getNextBlock(blockIt)) {
            Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
            for (Segment *segment = block_getNext(instanceIt); segment != NULL; segment = block_getNext(instanceIt)) {
                Sequence *sequence = segment_getSequence(segment);
                if (sequence == NULL) {
                    continue; // Segments without a sequence contribute nothing.
                }

                // Coordinates are taken from the positive-strand view of the segment.
                Segment *forward = segment;
                if (!segment_getStrand(segment)) {
                    forward = segment_getReverse(segment);
                }
                assert(segment_getLength(forward) > 0);

                stList_append(substrings,
                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                segment_getStart(forward) - sequence_getStart(sequence),
                                segment_getLength(forward)));
            }
            block_destructInstanceIterator(instanceIt);
        }
        flower_destructBlockIterator(blockIt);
    }

    return substrings;
}
/*
 * Accumulates base-coverage statistics for one block.
 *
 * Blocks shorter than minimumBlockLength are ignored. So are blocks containing
 * a segment of the "other reference" event when ignoreOtherReferenceBlocks is
 * set.
 *
 * For a block that is counted, with N = number of segments in the sample
 * event and K = number of distinct event headers that are neither the sample
 * nor the reference:
 *   - baseCoverages[K] grows by block length * N;
 *   - referenceBases grows by block length * N if the reference is present;
 *   - otherReferenceBases grows by block length * N if the other reference
 *     is present.
 *
 * fileHandle is not used by this function.
 */
static void getMAFBlock2(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    // First pass: find out which of the two reference events appear in the block.
    bool includesReference = false;
    bool includesOtherReference = false;
    Segment *segment;
    Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
    while ((segment = block_getNext(instanceIt)) != NULL) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, referenceEventString) == 0) {
            includesReference = true;
        } else if (strcmp(header, otherReferenceEventString) == 0) {
            includesOtherReference = true;
        }
    }
    block_destructInstanceIterator(instanceIt);

    if (ignoreOtherReferenceBlocks && includesOtherReference) {
        return;
    }

    // Second pass: count sample segments and collect the distinct headers of all
    // remaining non-reference events. The set stores the header pointers as
    // returned by event_getHeader() and is built with a NULL second argument.
    stSortedSet *otherSampleEvents = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);
    int32_t sampleNumber = 0;
    instanceIt = block_getInstanceIterator(block);
    while ((segment = block_getNext(instanceIt)) != NULL) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, sampleEventString) == 0) {
            sampleNumber++;
            continue;
        }
        if (strcmp(header, referenceEventString) != 0) {
            stSortedSet_insert(otherSampleEvents, (void *) header);
        }
    }
    block_destructInstanceIterator(instanceIt);

    baseCoverages[stSortedSet_size(otherSampleEvents)] += block_getLength(block) * sampleNumber;
    stSortedSet_destruct(otherSampleEvents);

    if (includesReference) {
        referenceBases += block_getLength(block) * sampleNumber;
    }
    if (includesOtherReference) {
        otherReferenceBases += block_getLength(block) * sampleNumber;
    }
}
/*
 * Exercises the block instance iterator: forward traversal, copying an
 * iterator part-way through, backward traversal, and traversal of the
 * reversed block.
 */
void testBlock_instanceIterator(CuTest* testCase) {
    cactusBlockTestSetup();

    // Forward walk over the block in its original orientation.
    Block_InstanceIterator *forwardIt = block_getInstanceIterator(block);
    CuAssertTrue(testCase, forwardIt != NULL);
    CuAssertTrue(testCase, block_getNext(forwardIt) == segment_getReverse(rootSegment));
    CuAssertTrue(testCase, block_getNext(forwardIt) == leaf1Segment);

    // Snapshot the iterator after two elements; the copy must continue from here.
    Block_InstanceIterator *copiedIt = block_copyInstanceIterator(forwardIt);

    // The original iterator finishes the walk and then rewinds to the start.
    CuAssertTrue(testCase, block_getNext(forwardIt) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getNext(forwardIt) == NULL);
    CuAssertTrue(testCase, block_getPrevious(forwardIt) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getPrevious(forwardIt) == leaf1Segment);
    CuAssertTrue(testCase, block_getPrevious(forwardIt) == segment_getReverse(rootSegment));
    CuAssertTrue(testCase, block_getPrevious(forwardIt) == NULL);

    // The copy yields the same remaining sequence and the same rewind.
    CuAssertTrue(testCase, block_getNext(copiedIt) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getNext(copiedIt) == NULL);
    CuAssertTrue(testCase, block_getPrevious(copiedIt) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getPrevious(copiedIt) == leaf1Segment);
    CuAssertTrue(testCase, block_getPrevious(copiedIt) == segment_getReverse(rootSegment));
    CuAssertTrue(testCase, block_getPrevious(copiedIt) == NULL);

    block_destructInstanceIterator(forwardIt);
    block_destructInstanceIterator(copiedIt);

    // Iterating the reversed block yields each instance in the opposite orientation.
    Block_InstanceIterator *reverseIt = block_getInstanceIterator(block_getReverse(block));
    CuAssertTrue(testCase, block_getNext(reverseIt) == rootSegment);
    CuAssertTrue(testCase, block_getNext(reverseIt) == segment_getReverse(leaf1Segment));
    CuAssertTrue(testCase, block_getNext(reverseIt) == leaf2Segment);
    CuAssertTrue(testCase, block_getNext(reverseIt) == NULL);
    CuAssertTrue(testCase, block_getPrevious(reverseIt) == leaf2Segment);
    CuAssertTrue(testCase, block_getPrevious(reverseIt) == segment_getReverse(leaf1Segment));
    CuAssertTrue(testCase, block_getPrevious(reverseIt) == rootSegment);
    CuAssertTrue(testCase, block_getPrevious(reverseIt) == NULL);
    block_destructInstanceIterator(reverseIt);

    cactusBlockTestTeardown();
}
/*
 * Returns the first segment of the block, in instance-iteration order, whose
 * event has the name eventName, or NULL if the block has no such segment.
 */
Segment *block_getSegmentForEvent(Block *block, Name eventName) {
    Segment *match = NULL;
    Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);

    for (Segment *candidate = block_getNext(instanceIt); candidate != NULL; candidate = block_getNext(instanceIt)) {
        if (event_getName(segment_getEvent(candidate)) == eventName) {
            match = candidate;
            break;
        }
    }

    // Single exit so the iterator is released in exactly one place.
    block_destructInstanceIterator(instanceIt);
    return match;
}
/*
 * Runs consistency checks (via cactusCheck) on a block:
 *   - the block is registered with its flower under its own name, and the
 *     flower reports that blocks have been built;
 *   - its 5' and 3' ends are block ends, share the block's orientation, point
 *     back to the block and have opposite sides;
 *   - the block has positive length;
 *   - the block and its reverse mirror each other (reverse-of-reverse,
 *     orientation, length, ends, instance count, first and root instances);
 *   - every segment instance passes segment_check().
 */
void block_check(Block *block) {
    // The flower must map this block's name to the positively oriented block.
    cactusCheck(flower_getBlock(block_getFlower(block), block_getName(block)) == block_getPositiveOrientation(block));
    // The flower must have been flagged as having built blocks.
    cactusCheck(flower_builtBlocks(block_getFlower(block)));

    // Both ends must be block ends that agree with the block.
    End *_5End = block_get5End(block);
    End *_3End = block_get3End(block);
    cactusCheck(end_isBlockEnd(_5End));
    cactusCheck(end_isBlockEnd(_3End));
    cactusCheck(end_getOrientation(_5End) == block_getOrientation(block));
    cactusCheck(end_getOrientation(_3End) == block_getOrientation(block));
    cactusCheck(end_getBlock(_5End) == block);
    cactusCheck(end_getBlock(_3End) == block);
    // The sides of the two ends must be consistent: set on the 5' end, clear on the 3' end.
    cactusCheck(end_getSide(_5End));
    cactusCheck(!end_getSide(_3End));

    // A block must have non-zero length.
    cactusCheck(block_getLength(block) > 0);

    // The reverse block must exist and mirror this block.
    Block *rBlock = block_getReverse(block);
    cactusCheck(rBlock != NULL);
    // Reversal must be an involution. The previous form of this check compared
    // block_getReverse(block) with the value just read from the same call, so
    // it could never fail.
    cactusCheck(block_getReverse(rBlock) == block);
    cactusCheck(block_getOrientation(block) == !block_getOrientation(rBlock));
    cactusCheck(block_getLength(block) == block_getLength(rBlock));
    cactusCheck(block_get5End(block) == end_getReverse(block_get3End(rBlock)));
    cactusCheck(block_get3End(block) == end_getReverse(block_get5End(rBlock)));
    cactusCheck(block_getInstanceNumber(block) == block_getInstanceNumber(rBlock));
    if (block_getInstanceNumber(block) > 0) {
        cactusCheck(block_getFirst(block) == segment_getReverse(block_getFirst(rBlock)));
        if (block_getRootInstance(block) == NULL) {
            cactusCheck(block_getRootInstance(rBlock) == NULL);
        } else {
            cactusCheck(block_getRootInstance(block) == segment_getReverse(block_getRootInstance(rBlock)));
        }
    }

    // Finally, check every segment instance of the block.
    Block_InstanceIterator *iterator = block_getInstanceIterator(block);
    Segment *segment;
    while ((segment = block_getNext(iterator)) != NULL) {
        segment_check(segment);
    }
    block_destructInstanceIterator(iterator);
}
/*
 * Splits block at splitPoint into two newly constructed blocks in the same
 * flower, then destroys the original block.
 *
 * splitPoint must satisfy 0 < splitPoint < block length (asserted).
 * *leftBlock receives a block of length splitPoint and *rightBlock a block of
 * length (block length - splitPoint).
 *
 * If the block has a root instance, the split is driven from that instance
 * through block_splitP2(); otherwise each instance is passed to
 * block_splitP() in iteration order.
 */
void block_split(Block *block, int64_t splitPoint, Block **leftBlock, Block **rightBlock) {
    assert(splitPoint > 0);
    assert(splitPoint < block_getLength(block));

    *leftBlock = block_construct(splitPoint, block_getFlower(block));
    *rightBlock = block_construct(block_getLength(block) - splitPoint, block_getFlower(block));

    Segment *rootInstance = block_getRootInstance(block);
    if (rootInstance == NULL) {
        // No root instance: hand every instance to block_splitP() in turn.
        Block_InstanceIterator *instanceIterator = block_getInstanceIterator(block);
        for (Segment *instance = block_getNext(instanceIterator); instance != NULL; instance = block_getNext(instanceIterator)) {
            block_splitP(instance, *leftBlock, *rightBlock);
        }
        block_destructInstanceIterator(instanceIterator);
    } else {
        // A root instance exists: start the split from it.
        block_splitP2(rootInstance, NULL, NULL, *leftBlock, *rightBlock);
    }

    // The original block is consumed by the split.
    block_destruct(block);
}
/*
 * Reports whether another segment in the same block lies on the same
 * meta-sequence as the given segment.
 *
 * The given segment must have a sequence (asserted). Other segments without a
 * sequence are ignored.
 */
static bool duplicated(Segment *segment) {
    Sequence *sequence = segment_getSequence(segment);
    assert(sequence != NULL);
    MetaSequence *metaSequence = sequence_getMetaSequence(sequence);

    bool foundDuplicate = false;
    Block_InstanceIterator *it = block_getInstanceIterator(segment_getBlock(segment));
    Segment *other;
    while (!foundDuplicate && (other = block_getNext(it)) != NULL) {
        if (other == segment) {
            continue; // Skip the query segment itself.
        }
        // The iteration must not yield the reverse of the query segment.
        assert(segment != segment_getReverse(other));

        Sequence *otherSequence = segment_getSequence(other);
        if (otherSequence != NULL && sequence_getMetaSequence(otherSequence) == metaSequence) {
            foundDuplicate = true;
        }
    }
    block_destructInstanceIterator(it);

    return foundDuplicate;
}
/*
 * Upper-cases one sequence character.
 * The cast to unsigned char keeps the toupper() call well defined even when
 * plain char is signed and the value is negative.
 */
static int snpBaseToUpper(char base) {
    return toupper((unsigned char) base);
}

/*
 * Accumulates per-column SNP statistics for one block.
 *
 * Blocks shorter than minimumBlockLength are skipped. So are blocks that hold
 * more than one segment for any of the hap1, hap2 or assembly events.
 *
 * The first and last ignoreFirstNBasesOfBlock columns are excluded. Over the
 * remaining columns two identities (in percent) are computed:
 *   - hap1 versus hap2 (case-insensitive);
 *   - assembly versus the haplotype(s) present (case-insensitive; when both
 *     haplotypes are present all three bases must agree).
 * When one side of a comparison is missing, its match count is set to
 * INT64_MAX so that the corresponding threshold does not reject the block.
 * Only if both identities reach minimumIndentity are the global counters
 * updated, column by column:
 *   - both haplotypes present and equal: totalSites, plus
 *     totalCorrect/totalErrors/totalCalls if there is an assembly;
 *   - both present and different: totalHeterozygous, plus the
 *     *InHeterozygous counters; columns where the assembly matches neither
 *     haplotype (per correctFn) are appended to hetPositions;
 *   - exactly one present: totalInOneHaplotypeOnly, plus the *InOneHaplotype
 *     counters; columns the assembly gets wrong are appended to
 *     indelPositions.
 *
 * The strings returned by segment_getString() are released with free() before
 * returning. fileHandle is not used by this function.
 */
static void getSnpStats(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    // Collect the (at most one) hap1, hap2 and assembly segment of the column.
    Block_InstanceIterator *instanceIterator = block_getInstanceIterator(block);
    Segment *segment;
    char *hap1Seq = NULL;
    char *hap2Seq = NULL;
    char *assemblySeq = NULL;
    Segment *hap1Segment = NULL;
    Segment *hap2Segment = NULL;
    while ((segment = block_getNext(instanceIterator)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(segment)), hap1EventString) == 0) {
            if (hap1Seq != NULL) {
                goto end; // Second hap1 segment: skip the block.
            }
            hap1Seq = segment_getString(segment);
            hap1Segment = segment;
        }
        if (strcmp(event_getHeader(segment_getEvent(segment)), hap2EventString) == 0) {
            if (hap2Seq != NULL) {
                goto end; // Second hap2 segment: skip the block.
            }
            hap2Seq = segment_getString(segment);
            hap2Segment = segment;
        }
        if (strcmp(event_getHeader(segment_getEvent(segment)), assemblyEventString) == 0) {
            if (assemblySeq != NULL) {
                goto end; // Second assembly segment: skip the block.
            }
            assemblySeq = segment_getString(segment);
        }
    }

    assert(minimumIndentity >= 0);
    assert(minimumIndentity <= 100);

    if (hap1Seq != NULL || hap2Seq != NULL) {
        // Every string must span the whole block.
        if (hap1Seq != NULL) {
            assert(strlen(hap1Seq) == block_getLength(block));
        }
        if (hap2Seq != NULL) {
            assert(strlen(hap2Seq) == block_getLength(block));
        }
        if (assemblySeq != NULL) {
            assert(strlen(assemblySeq) == block_getLength(block));
        }

        // Pass 1: count matching columns for the two identity filters.
        double homoMatches = 0;
        double matches = 0;
        for (int64_t i = ignoreFirstNBasesOfBlock; i < block_getLength(block) - ignoreFirstNBasesOfBlock; i++) {
            if (hap1Seq != NULL && hap2Seq != NULL) {
                if (snpBaseToUpper(hap1Seq[i]) == snpBaseToUpper(hap2Seq[i])) {
                    homoMatches++;
                }
            } else {
                homoMatches = INT64_MAX; // Only one haplotype: nothing to compare.
            }
            if (assemblySeq != NULL) {
                if (hap1Seq != NULL) {
                    if (hap2Seq != NULL) {
                        if (snpBaseToUpper(hap1Seq[i]) == snpBaseToUpper(hap2Seq[i])
                                && snpBaseToUpper(hap1Seq[i]) == snpBaseToUpper(assemblySeq[i])) {
                            matches++;
                        }
                    } else {
                        if (snpBaseToUpper(hap1Seq[i]) == snpBaseToUpper(assemblySeq[i])) {
                            matches++;
                        }
                    }
                } else {
                    assert(hap2Seq != NULL);
                    if (snpBaseToUpper(hap2Seq[i]) == snpBaseToUpper(assemblySeq[i])) {
                        matches++;
                    }
                }
            } else {
                matches = INT64_MAX; // No assembly: nothing to compare.
            }
        }
        double homoIdentity = 100.0 * homoMatches / (block_getLength(block) - 2.0 * ignoreFirstNBasesOfBlock);
        double identity = 100.0 * matches / (block_getLength(block) - 2.0 * ignoreFirstNBasesOfBlock);

        if (homoIdentity >= minimumIndentity && identity >= minimumIndentity) {
            // Pass 2: the block passed both filters, so update the global counters.
            for (int64_t i = ignoreFirstNBasesOfBlock; i < block_getLength(block) - ignoreFirstNBasesOfBlock; i++) {
                if (hap1Seq != NULL) {
                    if (hap2Seq != NULL) {
                        if (snpBaseToUpper(hap1Seq[i]) == snpBaseToUpper(hap2Seq[i])) {
                            // Homozygous column.
                            totalSites++;
                            if (assemblySeq != NULL) {
                                totalCorrect += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                                totalErrors += correctFn(assemblySeq[i], hap1Seq[i]) ? 0 : 1;
                                totalCalls++;
                            }
                        } else {
                            // Heterozygous column.
                            totalHeterozygous++;
                            if (assemblySeq != NULL) {
                                assert(snpBaseToUpper(hap1Seq[i]) != snpBaseToUpper(hap2Seq[i]));
                                totalCorrectInHeterozygous += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                                totalCorrectHap1InHeterozygous += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                                totalCorrectInHeterozygous += bitsScoreFn(assemblySeq[i], hap2Seq[i]);
                                totalCorrectHap2InHeterozygous += bitsScoreFn(assemblySeq[i], hap2Seq[i]);
                                totalErrorsInHeterozygous += (correctFn(assemblySeq[i], hap1Seq[i])
                                        || correctFn(assemblySeq[i], hap2Seq[i])) ? 0 : 1;
                                totalCallsInHeterozygous++;
                                if (!(correctFn(assemblySeq[i], hap1Seq[i]) || correctFn(assemblySeq[i], hap2Seq[i]))) {
                                    // The assembly agrees with neither haplotype: record the position.
                                    stList_append(hetPositions,
                                            segmentHolder_construct(hap1Segment, i, assemblySeq[i], hap1Seq[i], hap2Seq[i]));
                                }
                            }
                        }
                    } else {
                        // Column present in hap1 only.
                        totalInOneHaplotypeOnly++;
                        if (assemblySeq != NULL) {
                            totalCorrectInOneHaplotype += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                            totalErrorsInOneHaplotype += correctFn(assemblySeq[i], hap1Seq[i]) ? 0 : 1;
                            totalCallsInOneHaplotype++;
                            if (!correctFn(assemblySeq[i], hap1Seq[i])) {
                                stList_append(indelPositions,
                                        segmentHolder_construct(hap1Segment, i, assemblySeq[i], hap1Seq[i], 'N'));
                            }
                        }
                    }
                } else {
                    if (hap2Seq != NULL) {
                        // Column present in hap2 only.
                        totalInOneHaplotypeOnly++;
                        if (assemblySeq != NULL) {
                            totalCorrectInOneHaplotype += bitsScoreFn(assemblySeq[i], hap2Seq[i]);
                            totalErrorsInOneHaplotype += correctFn(assemblySeq[i], hap2Seq[i]) ? 0 : 1;
                            totalCallsInOneHaplotype++;
                            if (!correctFn(assemblySeq[i], hap2Seq[i])) {
                                stList_append(indelPositions,
                                        segmentHolder_construct(hap2Segment, i, assemblySeq[i], 'N', hap2Seq[i]));
                            }
                        }
                    }
                }
            }
        }
    }

end:
    // Cleanup; free(NULL) is a no-op, so no guards are needed.
    free(hap1Seq);
    free(hap2Seq);
    free(assemblySeq);
    block_destructInstanceIterator(instanceIterator);
}