/*
 * Decides whether the adjacency at the terminal cap reached from 'cap' is
 * also present in one of the given events.
 *
 * Returns 0 straight away when getTerminalAdjacencyLength(cap) is positive.
 * Otherwise every cap in the end of the terminal cap is inspected; the result
 * is 1 as soon as one is found that
 *   - is adjacent to a cap in the same (positively oriented) end as the
 *     terminal cap's adjacency,
 *   - satisfies capHasGivenEvents(..., eventStrings), and
 *   - has a terminal adjacency length of zero.
 * Returns 0 when no such cap exists.
 */
bool trueAdjacency(Cap *cap, stList *eventStrings) {
    if (getTerminalAdjacencyLength(cap) > 0) {
        return 0;
    }

    cap = getTerminalCap(cap);
    assert(cap != NULL);
    Cap *adjacentCap = cap_getAdjacency(cap);
    assert(adjacentCap != NULL);
    assert(cap_getAdjacency(adjacentCap) == cap);

    /* The end every candidate's adjacency has to land in. */
    End *targetEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap));

    bool found = 0;
    End_InstanceIterator *capIt = end_getInstanceIterator(cap_getEnd(cap));
    Cap *candidate;
    while (!found && (candidate = end_getNext(capIt)) != NULL) {
        Cap *candidateAdjacency = cap_getAdjacency(candidate);
        assert(candidateAdjacency != NULL);
        if (end_getPositiveOrientation(cap_getEnd(candidateAdjacency)) != targetEnd) {
            continue;
        }
        assert(event_getHeader(cap_getEvent(candidate))
                == event_getHeader(cap_getEvent(candidateAdjacency)));
        found = capHasGivenEvents(candidate, eventStrings)
                && getTerminalAdjacencyLength(candidate) == 0;
    }
    end_destructInstanceIterator(capIt);
    return found;
}
/*
 * Recursively builds the newick string for the subtree rooted at 'event'.
 *
 * A childless event is rendered as "header:branchLength"; an event with
 * children as "(child1,child2,...)header:branchLength". Branch lengths are
 * formatted with "%g".
 *
 * The returned string is allocated with st_malloc and must be freed by the
 * caller.
 */
static char *eventTree_makeNewickStringP(Event *event) {
    if (event_getChildNumber(event) <= 0) {
        /* Leaf: the 30 spare bytes cover ':', the "%g" number and the NUL. */
        char *leaf = st_malloc(sizeof(char) * (strlen(event_getHeader(event)) + 30));
        sprintf(leaf, "%s:%g", event_getHeader(event), event_getBranchLength(event));
        return leaf;
    }

    /* Accumulates "(child1,child2,..." one child at a time. */
    char *children = NULL;
    for (int64_t childIndex = 0; childIndex < event_getChildNumber(event); childIndex++) {
        char *childString = eventTree_makeNewickStringP(event_getChild(event, childIndex));
        char *joined;
        if (childIndex == 0) {
            /* +2: opening bracket and NUL. */
            joined = st_malloc(sizeof(char) * (strlen(childString) + 2));
            sprintf(joined, "(%s", childString);
        } else {
            /* +2: separating comma and NUL. */
            joined = st_malloc(sizeof(char) * (strlen(children) + strlen(childString) + 2));
            sprintf(joined, "%s,%s", children, childString);
            free(children);
        }
        free(childString);
        children = joined;
    }

    char *node = st_malloc(sizeof(char) * (strlen(children) + strlen(event_getHeader(event)) + 30));
    sprintf(node, "%s)%s:%g", children, event_getHeader(event), event_getBranchLength(event));
    free(children);
    return node;
}
/*
 * Tests whether two caps on the same meta sequence face each other.
 *
 * Returns 0 when the caps share a name or a coordinate, or when they lie on
 * different meta sequences. Otherwise both caps are put on the positive
 * strand and the result is 1 only when the cap with the smaller coordinate
 * has a false cap_getSide() and the one with the larger coordinate has a true
 * cap_getSide(). In that case *separationDistance receives the number of
 * positions strictly between the two coordinates.
 *
 * *separationDistance is left untouched whenever 0 is returned.
 */
bool capsAreAdjacent(Cap *cap1, Cap *cap2, int64_t *separationDistance) {
    /* Equal names or coordinates can happen if end1 == end2. */
    if (cap_getName(cap2) == cap_getName(cap1)
            || cap_getCoordinate(cap1) == cap_getCoordinate(cap2)) {
        return 0;
    }
    if (sequence_getMetaSequence(cap_getSequence(cap1))
            != sequence_getMetaSequence(cap_getSequence(cap2))) {
        return 0;
    }

    assert(strcmp(event_getHeader(cap_getEvent(cap1)),
            event_getHeader(cap_getEvent(cap2))) == 0);
    assert(cap_getPositiveOrientation(cap1) != cap_getPositiveOrientation(cap2));
    assert(cap_getName(cap1) != cap_getName(cap2));
    assert(sequence_getMetaSequence(cap_getSequence(cap1))
            == sequence_getMetaSequence(cap_getSequence(cap2)));

    /* Work with the positive-strand orientation of both caps. */
    if (!cap_getStrand(cap1)) {
        cap1 = cap_getReverse(cap1);
    }
    if (!cap_getStrand(cap2)) {
        cap2 = cap_getReverse(cap2);
    }
    assert(cap_getStrand(cap1));
    assert(cap_getStrand(cap2));

    if (cap_getCoordinate(cap1) < cap_getCoordinate(cap2)) {
        if (cap_getSide(cap1) || !cap_getSide(cap2)) {
            return 0;
        }
        /* The minus 1 gives the length of the sequence between the two caps. */
        *separationDistance = cap_getCoordinate(cap2) - cap_getCoordinate(cap1) - 1;
        return 1;
    }

    if (!cap_getSide(cap1) || cap_getSide(cap2)) {
        return 0;
    }
    *separationDistance = cap_getCoordinate(cap1) - cap_getCoordinate(cap2) - 1;
    return 1;
}
/*
 * Checks that event_getHeader returns the expected header for each of the
 * fixture events ("ROOT", "INTERNAL", "LEAF1" and the empty string).
 */
void testEvent_getHeader(CuTest* testCase) {
    cactusEventTestSetup();

    /* Built after setup, since the fixture events are only valid from then on. */
    const struct {
        const char *expectedHeader;
        Event *event;
    } expectations[] = {
        { "ROOT", rootEvent },
        { "INTERNAL", internalEvent },
        { "LEAF1", leafEvent1 },
        { "", leafEvent2 },
    };

    for (size_t i = 0; i < sizeof(expectations) / sizeof(expectations[0]); i++) {
        CuAssertStrEquals(testCase, expectations[i].expectedHeader,
                event_getHeader(expectations[i].event));
    }

    cactusEventTestTeardown();
}
/*
 * Determines whether segmentX and segmentY are linked through the event named
 * 'eventString'.
 *
 * Both segments must be on the positive strand (asserted). *aligned is reset
 * to 0 on entry.
 *
 * When segmentX starts before segmentY: the instances of both blocks that
 * belong to 'eventString' are paired up. *aligned becomes 1 as soon as both
 * blocks contain such an instance. The function returns 1 when, for some pair
 * on the same meta sequence, capsAreAdjacent() holds between the 3' cap of
 * the X instance and the 5' cap of the Y instance.
 *
 * Otherwise the two segments must be identical (asserted); the function then
 * sets *aligned and returns 1 if hasCapInEvent() holds for the 5' end of the
 * block.
 *
 * 'difference' is currently not consulted.
 */
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference,
        const char *eventString, bool *aligned) {
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = 0;

    if (!(segment_getStart(segmentX) < segment_getStart(segmentY))) {
        /* Degenerate case: a single segment. */
        assert(segmentX == segmentY);
        if (!hasCapInEvent(block_get5End(segment_getBlock(segmentX)), eventString)) {
            return 0;
        }
        *aligned = 1;
        return 1;
    }

    Block *blockX = segment_getBlock(segmentX);
    Block *blockY = segment_getBlock(segmentY);

    Block_InstanceIterator *itX = block_getInstanceIterator(blockX);
    Segment *candidateX;
    while ((candidateX = block_getNext(itX)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(candidateX)), eventString) != 0) {
            continue;
        }
        Block_InstanceIterator *itY = block_getInstanceIterator(blockY);
        Segment *candidateY;
        while ((candidateY = block_getNext(itY)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(candidateY)), eventString) != 0) {
                continue;
            }
            *aligned = 1;
            /* Only instances on the same assembly sequence can be joined. */
            if (sequence_getMetaSequence(segment_getSequence(candidateX))
                    != sequence_getMetaSequence(segment_getSequence(candidateY))) {
                continue;
            }
            /* Is the 3' end of X adjacent to the 5' end of Y? */
            int64_t separationDistance;
            if (capsAreAdjacent(segment_get3Cap(candidateX),
                    segment_get5Cap(candidateY), &separationDistance)) {
                block_destructInstanceIterator(itX);
                block_destructInstanceIterator(itY);
                return 1;
            }
        }
        block_destructInstanceIterator(itY);
    }
    block_destructInstanceIterator(itX);
    return 0;
}
/*
 * Writes one line per SegmentHolder in 'positions' to 'fileHandle', followed
 * by the output of getMAFBlock() for the holder's block.
 *
 * Each line has the form
 *   "<substitutionType>: <eventHeader>_<sequenceLength> <position> <base1> <base2> <base3>"
 * where <position> is the segment start plus the holder's offset for a
 * positive-strand segment and minus the offset otherwise.
 */
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    for (int64_t index = 0; index < stList_length(positions); index++) {
        SegmentHolder *holder = stList_get(positions, index);
        int64_t position = segment_getStart(holder->segment);

        if (!segment_getStrand(holder->segment)) {
            position -= holder->offset;
            assert(segment_getStart(holder->segment) - segment_getLength(holder->segment) + 1
                    == cap_getCoordinate(segment_get3Cap(holder->segment)));
        } else {
            position += holder->offset;
            assert(cap_getCoordinate(segment_get5Cap(holder->segment))
                    == segment_getStart(holder->segment));
            assert(segment_getStart(holder->segment) + segment_getLength(holder->segment) - 1
                    == cap_getCoordinate(segment_get3Cap(holder->segment)));
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n",
                substitutionType,
                event_getHeader(segment_getEvent(holder->segment)),
                sequence_getLength(segment_getSequence(holder->segment)),
                position,
                holder->base1, holder->base2, holder->base3);
        getMAFBlock(segment_getBlock(holder->segment), fileHandle);
    }
}
/*
 * Collects into 'metaSequences' the meta sequence of every sequence in
 * 'flower', and in all flowers nested below it, whose event header makes
 * stringIsInList(header, eventStrings) return 0.
 *
 * A meta sequence already present in the set is not inserted again.
 */
static void getMetaSequencesForEventsP(stSortedSet *metaSequences, Flower *flower,
        stList *eventStrings) {
    /* Sequences of this flower. */
    Flower_SequenceIterator *sequenceIt = flower_getSequenceIterator(flower);
    Sequence *sequence;
    while ((sequence = flower_getNextSequence(sequenceIt)) != NULL) {
        MetaSequence *metaSequence = sequence_getMetaSequence(sequence);
        if (stringIsInList(event_getHeader(sequence_getEvent(sequence)), eventStrings) != 0) {
            continue;
        }
        if (stSortedSet_search(metaSequences, metaSequence) != NULL) {
            continue;
        }
        stSortedSet_insert(metaSequences, metaSequence);
    }
    flower_destructSequenceIterator(sequenceIt);

    /* Nested flowers. */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMetaSequencesForEventsP(metaSequences, group_getNestedFlower(group), eventStrings);
    }
    flower_destructGroupIterator(groupIt);
}
/*
 * Debug check that the haplotype paths are well formed.
 *
 * For every segment of 'flower' (and, recursively, of its nested flowers)
 * that belongs to 'eventString' and whose 5' end satisfies hasCapInEvents()
 * for 'eventStrings', asserts that the segment or its reverse is contained
 * in 'segmentSet'.
 */
static void getMaximalHaplotypePathsCheck(Flower *flower, stSortedSet *segmentSet,
        const char *eventString, stList *eventStrings) {
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) != 0) {
            continue;
        }
        if (!hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) {
            continue;
        }
        assert(stSortedSet_search(segmentSet, segment) != NULL
                || stSortedSet_search(segmentSet, segment_getReverse(segment)) != NULL);
    }
    flower_destructSegmentIterator(segmentIt);

    /* Apply the same check to every nested flower. */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMaximalHaplotypePathsCheck(group_getNestedFlower(group), segmentSet,
                eventString, eventStrings);
    }
    flower_destructGroupIterator(groupIt);
}
/*
 * Block callback that accumulates coverage counts for the event named by the
 * file-level 'sampleEventString'.
 *
 * Blocks shorter than 'minimumBlockLength' are ignored, as are blocks that
 * contain an instance of 'otherReferenceEventString' while
 * 'ignoreOtherReferenceBlocks' is set.
 *
 * For the remaining blocks, with S the number of instances belonging to the
 * sample event and K the number of distinct event headers that are neither
 * the sample nor the reference:
 *   - baseCoverages[K] grows by blockLength * S,
 *   - referenceBases grows by blockLength * S if the block has a reference
 *     instance,
 *   - otherReferenceBases grows by blockLength * S if the block has an
 *     other-reference instance.
 *
 * 'fileHandle' is not used.
 */
static void getMAFBlock2(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    /* Pass 1: which of the two references are present in the block? */
    bool includesReference = 0;
    bool includesOtherReference = 0;
    Segment *segment;
    Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
    while ((segment = block_getNext(instanceIt)) != NULL) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, referenceEventString) == 0) {
            includesReference = 1;
        } else if (strcmp(header, otherReferenceEventString) == 0) {
            includesOtherReference = 1;
        }
    }
    block_destructInstanceIterator(instanceIt);

    if (ignoreOtherReferenceBlocks && includesOtherReference) {
        return;
    }

    /* Pass 2: count sample instances and the distinct other events. */
    stSortedSet *otherSampleEvents = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);
    int32_t sampleNumber = 0;
    instanceIt = block_getInstanceIterator(block);
    while ((segment = block_getNext(instanceIt)) != NULL) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, sampleEventString) == 0) {
            sampleNumber++;
        } else if (strcmp(header, referenceEventString) != 0) {
            stSortedSet_insert(otherSampleEvents, (void *) header);
        }
    }
    block_destructInstanceIterator(instanceIt);

    baseCoverages[stSortedSet_size(otherSampleEvents)] += block_getLength(block) * sampleNumber;
    stSortedSet_destruct(otherSampleEvents);

    if (includesReference) {
        referenceBases += block_getLength(block) * sampleNumber;
    }
    if (includesOtherReference) {
        otherReferenceBases += block_getLength(block) * sampleNumber;
    }
}
/*
 * Returns 1 if the header of the cap's event equals (strcmp) one of the
 * strings in 'eventStrings', otherwise 0.
 */
static bool capHasGivenEvents(Cap *cap, stList *eventStrings) {
    const char *capEventHeader = event_getHeader(cap_getEvent(cap));
    int64_t index = 0;
    while (index < stList_length(eventStrings)) {
        if (strcmp(capEventHeader, stList_get(eventStrings, index)) == 0) {
            return 1;
        }
        index++;
    }
    return 0;
}
/*
 * Builds the list of maximal haplotype paths of 'flower' for the event
 * 'eventString' by delegating to getMaximalHaplotypePathsP(), then runs a
 * series of debug assertions over the result.
 *
 * The returned list owns its elements (each one an stList of Segment
 * pointers, destroyed with stList_destruct). The caller owns the returned
 * list.
 */
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    stList *paths = stList_construct3(0, (void(*)(void *)) stList_destruct);
    stSortedSet *segmentSet = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, paths, segmentSet, eventString, eventStrings);

    /* Everything from here to the clean-up is debug checking and logging. */
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(paths));
    getMaximalHaplotypePathsCheck(flower, segmentSet, eventString, eventStrings);

    for (int64_t pathIndex = 0; pathIndex < stList_length(paths); pathIndex++) {
        stList *path = stList_get(paths, pathIndex);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n",
                stList_length(path));
        assert(stList_length(path) > 0);

        /* Neither extremity of a maximal path may continue via a true adjacency. */
        Segment *firstSegment = stList_get(path, 0);
        Segment *lastSegment = stList_get(path, stList_length(path) - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(firstSegment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(firstSegment), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(lastSegment)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(lastSegment), eventStrings));
        }

        /* Each consecutive pair must be joined by a true adjacency. */
        for (int64_t step = 0; step < stList_length(path) - 1; step++) {
            Segment *upstream = stList_get(path, step);
            Segment *downstream = stList_get(path, step + 1);
            assert(trueAdjacency(segment_get3Cap(upstream), eventStrings));
            assert(trueAdjacency(segment_get5Cap(downstream), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(upstream)))
                    == getTerminalCap(segment_get5Cap(downstream)));
            assert(strcmp(event_getHeader(segment_getEvent(upstream)), eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(downstream)), eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(upstream)), eventStrings));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(downstream)), eventStrings));
        }
    }

    stSortedSet_destruct(segmentSet);
    return paths;
}
/*
 * Serialises 'event' through 'writeFn'. The fields are emitted in this fixed
 * order:
 *   1. the CODE_EVENT element type,
 *   2. the name of the parent event,
 *   3. the name of the event,
 *   4. the branch length,
 *   5. the header string,
 *   6. the outgroup flag.
 *
 * NOTE(review): the parent is passed to event_getName() without a NULL
 * check - confirm callers never pass an event without a parent.
 */
void event_writeBinaryRepresentation(Event *event, void(*writeFn)(
        const void * ptr, size_t size, size_t count)) {
    binaryRepresentation_writeElementType(CODE_EVENT, writeFn);

    Event *parent = event_getParent(event);
    binaryRepresentation_writeName(event_getName(parent), writeFn);
    binaryRepresentation_writeName(event_getName(event), writeFn);

    binaryRepresentation_writeFloat(event_getBranchLength(event), writeFn);

    const char *header = event_getHeader(event);
    binaryRepresentation_writeString(header, writeFn);

    binaryRepresentation_writeBool(event_isOutgroup(event), writeFn);
}
// Used for interactive debugging. void stCaf_printBlock(stPinchBlock *block) { stPinchBlockIt blockIt = stPinchBlock_getSegmentIterator(block); stPinchSegment *segment; while ((segment = stPinchBlockIt_getNext(&blockIt)) != NULL) { stPinchThread *thread = stPinchSegment_getThread(segment); Cap *cap = flower_getCap(flower, stPinchThread_getName(thread)); Event *event = cap_getEvent(cap); Sequence *sequence = cap_getSequence(cap); printf("%s.%s:%" PRIi64 "-%" PRIi64 ":%s\n", event_getHeader(event), sequence_getHeader(sequence), stPinchSegment_getStart(segment), stPinchSegment_getStart(segment) + stPinchSegment_getLength(segment), stPinchSegment_getBlockOrientation(segment) ? "+" : "-"); } }
/*
 * Returns 1 if 'end' contains at least one cap whose event header differs
 * from 'eventString', otherwise 0.
 */
bool hasCapNotInEvent(End *end, const char *eventString) {
    bool foundForeignCap = 0;
    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    Cap *cap;
    while ((cap = end_getNext(capIt)) != NULL) {
        if (strcmp(event_getHeader(cap_getEvent(cap)), eventString) != 0) {
            foundForeignCap = 1;
            break;
        }
    }
    end_destructInstanceIterator(capIt);
    return foundForeignCap;
}
/*
 * Returns the first event yielded by the event tree's iterator whose header
 * equals 'eventHeader' (strcmp), or NULL when there is none.
 */
Event *eventTree_getEventByHeader(EventTree *eventTree, const char *eventHeader) {
    Event *match = NULL;
    EventTree_Iterator *eventIt = eventTree_getIterator(eventTree);
    Event *candidate;
    while ((candidate = eventTree_getNext(eventIt)) != NULL) {
        if (strcmp(event_getHeader(candidate), eventHeader) == 0) {
            match = candidate;
            break;
        }
    }
    eventTree_destructIterator(eventIt);
    return match;
}
/*
 * Reports whether two ends are connected through one of the given events.
 *
 * When both ends have the same name they are the same end and are part of
 * the same chromosome by definition; the result is then 1 exactly when the
 * end has a cap satisfying capHasGivenEvents(..., eventStrings).
 *
 * Otherwise the result is 1 when some cap of end1 that satisfies
 * capHasGivenEvents() lies on the same meta sequence as some cap of end2.
 *
 * Every instance iterator created here is destructed on every return path.
 * The same-end branch previously returned 0 without releasing its iterator.
 */
bool endsAreConnected(End *end1, End *end2, stList *eventStrings) {
    End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
    Cap *cap1;
    bool connected = 0;

    if (end_getName(end1) == end_getName(end2)) {
        /* Same end: it is enough that one of its caps is in the given events. */
        while ((cap1 = end_getNext(instanceIterator)) != NULL) {
            if (capHasGivenEvents(cap1, eventStrings)) {
                connected = 1;
                break;
            }
        }
        end_destructInstanceIterator(instanceIterator);
        return connected;
    }

    while (!connected && (cap1 = end_getNext(instanceIterator)) != NULL) {
        if (!capHasGivenEvents(cap1, eventStrings)) {
            continue;
        }
        End_InstanceIterator *instanceIterator2 = end_getInstanceIterator(end2);
        Cap *cap2;
        while ((cap2 = end_getNext(instanceIterator2)) != NULL) {
            /* Equal names could only happen if end1 == end2. */
            assert(cap_getName(cap2) != cap_getName(cap1));
            if (sequence_getMetaSequence(cap_getSequence(cap1))
                    == sequence_getMetaSequence(cap_getSequence(cap2))) {
                assert(strcmp(event_getHeader(cap_getEvent(cap1)),
                        event_getHeader(cap_getEvent(cap2))) == 0);
                assert(cap_getPositiveOrientation(cap1) != cap_getPositiveOrientation(cap2));
                /*
                 * The caps could still have the same coordinate if they
                 * represent the two ends of a block of length 1.
                 */
                assert(cap_getName(cap1) != cap_getName(cap2));
                connected = 1;
                break;
            }
        }
        end_destructInstanceIterator(instanceIterator2);
    }
    end_destructInstanceIterator(instanceIterator);
    return connected;
}
/*
 * Adds a meta sequence representing a top level thread to the cactus disk of
 * 'flower'.
 *
 * The sequence is named "<eventHeader>refChr<index>", where the event is the
 * one of 'cap', and is constructed from 'string' with start 1 and length
 * strlen(string). 'trivialString' is forwarded unchanged to
 * metaSequence_construct3().
 *
 * The original note on this function says the sequence is all 'N's at this
 * stage; that depends on what the caller passes in 'string'.
 */
static MetaSequence *addMetaSequence(Flower *flower, Cap *cap, int64_t index,
        char *string, bool trivialString) {
    Event *referenceEvent = cap_getEvent(cap);
    assert(referenceEvent != NULL);

    char *sequenceName = stString_print("%srefChr%" PRIi64 "",
            event_getHeader(referenceEvent), index);
    MetaSequence *created = metaSequence_construct3(1, strlen(string), string,
            sequenceName, event_getName(referenceEvent), trivialString,
            flower_getCactusDisk(flower));
    /* The temporary name is released straight after construction. */
    free(sequenceName);

    return created;
}
/*
 * Returns the first sequence of 'flower' whose event header equals
 * 'referenceEventString' (strcmp), or NULL if no sequence matches.
 */
static Sequence *getSequenceMatchesEvent(Flower *flower, char *referenceEventString){
    Sequence *match = NULL;
    Flower_SequenceIterator *sequenceIt = flower_getSequenceIterator(flower);
    Sequence *candidate;
    while ((candidate = flower_getNextSequence(sequenceIt)) != NULL) {
        Event *candidateEvent = sequence_getEvent(candidate);
        if (strcmp(event_getHeader(candidateEvent), referenceEventString) == 0) {
            match = candidate;
            break;
        }
    }
    flower_destructSequenceIterator(sequenceIt);
    return match;
}
/*
 * Recursively copies the descendants of 'event' into 'eventTree'.
 *
 * For each child, chains of events with exactly one child are skipped while
 * 'unaryEventFilterFn' is non-NULL and returns zero for them. The first
 * event not skipped is re-created in 'eventTree' with the same name, header
 * and branch length. Its parent is the event of 'eventTree' that carries the
 * name of 'event', and the outgroup status is copied across. The function
 * then recurses on the event that was copied.
 */
void eventTree_copyConstructP(EventTree *eventTree, Event *event, int64_t (unaryEventFilterFn)(Event *event)) {
    for (int64_t childIndex = 0; childIndex < event_getChildNumber(event); childIndex++) {
        Event *source = event_getChild(event, childIndex);

        /* Walk down past unary events the filter rejects. */
        while (event_getChildNumber(source) == 1 && unaryEventFilterFn != NULL
                && !unaryEventFilterFn(source)) {
            source = event_getChild(source, 0);
        }

        Event *parentInCopy = eventTree_getEvent(eventTree, event_getName(event));
        Event *copy = event_construct(event_getName(source), event_getHeader(source),
                event_getBranchLength(source), parentInCopy, eventTree);
        event_setOutgroupStatus(copy, event_isOutgroup(source));

        eventTree_copyConstructP(eventTree, source, unaryEventFilterFn);
    }
}
/*
 * Returns a new strcmp-ordered sorted set containing the event headers of
 * those caps of 'end' whose header equals an entry of 'eventStrings'.
 *
 * The set stores the pointers returned by event_getHeader(); no copies are
 * made here. The caller is responsible for destructing the set.
 */
static stSortedSet *getEventStrings(End *end, stList *eventStrings) {
    stSortedSet *matchedHeaders = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);

    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    Cap *cap;
    while ((cap = end_getNext(capIt)) != NULL) {
        const char *capHeader = event_getHeader(cap_getEvent(cap));
        int64_t index = 0;
        while (index < stList_length(eventStrings)) {
            if (strcmp(stList_get(eventStrings, index), capHeader) == 0) {
                stSortedSet_insert(matchedHeaders, (void *) capHeader);
            }
            index++;
        }
    }
    end_destructInstanceIterator(capIt);

    return matchedHeaders;
}
/*
 * Finds the maximal haplotype paths in 'flower' and its nested flowers.
 *
 * Each segment of the event 'eventString' is considered, provided neither it
 * nor its reverse is already in 'segmentSet'. If its 5' end satisfies
 * hasCapInEvents() for 'eventStrings', a new path list is appended to
 * 'maximalHaplotypePaths' and filled by getMaximalHaplotypePathsP2().
 * Whether the 3' end agrees with the 5' end on hasCapInEvents() is asserted
 * in both cases.
 */
static void getMaximalHaplotypePathsP(Flower *flower, stList *maximalHaplotypePaths,
        stSortedSet *segmentSet, const char *eventString, stList *eventStrings) {
    /*
     * Iterate through the segments in this flower.
     */
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        /* Skip segments that have already been seen in either orientation. */
        if (stSortedSet_search(segmentSet, segment) != NULL
                || stSortedSet_search(segmentSet, segment_getReverse(segment)) != NULL) {
            continue;
        }
        /* Skip segments that are not in the assembly. */
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) != 0) {
            continue;
        }
        if (!hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) {
            assert(!hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));
            continue;
        }
        /* The segment is in a block that also holds a haplotype segment. */
        assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));
        stList *path = stList_construct();
        stList_append(maximalHaplotypePaths, path);
        getMaximalHaplotypePathsP2(segment, path, segmentSet, eventStrings);
    }
    flower_destructSegmentIterator(segmentIt);

    /*
     * Now recurse on the contained flowers.
     */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMaximalHaplotypePathsP(group_getNestedFlower(group), maximalHaplotypePaths,
                segmentSet, eventString, eventStrings);
    }
    flower_destructGroupIterator(groupIt);
}
/*
 * Writes to 'fileHandle', via fastaWrite(), every sequence of 'flower' that
 *   - belongs to the event whose header equals 'referenceEventString',
 *   - has a positive length, and
 *   - is not flagged trivial by metaSequence_isTrivialSequence().
 * The fasta header comes from formatSequenceHeader() and is also logged at
 * info level.
 *
 * NOTE(review): the string returned by formatSequenceHeader() is not freed
 * here - confirm whether the caller owns it.
 */
static void getReferenceSequences(FILE *fileHandle, Flower *flower, char *referenceEventString){
    Flower_SequenceIterator *sequenceIt = flower_getSequenceIterator(flower);
    Sequence *sequence;
    while ((sequence = flower_getNextSequence(sequenceIt)) != NULL) {
        Event *sequenceEvent = sequence_getEvent(sequence);
        if (strcmp(event_getHeader(sequenceEvent), referenceEventString) != 0) {
            continue;
        }
        if (!(sequence_getLength(sequence) > 0)) {
            continue;
        }
        if (metaSequence_isTrivialSequence(sequence_getMetaSequence(sequence))) {
            continue;
        }

        const char *fastaHeader = formatSequenceHeader(sequence);
        st_logInfo("Sequence %s\n", fastaHeader);
        char *bases = sequence_getString(sequence, sequence_getStart(sequence),
                sequence_getLength(sequence), 1);
        fastaWrite(bases, (char *) fastaHeader, fileHandle);
        free(bases);
    }
    flower_destructSequenceIterator(sequenceIt);
}
/*
 * Block callback that accumulates per-base substitution statistics into the
 * file-level counters.
 *
 * Blocks shorter than 'minimumBlockLength' are skipped. So is any block that
 * holds more than one instance of the hap1, hap2 or assembly event. Within a
 * block the first and last 'ignoreFirstNBasesOfBlock' columns are ignored.
 *
 * Two identities are computed over the remaining columns: hap1 vs hap2, and
 * assembly vs the haplotype(s) present. An identity whose inputs are missing
 * is forced to INT64_MAX matches so it always passes. Only when both reach
 * 'minimumIndentity' are the columns classified as homozygous, heterozygous
 * or present in one haplotype only, and the matching totals updated.
 * Positions where the assembly disagrees are appended to 'hetPositions'
 * (heterozygous columns) or 'indelPositions' (single-haplotype columns).
 *
 * 'fileHandle' is not used.
 */
static void getSnpStats(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    char *hap1Bases = NULL;
    char *hap2Bases = NULL;
    char *assemblyBases = NULL;
    Segment *hap1Instance = NULL;
    Segment *hap2Instance = NULL;

    /* Fetch the single instance of each event, bailing out on duplicates. */
    Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
    Segment *instance;
    while ((instance = block_getNext(instanceIt)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(instance)), hap1EventString) == 0) {
            if (hap1Bases != NULL) {
                goto cleanup;
            }
            hap1Bases = segment_getString(instance);
            hap1Instance = instance;
        }
        if (strcmp(event_getHeader(segment_getEvent(instance)), hap2EventString) == 0) {
            if (hap2Bases != NULL) {
                goto cleanup;
            }
            hap2Bases = segment_getString(instance);
            hap2Instance = instance;
        }
        if (strcmp(event_getHeader(segment_getEvent(instance)), assemblyEventString) == 0) {
            if (assemblyBases != NULL) {
                goto cleanup;
            }
            assemblyBases = segment_getString(instance);
        }
    }

    assert(minimumIndentity >= 0);
    assert(minimumIndentity <= 100);

    if (hap1Bases != NULL || hap2Bases != NULL) {
        if (hap1Bases != NULL) {
            assert(strlen(hap1Bases) == block_getLength(block));
        }
        if (hap2Bases != NULL) {
            assert(strlen(hap2Bases) == block_getLength(block));
        }
        if (assemblyBases != NULL) {
            assert(strlen(assemblyBases) == block_getLength(block));
        }

        /* Pass 1: count matching columns for the two identity filters. */
        double homoMatches = 0;
        double matches = 0;
        for (int64_t column = ignoreFirstNBasesOfBlock;
                column < block_getLength(block) - ignoreFirstNBasesOfBlock; column++) {
            if (hap1Bases == NULL || hap2Bases == NULL) {
                homoMatches = INT64_MAX;
            } else if (toupper(hap1Bases[column]) == toupper(hap2Bases[column])) {
                homoMatches++;
            }

            if (assemblyBases == NULL) {
                matches = INT64_MAX;
                continue;
            }
            bool assemblyAgrees;
            if (hap1Bases == NULL) {
                assert(hap2Bases != NULL);
                assemblyAgrees = toupper(hap2Bases[column]) == toupper(assemblyBases[column]);
            } else if (hap2Bases == NULL) {
                assemblyAgrees = toupper(hap1Bases[column]) == toupper(assemblyBases[column]);
            } else {
                assemblyAgrees = toupper(hap1Bases[column]) == toupper(hap2Bases[column])
                        && toupper(hap1Bases[column]) == toupper(assemblyBases[column]);
            }
            if (assemblyAgrees) {
                matches++;
            }
        }

        double homoIdentity = 100.0 * homoMatches
                / (block_getLength(block) - 2.0 * ignoreFirstNBasesOfBlock);
        double identity = 100.0 * matches
                / (block_getLength(block) - 2.0 * ignoreFirstNBasesOfBlock);

        if (homoIdentity >= minimumIndentity && identity >= minimumIndentity) {
            /* Pass 2: classify each column and update the totals. */
            for (int64_t column = ignoreFirstNBasesOfBlock;
                    column < block_getLength(block) - ignoreFirstNBasesOfBlock; column++) {
                if (hap1Bases != NULL && hap2Bases != NULL) {
                    if (toupper(hap1Bases[column]) == toupper(hap2Bases[column])) {
                        /* Homozygous column. */
                        totalSites++;
                        if (assemblyBases != NULL) {
                            totalCorrect += bitsScoreFn(assemblyBases[column], hap1Bases[column]);
                            totalErrors += correctFn(assemblyBases[column], hap1Bases[column]) ? 0 : 1;
                            totalCalls++;
                        }
                    } else {
                        /* Heterozygous column. */
                        totalHeterozygous++;
                        if (assemblyBases != NULL) {
                            assert(toupper(hap1Bases[column]) != toupper(hap2Bases[column]));
                            totalCorrectInHeterozygous += bitsScoreFn(assemblyBases[column], hap1Bases[column]);
                            totalCorrectHap1InHeterozygous += bitsScoreFn(assemblyBases[column], hap1Bases[column]);
                            totalCorrectInHeterozygous += bitsScoreFn(assemblyBases[column], hap2Bases[column]);
                            totalCorrectHap2InHeterozygous += bitsScoreFn(assemblyBases[column], hap2Bases[column]);
                            totalErrorsInHeterozygous += (correctFn(assemblyBases[column], hap1Bases[column])
                                    || correctFn(assemblyBases[column], hap2Bases[column])) ? 0 : 1;
                            totalCallsInHeterozygous++;
                            if (!(correctFn(assemblyBases[column], hap1Bases[column])
                                    || correctFn(assemblyBases[column], hap2Bases[column]))) {
                                stList_append(hetPositions,
                                        segmentHolder_construct(hap1Instance, column,
                                                assemblyBases[column], hap1Bases[column],
                                                hap2Bases[column]));
                            }
                        }
                    }
                } else if (hap1Bases != NULL) {
                    /* Column present in haplotype 1 only. */
                    totalInOneHaplotypeOnly++;
                    if (assemblyBases != NULL) {
                        totalCorrectInOneHaplotype += bitsScoreFn(assemblyBases[column], hap1Bases[column]);
                        totalErrorsInOneHaplotype += correctFn(assemblyBases[column], hap1Bases[column]) ? 0 : 1;
                        totalCallsInOneHaplotype++;
                        if (!correctFn(assemblyBases[column], hap1Bases[column])) {
                            stList_append(indelPositions,
                                    segmentHolder_construct(hap1Instance, column,
                                            assemblyBases[column], hap1Bases[column], 'N'));
                        }
                    }
                } else if (hap2Bases != NULL) {
                    /* Column present in haplotype 2 only. */
                    totalInOneHaplotypeOnly++;
                    if (assemblyBases != NULL) {
                        totalCorrectInOneHaplotype += bitsScoreFn(assemblyBases[column], hap2Bases[column]);
                        totalErrorsInOneHaplotype += correctFn(assemblyBases[column], hap2Bases[column]) ? 0 : 1;
                        totalCallsInOneHaplotype++;
                        if (!correctFn(assemblyBases[column], hap2Bases[column])) {
                            stList_append(indelPositions,
                                    segmentHolder_construct(hap2Instance, column,
                                            assemblyBases[column], 'N', hap2Bases[column]));
                        }
                    }
                }
            }
        }
    }

    cleanup:
    if (hap1Bases != NULL) {
        free(hap1Bases);
    }
    if (hap2Bases != NULL) {
        free(hap2Bases);
    }
    if (assemblyBases != NULL) {
        free(assemblyBases);
    }
    block_destructInstanceIterator(instanceIt);
}
int main(int argc, char *argv[]) { ////////////////////////////////////////////// //Parse the inputs ////////////////////////////////////////////// parseBasicArguments(argc, argv, "coverageStats"); assert(referenceEventString != NULL); assert(otherReferenceEventString != NULL); assert(outgroupEventString != NULL); /////////////////////////////////////////////////////////////////////////// // Calculate and print to file a crap load of numbers. /////////////////////////////////////////////////////////////////////////// Sequence *referenceSequence = NULL; Sequence *otherReferenceSequence = NULL; Flower_SequenceIterator *sequenceIt = flower_getSequenceIterator(flower); Sequence *sequence; while ((sequence = flower_getNextSequence(sequenceIt)) != NULL) { const char *eventHeader = event_getHeader(sequence_getEvent(sequence)); if (eventHeader != NULL && strcmp(eventHeader, referenceEventString) == 0) { if (referenceSequence == NULL || sequence_getLength(sequence) >= sequence_getLength(referenceSequence)) { referenceSequence = sequence; } } if (eventHeader != NULL && strcmp(eventHeader, otherReferenceEventString) == 0) { if (otherReferenceSequence == NULL || sequence_getLength(sequence) >= sequence_getLength(otherReferenceSequence)) { otherReferenceSequence = sequence; } } } flower_destructSequenceIterator(sequenceIt); assert(referenceSequence != NULL); assert(otherReferenceSequence != NULL); FILE *fileHandle = fopen(outputFile, "w"); fprintf( fileHandle, "<coverageStats referenceSequenceLength=\"%i\" otherReferenceSequenceLength=\"%i\">\n", sequence_getLength(referenceSequence), sequence_getLength(otherReferenceSequence)); EventTree_Iterator *eventIt = eventTree_getIterator( flower_getEventTree(flower)); eventNumber = eventTree_getEventNumber(flower_getEventTree(flower)); Event * event; totalBaseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); totalReferenceBases = 0; totalOtherReferenceBases = 0; int32_t totalSamples = 0; ignoreOtherReferenceBlocks = 0; while 
((event = eventTree_getNext(eventIt)) != NULL) { sampleEventString = event_getHeader(event); if (sampleEventString != NULL && strcmp(sampleEventString, "ROOT") != 0 && strcmp(sampleEventString, "") != 0) { baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); baseCoverages[0] = strcmp(sampleEventString, referenceEventString) != 0 ? getTotalLengthOfAdjacencies(flower, sampleEventString) : 0; referenceBases = 0; otherReferenceBases = 0; getMAFs(flower, fileHandle, getMAFBlock2); if(strcmp(sampleEventString, referenceEventString) == 0) { for(int32_t i=2; i<eventNumber + 1; i++) { baseCoverages[i-1] = baseCoverages[i]; } baseCoverages[eventNumber] = 0; } printStatsForSample( strcmp(sampleEventString, referenceEventString) != 0 && strcmp(sampleEventString, outgroupEventString) != 0, fileHandle, 1); free(baseCoverages); totalSamples += (strcmp(sampleEventString, referenceEventString) != 0 && strcmp(sampleEventString, outgroupEventString) != 0) ? 1 : 0; } } eventTree_destructIterator(eventIt); //Do average base coverages.. sampleEventString = "average"; baseCoverages = totalBaseCoverages; referenceBases = totalReferenceBases; otherReferenceBases = totalOtherReferenceBases; printStatsForSample(0, fileHandle, totalSamples); //Do all.. 
sampleEventString = referenceEventString; baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); baseCoverages[0] = totalBaseCoverages[0]; referenceBases = 0; getMAFs(flower, fileHandle, getMAFBlock2); for(int32_t i=2; i<eventNumber + 1; i++) { baseCoverages[i-1] = baseCoverages[i]; } baseCoverages[eventNumber] = 0; otherReferenceBases = sequence_getLength(otherReferenceSequence); sampleEventString = "all"; printStatsForSample(0, fileHandle, 1); free(baseCoverages); //Do blocks without other reference ignoreOtherReferenceBlocks = 1; sampleEventString = referenceEventString; baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1); baseCoverages[0] = totalBaseCoverages[0] - getTotalLengthOfAdjacencies(flower, otherReferenceEventString); referenceBases = 0; otherReferenceBases = 0; getMAFs(flower, fileHandle, getMAFBlock2); for(int32_t i=2; i<eventNumber + 1; i++) { baseCoverages[i-1] = baseCoverages[i]; } baseCoverages[eventNumber] = 0; sampleEventString = "minusOtherReference"; printStatsForSample(0, fileHandle, 1); free(baseCoverages); fprintf(fileHandle, "</coverageStats>\n"); st_logInfo("Finished writing out the stats.\n"); fclose(fileHandle); return 0; }