Ejemplo n.º 1
0
/*
 * Reports whether segmentX and segmentY are linked with respect to the event
 * whose header equals eventString.
 *
 * Both segments must be on the positive strand (asserted).
 *
 * When segmentX starts before segmentY, every pair (x, y) of instances from
 * the blocks of segmentX and segmentY whose events both match eventString is
 * examined. *aligned is set as soon as one such pair exists. The pair counts
 * as linked when both instances share a meta-sequence and capsAreAdjacent()
 * accepts the 3' cap of x together with the 5' cap of y.
 *
 * Otherwise the two segments are expected to be the same segment (asserted),
 * and the result (and *aligned) is whether the 5' end of its block has a cap
 * in the event.
 *
 * The difference argument is currently unused: the separation-distance
 * filter that consumed it is disabled.
 *
 * Returns true if linked, false otherwise.
 */
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference,
        const char *eventString, bool *aligned) {
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = false;

    // Same-segment case: only the presence of a cap in the event matters.
    if (!(segment_getStart(segmentX) < segment_getStart(segmentY))) {
        assert(segmentX == segmentY);
        if (!hasCapInEvent(block_get5End(segment_getBlock(segmentX)),
                eventString)) {
            return false;
        }
        *aligned = true;
        return true;
    }

    Block *blockX = segment_getBlock(segmentX);
    Block *blockY = segment_getBlock(segmentY);
    bool isLinked = false;

    Block_InstanceIterator *itX = block_getInstanceIterator(blockX);
    Segment *candidateX;
    while (!isLinked && (candidateX = block_getNext(itX)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(candidateX)),
                eventString) != 0) {
            continue;
        }
        Block_InstanceIterator *itY = block_getInstanceIterator(blockY);
        Segment *candidateY;
        while (!isLinked && (candidateY = block_getNext(itY)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(candidateY)),
                    eventString) != 0) {
                continue;
            }
            // Both blocks contain an instance belonging to the event.
            *aligned = true;
            if (sequence_getMetaSequence(segment_getSequence(candidateX))
                    != sequence_getMetaSequence(
                            segment_getSequence(candidateY))) {
                continue; // Different assembly sequences.
            }
            // Check the 3' cap of X against the 5' cap of Y for adjacency.
            int64_t separationDistance;
            if (capsAreAdjacent(segment_get3Cap(candidateX),
                    segment_get5Cap(candidateY), &separationDistance)) {
                isLinked = true;
            }
        }
        block_destructInstanceIterator(itY);
    }
    block_destructInstanceIterator(itX);
    return isLinked;
}
Ejemplo n.º 2
0
/*
 * Collects one substring per segment found in the blocks of the given flowers.
 *
 * Segments without an attached sequence are skipped. Each remaining segment is
 * viewed on its positive strand, and the substring is built from the
 * meta-sequence's stringName, the segment start relative to the sequence
 * start, and the segment length.
 *
 * Returns a newly constructed list whose elements are destroyed with
 * substring_destruct.
 */
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        for (Block *block = flower_getNextBlock(blockIt); block != NULL;
                block = flower_getNextBlock(blockIt)) {
            Block_InstanceIterator *segmentIt = block_getInstanceIterator(block);
            for (Segment *segment = block_getNext(segmentIt); segment != NULL;
                    segment = block_getNext(segmentIt)) {
                Sequence *sequence = segment_getSequence(segment);
                if (sequence == NULL) {
                    continue;
                }
                // Work with the positive-strand orientation of the segment.
                Segment *forward = segment;
                if (!segment_getStrand(segment)) {
                    forward = segment_getReverse(segment);
                }
                assert(segment_getLength(forward) > 0);
                stList_append(substrings,
                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                segment_getStart(forward) - sequence_getStart(sequence),
                                segment_getLength(forward)));
            }
            block_destructInstanceIterator(segmentIt);
        }
        flower_destructBlockIterator(blockIt);
    }
    return substrings;
}
Ejemplo n.º 3
0
/*
 * Accumulates coverage statistics for a single block into the file-level
 * counters baseCoverages, referenceBases and otherReferenceBases.
 *
 * Blocks shorter than minimumBlockLength are ignored, as are blocks containing
 * the other-reference event when ignoreOtherReferenceBlocks is set.
 *
 * For a counted block, sampleNumber is the number of segments whose event is
 * sampleEventString; the distinct event headers that are neither the sample
 * nor the reference select the baseCoverages slot to update.
 *
 * fileHandle is not used by this function.
 */
static void getMAFBlock2(Block *block, FILE *fileHandle) {
    if (block_getLength(block) < minimumBlockLength) {
        return;
    }

    // First pass: which of the two reference events appear in the block?
    bool includesReference = 0;
    bool includesOtherReference = 0;
    Block_InstanceIterator *it = block_getInstanceIterator(block);
    for (Segment *segment = block_getNext(it); segment != NULL;
            segment = block_getNext(it)) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, referenceEventString) == 0) {
            includesReference = 1;
        } else if (strcmp(header, otherReferenceEventString) == 0) {
            includesOtherReference = 1;
        }
    }
    block_destructInstanceIterator(it);
    if (ignoreOtherReferenceBlocks && includesOtherReference) {
        return;
    }

    // Second pass: count sample segments and gather the other events present.
    stSortedSet *otherSampleEvents = stSortedSet_construct3(
            (int(*)(const void *, const void *)) strcmp, NULL);
    int32_t sampleNumber = 0;
    it = block_getInstanceIterator(block);
    for (Segment *segment = block_getNext(it); segment != NULL;
            segment = block_getNext(it)) {
        const char *header = event_getHeader(segment_getEvent(segment));
        if (strcmp(header, sampleEventString) == 0) {
            sampleNumber++;
        } else if (strcmp(header, referenceEventString) != 0) {
            stSortedSet_insert(otherSampleEvents, (void *) header);
        }
    }
    block_destructInstanceIterator(it);

    baseCoverages[stSortedSet_size(otherSampleEvents)] +=
            block_getLength(block) * sampleNumber;
    stSortedSet_destruct(otherSampleEvents);

    if (includesReference) {
        referenceBases += block_getLength(block) * sampleNumber;
    }
    if (includesOtherReference) {
        otherReferenceBases += block_getLength(block) * sampleNumber;
    }
}
Ejemplo n.º 4
0
/*
 * Exercises the block instance iterator: forward and backward traversal,
 * copying an iterator part-way through a traversal, and traversal of the
 * reversed block. Relies on the fixture built by cactusBlockTestSetup()
 * (block, rootSegment, leaf1Segment, leaf2Segment).
 */
void testBlock_instanceIterator(CuTest* testCase) {
    cactusBlockTestSetup();
    Block_InstanceIterator *iterator;
    iterator = block_getInstanceIterator(block);

    // Consume the first two instances of the block.
    CuAssertTrue(testCase, iterator != NULL);
    CuAssertTrue(testCase, block_getNext(iterator) == segment_getReverse(rootSegment));
    CuAssertTrue(testCase, block_getNext(iterator) == leaf1Segment);

    // Copy taken after two instances have been consumed; it is checked below
    // to continue from this same position.
    Block_InstanceIterator *iterator2 = block_copyInstanceIterator(iterator);

    // Finish the forward walk on the original, then walk all the way back.
    CuAssertTrue(testCase, block_getNext(iterator) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getNext(iterator) == NULL);
    CuAssertTrue(testCase, block_getPrevious(iterator) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getPrevious(iterator) == leaf1Segment);
    CuAssertTrue(testCase, block_getPrevious(iterator) == segment_getReverse(rootSegment));
    CuAssertTrue(testCase, block_getPrevious(iterator) == NULL);

    // The copy must produce the same sequence, unaffected by the calls made
    // on the original iterator above.
    CuAssertTrue(testCase, block_getNext(iterator2) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getNext(iterator2) == NULL);
    CuAssertTrue(testCase, block_getPrevious(iterator2) == segment_getReverse(leaf2Segment));
    CuAssertTrue(testCase, block_getPrevious(iterator2) == leaf1Segment);
    CuAssertTrue(testCase, block_getPrevious(iterator2) == segment_getReverse(rootSegment));
    CuAssertTrue(testCase, block_getPrevious(iterator2) == NULL);

    block_destructInstanceIterator(iterator);
    block_destructInstanceIterator(iterator2);

    // Iterating the reversed block yields the same instances in the same
    // order, each in the opposite orientation to the forward walk.
    iterator = block_getInstanceIterator(block_getReverse(block));
    CuAssertTrue(testCase, block_getNext(iterator) == rootSegment);
    CuAssertTrue(testCase, block_getNext(iterator) == segment_getReverse(leaf1Segment));
    CuAssertTrue(testCase, block_getNext(iterator) == leaf2Segment);
    CuAssertTrue(testCase, block_getNext(iterator) == NULL);
    CuAssertTrue(testCase, block_getPrevious(iterator) == leaf2Segment);
    CuAssertTrue(testCase, block_getPrevious(iterator) == segment_getReverse(leaf1Segment));
    CuAssertTrue(testCase, block_getPrevious(iterator) == rootSegment);
    CuAssertTrue(testCase, block_getPrevious(iterator) == NULL);

    block_destructInstanceIterator(iterator);

    cactusBlockTestTeardown();
}
Ejemplo n.º 5
0
/*
 * Returns the first segment of the block (in instance-iteration order) whose
 * event has the given name, or NULL if the block has no such segment.
 */
Segment *block_getSegmentForEvent(Block *block, Name eventName) {
    Segment *match = NULL;
    Block_InstanceIterator *it = block_getInstanceIterator(block);
    for (Segment *candidate = block_getNext(it); candidate != NULL;
            candidate = block_getNext(it)) {
        if (event_getName(segment_getEvent(candidate)) == eventName) {
            match = candidate;
            break;
        }
    }
    // Single clean-up point for both the found and not-found outcomes.
    block_destructInstanceIterator(it);
    return match;
}
Ejemplo n.º 6
0
/*
 * Runs a series of consistency checks (via cactusCheck) on a block: its
 * registration in its flower, its two ends, its length, its reverse
 * orientation, and finally each of its segments (via segment_check).
 */
void block_check(Block *block) {
	// The flower must return the positively oriented block for this block's name.
	cactusCheck(block_getPositiveOrientation(block) == flower_getBlock(block_getFlower(block), block_getName(block)));
	// The flower must be flagged as having built blocks.
	cactusCheck(flower_builtBlocks(block_getFlower(block)));

	// Both ends must be block ends that agree with the block on orientation and ownership.
	End *fivePrimeEnd = block_get5End(block);
	End *threePrimeEnd = block_get3End(block);
	cactusCheck(end_isBlockEnd(fivePrimeEnd));
	cactusCheck(end_isBlockEnd(threePrimeEnd));
	cactusCheck(end_getOrientation(fivePrimeEnd) == block_getOrientation(block));
	cactusCheck(end_getOrientation(threePrimeEnd) == block_getOrientation(block));
	cactusCheck(end_getBlock(fivePrimeEnd) == block);
	cactusCheck(end_getBlock(threePrimeEnd) == block);
	// The 5' end has its side flag set, the 3' end does not.
	cactusCheck(end_getSide(fivePrimeEnd));
	cactusCheck(!end_getSide(threePrimeEnd));

	// Blocks must have a positive length.
	cactusCheck(block_getLength(block) > 0);

	// The reverse block must mirror this one.
	Block *reverse = block_getReverse(block);
	cactusCheck(reverse != NULL);
	cactusCheck(block_getReverse(block) == reverse);
	cactusCheck(block_getOrientation(block) == !block_getOrientation(reverse));
	cactusCheck(block_getLength(block) == block_getLength(reverse));
	cactusCheck(block_get5End(block) == end_getReverse(block_get3End(reverse)));
	cactusCheck(block_get3End(block) == end_getReverse(block_get5End(reverse)));
	cactusCheck(block_getInstanceNumber(block) == block_getInstanceNumber(reverse));
	if (block_getInstanceNumber(block) > 0) {
		cactusCheck(block_getFirst(block) == segment_getReverse(block_getFirst(reverse)));
		// Root instances are either both absent or reverses of one another.
		Segment *root = block_getRootInstance(block);
		Segment *reverseRoot = block_getRootInstance(reverse);
		if (root != NULL) {
			cactusCheck(root == segment_getReverse(reverseRoot));
		} else {
			cactusCheck(reverseRoot == NULL);
		}
	}

	// Finally validate every segment instance of the block.
	Block_InstanceIterator *segmentIt = block_getInstanceIterator(block);
	for (Segment *segment = block_getNext(segmentIt); segment != NULL;
			segment = block_getNext(segmentIt)) {
		segment_check(segment);
	}
	block_destructInstanceIterator(segmentIt);
}
Ejemplo n.º 7
0
/*
 * Splits a block at splitPoint into two newly constructed blocks in the same
 * flower: *leftBlock of length splitPoint and *rightBlock holding the
 * remaining length. splitPoint must lie strictly inside the block (asserted).
 *
 * If the block has a root instance, the split is driven from it through
 * block_splitP2; otherwise every segment instance is handed to block_splitP.
 * The original block is destructed before returning.
 */
void block_split(Block *block, int64_t splitPoint, Block **leftBlock, Block **rightBlock) {
	assert(splitPoint > 0);
	assert(splitPoint < block_getLength(block));

	// Construct the left block first, then the right block.
	*leftBlock = block_construct(splitPoint, block_getFlower(block));
	*rightBlock = block_construct(block_getLength(block) - splitPoint, block_getFlower(block));

	Segment *root = block_getRootInstance(block);
	if (root == NULL) {
		// No root instance: split each segment instance individually.
		Block_InstanceIterator *segmentIt = block_getInstanceIterator(block);
		for (Segment *segment = block_getNext(segmentIt); segment != NULL;
				segment = block_getNext(segmentIt)) {
			block_splitP(segment, *leftBlock, *rightBlock);
		}
		block_destructInstanceIterator(segmentIt);
	} else {
		block_splitP2(root, NULL, NULL, *leftBlock, *rightBlock);
	}

	block_destruct(block);
}
Ejemplo n.º 8
0
/*
 * Returns true if another segment in the same block as the given segment
 * belongs to the same meta-sequence, false otherwise.
 *
 * The given segment must have a sequence (asserted). Segments of the block
 * that have no sequence are ignored.
 */
static bool duplicated(Segment *segment) {
    Sequence *sequence = segment_getSequence(segment);
    assert(sequence != NULL);
    MetaSequence *metaSequence = sequence_getMetaSequence(sequence);

    bool found = false;
    Block_InstanceIterator *it = block_getInstanceIterator(segment_getBlock(segment));
    for (Segment *other = block_getNext(it); other != NULL; other = block_getNext(it)) {
        if (other == segment) {
            continue; // Do not compare the segment with itself.
        }
        assert(segment != segment_getReverse(other));
        Sequence *otherSequence = segment_getSequence(other);
        if (otherSequence != NULL && sequence_getMetaSequence(otherSequence) == metaSequence) {
            found = true;
            break;
        }
    }
    block_destructInstanceIterator(it);
    return found;
}
/*
 * Case-insensitive comparison of two sequence characters.
 *
 * The arguments are converted to unsigned char before being handed to
 * toupper(), because passing a negative plain char to a <ctype.h> function is
 * undefined behaviour.
 */
static bool getSnpStats_basesMatch(char a, char b) {
    return toupper((unsigned char) a) == toupper((unsigned char) b);
}

/*
 * Updates the file-level SNP statistics from a single block.
 *
 * Blocks shorter than minimumBlockLength are ignored. The block is also
 * skipped if it contains more than one segment for hap1EventString,
 * hap2EventString or assemblyEventString, or if it contains neither
 * haplotype.
 *
 * Only columns in [ignoreFirstNBasesOfBlock,
 * blockLength - ignoreFirstNBasesOfBlock) are considered. The block is scored
 * only when both of the following reach minimumIndentity (a percentage):
 *   - the identity between the two haplotypes, and
 *   - the identity between the assembly and the haplotype(s).
 * An identity whose inputs are missing is treated as passing (INT64_MAX
 * sentinel).
 *
 * Scoring then updates, per column:
 *   - totalSites / totalCorrect / totalErrors / totalCalls for columns where
 *     the haplotypes agree,
 *   - the total*Heterozygous* counters and hetPositions where they differ,
 *   - the total*OneHaplotype* counters and indelPositions where only one
 *     haplotype is present.
 *
 * fileHandle is not used by this function.
 */
static void getSnpStats(Block *block, FILE *fileHandle) {
    const int64_t blockLength = block_getLength(block);
    if (blockLength < minimumBlockLength) {
        return;
    }
    // Half-open range of scored columns; the block's flanks are trimmed.
    const int64_t columnStart = ignoreFirstNBasesOfBlock;
    const int64_t columnEnd = blockLength - ignoreFirstNBasesOfBlock;

    char *hap1Seq = NULL;
    char *hap2Seq = NULL;
    char *assemblySeq = NULL;
    Segment *hap1Segment = NULL;
    Segment *hap2Segment = NULL;

    // Gather the (at most one) string for each event of interest.
    Block_InstanceIterator *instanceIterator = block_getInstanceIterator(block);
    Segment *segment;
    while ((segment = block_getNext(instanceIterator)) != NULL) {
        const char *eventHeader = event_getHeader(segment_getEvent(segment));
        if (strcmp(eventHeader, hap1EventString) == 0) {
            if (hap1Seq != NULL) {
                goto end; // Second hap1 segment: skip the block.
            }
            hap1Seq = segment_getString(segment);
            hap1Segment = segment;
        }
        if (strcmp(eventHeader, hap2EventString) == 0) {
            if (hap2Seq != NULL) {
                goto end; // Second hap2 segment: skip the block.
            }
            hap2Seq = segment_getString(segment);
            hap2Segment = segment;
        }
        if (strcmp(eventHeader, assemblyEventString) == 0) {
            if (assemblySeq != NULL) {
                goto end; // Second assembly segment: skip the block.
            }
            assemblySeq = segment_getString(segment);
        }
    }

    assert(minimumIndentity >= 0);
    assert(minimumIndentity <= 100);
    if (hap1Seq != NULL || hap2Seq != NULL) {
        assert(hap1Seq == NULL || (int64_t) strlen(hap1Seq) == blockLength);
        assert(hap2Seq == NULL || (int64_t) strlen(hap2Seq) == blockLength);
        assert(assemblySeq == NULL || (int64_t) strlen(assemblySeq) == blockLength);

        // Pass 1: count matching columns to derive the two identities.
        double homoMatches = 0;
        double matches = 0;
        for (int64_t i = columnStart; i < columnEnd; i++) {
            if (hap1Seq != NULL && hap2Seq != NULL) {
                if (getSnpStats_basesMatch(hap1Seq[i], hap2Seq[i])) {
                    homoMatches++;
                }
            } else {
                // Only one haplotype: make the haplotype identity always pass.
                homoMatches = INT64_MAX;
            }

            if (assemblySeq == NULL) {
                // No assembly: make the assembly identity always pass.
                matches = INT64_MAX;
            } else if (hap1Seq == NULL) {
                assert(hap2Seq != NULL);
                if (getSnpStats_basesMatch(hap2Seq[i], assemblySeq[i])) {
                    matches++;
                }
            } else if (hap2Seq == NULL) {
                if (getSnpStats_basesMatch(hap1Seq[i], assemblySeq[i])) {
                    matches++;
                }
            } else if (getSnpStats_basesMatch(hap1Seq[i], hap2Seq[i])
                    && getSnpStats_basesMatch(hap1Seq[i], assemblySeq[i])) {
                matches++;
            }
        }
        const double scoredColumns = blockLength - 2.0 * ignoreFirstNBasesOfBlock;
        const double homoIdentity = 100.0 * homoMatches / scoredColumns;
        const double identity = 100.0 * matches / scoredColumns;

        if (homoIdentity >= minimumIndentity && identity >= minimumIndentity) {
            // Pass 2: the block is similar enough, accumulate the statistics.
            for (int64_t i = columnStart; i < columnEnd; i++) {
                if (hap1Seq != NULL && hap2Seq != NULL) {
                    if (getSnpStats_basesMatch(hap1Seq[i], hap2Seq[i])) {
                        // Homozygous column.
                        totalSites++;
                        if (assemblySeq != NULL) {
                            totalCorrect += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                            if (!correctFn(assemblySeq[i], hap1Seq[i])) {
                                totalErrors++;
                            }
                            totalCalls++;
                        }
                    } else {
                        // Heterozygous column.
                        totalHeterozygous++;
                        if (assemblySeq != NULL) {
                            totalCorrectInHeterozygous += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                            totalCorrectHap1InHeterozygous += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                            totalCorrectInHeterozygous += bitsScoreFn(assemblySeq[i], hap2Seq[i]);
                            totalCorrectHap2InHeterozygous += bitsScoreFn(assemblySeq[i], hap2Seq[i]);
                            totalCallsInHeterozygous++;
                            const bool agreesWithAHaplotype = correctFn(assemblySeq[i], hap1Seq[i])
                                    || correctFn(assemblySeq[i], hap2Seq[i]);
                            if (!agreesWithAHaplotype) {
                                totalErrorsInHeterozygous++;
                                stList_append(hetPositions,
                                        segmentHolder_construct(hap1Segment, i, assemblySeq[i], hap1Seq[i], hap2Seq[i]));
                            }
                        }
                    }
                } else if (hap1Seq != NULL) {
                    // Column present in haplotype 1 only.
                    totalInOneHaplotypeOnly++;
                    if (assemblySeq != NULL) {
                        totalCorrectInOneHaplotype += bitsScoreFn(assemblySeq[i], hap1Seq[i]);
                        totalCallsInOneHaplotype++;
                        if (!correctFn(assemblySeq[i], hap1Seq[i])) {
                            totalErrorsInOneHaplotype++;
                            stList_append(indelPositions,
                                    segmentHolder_construct(hap1Segment, i, assemblySeq[i], hap1Seq[i], 'N'));
                        }
                    }
                } else {
                    // Column present in haplotype 2 only (guaranteed by the
                    // enclosing "at least one haplotype" condition).
                    assert(hap2Seq != NULL);
                    totalInOneHaplotypeOnly++;
                    if (assemblySeq != NULL) {
                        totalCorrectInOneHaplotype += bitsScoreFn(assemblySeq[i], hap2Seq[i]);
                        totalCallsInOneHaplotype++;
                        if (!correctFn(assemblySeq[i], hap2Seq[i])) {
                            totalErrorsInOneHaplotype++;
                            stList_append(indelPositions,
                                    segmentHolder_construct(hap2Segment, i, assemblySeq[i], 'N', hap2Seq[i]));
                        }
                    }
                }
            }
        }
    }

    end:
    // Clean-up shared by the normal path and the skip-the-block path.
    // free(NULL) is a no-op, so no guards are needed.
    free(hap1Seq);
    free(hap2Seq);
    free(assemblySeq);
    block_destructInstanceIterator(instanceIterator);
}