예제 #1
0
/*
 * Checks that the fixture end has a non-null name and that looking that
 * name up in the fixture flower yields the end itself, including when the
 * name is taken from the end's reverse.
 */
void testEnd_getName(CuTest* testCase) {
    cactusEndTestSetup();
    //The name must be a real one.
    CuAssertTrue(testCase, NULL_NAME != end_getName(end));
    //Lookup by name resolves to the same end..
    CuAssertTrue(testCase, end == flower_getEnd(flower, end_getName(end)));
    //..and so does lookup by the name of the reverse end.
    CuAssertTrue(testCase, end == flower_getEnd(flower, end_getName(end_getReverse(end))));
    cactusEndTestTeardown();
}
예제 #2
0
/*
 * Exercises end_setGroup: adding ends to a group one at a time, removing
 * an end by setting its group to NULL, and re-setting an end to the group
 * it is already in.
 */
void testEnd_setGroup(CuTest* testCase) {
    cactusEndTestSetup();
    //Build a second flower with one group and two ends.
    Flower *otherFlower = flower_construct(cactusDisk);
    Group *targetGroup = group_construct2(otherFlower);
    End *firstEnd = end_construct(1, otherFlower);
    End *secondEnd = end_construct(1, otherFlower);

    //Initially the group is empty and neither end has a group.
    CuAssertTrue(testCase, 0 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, NULL == end_getGroup(firstEnd));
    CuAssertTrue(testCase, NULL == end_getGroup(secondEnd));

    //Add the first end only.
    end_setGroup(firstEnd, targetGroup);
    CuAssertTrue(testCase, 1 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, targetGroup == end_getGroup(firstEnd));
    CuAssertTrue(testCase, firstEnd == group_getEnd(targetGroup, end_getName(firstEnd)));
    CuAssertTrue(testCase, NULL == end_getGroup(secondEnd));

    //Add the second end as well.
    end_setGroup(secondEnd, targetGroup);
    CuAssertTrue(testCase, 2 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, targetGroup == end_getGroup(firstEnd));
    CuAssertTrue(testCase, firstEnd == group_getEnd(targetGroup, end_getName(firstEnd)));
    CuAssertTrue(testCase, targetGroup == end_getGroup(secondEnd));
    CuAssertTrue(testCase, secondEnd == group_getEnd(targetGroup, end_getName(secondEnd)));

    //Detach the second end and re-set the first end to the group it is already in.
    end_setGroup(secondEnd, NULL);
    end_setGroup(firstEnd, targetGroup);
    CuAssertTrue(testCase, 1 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, targetGroup == end_getGroup(firstEnd));
    CuAssertTrue(testCase, firstEnd == group_getEnd(targetGroup, end_getName(firstEnd)));
    CuAssertTrue(testCase, NULL == end_getGroup(secondEnd));
    cactusEndTestTeardown();
}
예제 #3
0
/*
 * Checks that group_updateContainedEnds picks up an end that was copied
 * into the nested flower after the group was set up.
 */
void testGroup_updateContainedEnds(CuTest* testCase) {
    cactusGroupTestSetup();
    //Copy end3 into the nested flower; the group does not see it yet.
    end_copyConstruct(end3, nestedFlower);
    CuAssertTrue(testCase, 2 == group_getEndNumber(group));
    //After the update the group holds all three ends.
    group_updateContainedEnds(group);
    CuAssertTrue(testCase, 3 == group_getEndNumber(group));
    CuAssertTrue(testCase, end1 == group_getEnd(group, end_getName(end1)));
    CuAssertTrue(testCase, end2 == group_getEnd(group, end_getName(end2)));
    CuAssertTrue(testCase, end3 == group_getEnd(group, end_getName(end3)));
    cactusGroupTestTeardown();
}
예제 #4
0
/*
 * Checks that end_copyConstruct produces, in a second flower, an end with
 * the same name as the original, retrievable from that flower by name, and
 * with instances retrievable under the names of the fixture caps.
 */
void testEnd_copyConstruct(CuTest* testCase) {
    cactusEndTestSetup();
    //Prepare the destination flower with a copy of the event tree and the sequence.
    Flower *targetFlower = flower_construct(cactusDisk);
    eventTree_copyConstruct(eventTree, targetFlower, testEnd_copyConstructP);
    sequence_construct(metaSequence, targetFlower);

    End *copiedEnd = end_copyConstruct(end, targetFlower);
    //The copy keeps the name of the original..
    CuAssertTrue(testCase, NULL_NAME != end_getName(copiedEnd));
    CuAssertTrue(testCase, end_getName(end) == end_getName(copiedEnd));
    //..and is registered in the destination flower.
    CuAssertTrue(testCase, copiedEnd == flower_getEnd(targetFlower, end_getName(copiedEnd)));
    //Each fixture cap has a same-named instance in the copy.
    CuAssertTrue(testCase, cap_getName(rootCap) == cap_getName(end_getInstance(copiedEnd, cap_getName(rootCap))));
    CuAssertTrue(testCase, cap_getName(leaf1Cap) == cap_getName(end_getInstance(copiedEnd, cap_getName(leaf1Cap))));
    CuAssertTrue(testCase, cap_getName(leaf2Cap) == cap_getName(end_getInstance(copiedEnd, cap_getName(leaf2Cap))));
    cactusEndTestTeardown();
}
/*
 * Find reference intervals that are book-ended by stubs created in a child flower.
 *
 * For every group of `flower` with a nested flower, each stub end of the
 * nested flower that has no same-named end in `flower` is examined. The cap
 * returned for the reference event (taken on the strand for which
 * cap_getStrand is true) is used only when cap_getSide is false for it: that
 * cap and the cap reached by traceThreadLength are copied to the parent with
 * copyCapToParent (which is handed `recoveredCaps`), made adjacent, and given
 * the traced adjacency length.
 */
static void recoverBrokenAdjacencies(Flower *flower, stList *recoveredCaps, Name referenceEventName) {
    Flower_GroupIterator *groupIterator = flower_getGroupIterator(flower);
    for (Group *group = flower_getNextGroup(groupIterator); group != NULL; group = flower_getNextGroup(groupIterator)) {
        Flower *childFlower = group_getNestedFlower(group);
        if (childFlower == NULL) {
            continue; //No nested flower for this group, nothing to look at.
        }
        Flower_EndIterator *childEndIterator = flower_getEndIterator(childFlower);
        End *childEnd;
        while ((childEnd = flower_getNextEnd(childEndIterator)) != NULL) {
            //Only stub ends missing from the parent flower are threads we need to promote.
            if (!end_isStubEnd(childEnd) || flower_getEnd(flower, end_getName(childEnd)) != NULL) {
                continue;
            }
            Cap *childCap = getCapForReferenceEvent(childEnd, referenceEventName); //The cap in the reference
            assert(childCap != NULL);
            assert(!end_isAttached(childEnd));
            if (!cap_getStrand(childCap)) {
                childCap = cap_getReverse(childCap);
            }
            //Each thread is handled once, from the cap for which cap_getSide is false.
            if (cap_getSide(childCap)) {
                continue;
            }
            Cap *adjacentChildCap = NULL;
            int64_t adjacencyLength = traceThreadLength(childCap, &adjacentChildCap);
            Cap *parentCap = copyCapToParent(childCap, recoveredCaps);
            assert(adjacentChildCap != NULL);
            assert(!end_isAttached(cap_getEnd(adjacentChildCap)));
            assert(!cap_getSide(parentCap));
            Cap *adjacentParentCap = copyCapToParent(adjacentChildCap, recoveredCaps);
            cap_makeAdjacent(parentCap, adjacentParentCap);
            setAdjacencyLength(parentCap, adjacentParentCap, adjacencyLength);
        }
        flower_destructEndIterator(childEndIterator);
    }
    flower_destructGroupIterator(groupIterator);
}
예제 #6
0
/*
 * Checks that setting the group of end4 to the (initially empty) group2
 * registers the end in the group and the group on the end.
 */
void testGroup_addEnd(CuTest *testCase) {
    cactusGroupTestSetup();
    //group2 starts out with no ends.
    CuAssertTrue(testCase, 0 == group_getEndNumber(group2));
    end_setGroup(end4, group2);
    //Both directions of the end <-> group relation must now hold.
    CuAssertTrue(testCase, 1 == group_getEndNumber(group2));
    CuAssertTrue(testCase, group2 == end_getGroup(end4));
    CuAssertTrue(testCase, end4 == group_getEnd(group2, end_getName(end4)));
    cactusGroupTestTeardown();
}
예제 #7
0
/*
 * Writes an end alignment as text to fileHandle.
 *
 * The first line holds the end's name (as a string) and the size of the
 * endAlignment set. One line then follows per element of the set, holding
 * the subsequence identifier, position, strand and score of the element,
 * followed by the same four fields of the element's `reverse`.
 */
void writeEndAlignmentToDisk(End *end, stSortedSet *endAlignment, FILE *fileHandle) {
    //Header line.
    fprintf(fileHandle, "%s %" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), stSortedSet_size(endAlignment));

    stSortedSetIterator *pairIt = stSortedSet_getIterator(endAlignment);
    for (AlignedPair *forward = stSortedSet_getNext(pairIt); forward != NULL; forward = stSortedSet_getNext(pairIt)) {
        AlignedPair *backward = forward->reverse;
        //First half of the line: the element itself.
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 " ",
                forward->subsequenceIdentifier, forward->position, forward->strand, forward->score);
        //Second half of the line: its reverse.
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 "\n",
                backward->subsequenceIdentifier, backward->position, backward->strand, backward->score);
    }
    stSortedSet_destructIterator(pairIt);
}
예제 #8
0
/*
 * Checks group_makeNestedFlower: a leaf group containing end4 becomes a
 * non-leaf group whose nested flower shares the group's name, points back
 * to the group as its parent, and contains one end (named like end4) that
 * is placed in a group.
 */
void testGroup_makeNonLeaf(CuTest *testCase) {
    cactusGroupTestSetup();
    CuAssertTrue(testCase, group_isLeaf(group2));
    end_setGroup(end4, group2);
    group_makeNestedFlower(group2);

    //The group is no longer a leaf and has a nested flower.
    CuAssertTrue(testCase, !group_isLeaf(group2));
    Flower *childFlower = group_getNestedFlower(group2);
    CuAssertTrue(testCase, NULL != childFlower);
    //NOTE(review): these three checks look at `flower`, not the nested flower - confirm that is intended.
    CuAssertTrue(testCase, !flower_builtBlocks(flower));
    CuAssertTrue(testCase, !flower_builtTrees(flower));
    CuAssertTrue(testCase, !flower_builtFaces(flower));

    //The nested flower is linked to the group in both directions.
    CuAssertTrue(testCase, group_getName(group2) == flower_getName(childFlower));
    CuAssertTrue(testCase, group2 == flower_getParentGroup(childFlower));

    //The nested flower received exactly one end, named like end4 and assigned to a group.
    CuAssertTrue(testCase, 1 == flower_getEndNumber(childFlower));
    End *childEnd = flower_getFirstEnd(childFlower);
    CuAssertTrue(testCase, end_getName(childEnd) == end_getName(end4));
    CuAssertTrue(testCase, NULL != end_getGroup(childEnd));
    CuAssertTrue(testCase, 1 == flower_getGroupNumber(childFlower));
    CuAssertTrue(testCase, flower_isTerminal(childFlower));
    cactusGroupTestTeardown();
}
/*
 * Reports whether two ends are connected with respect to the given events.
 *
 * If both ends have the same name, returns 1 when at least one cap instance
 * of end1 satisfies capHasGivenEvents(cap, eventStrings) and 0 otherwise.
 *
 * If the names differ, returns 1 when some cap of end1 that satisfies
 * capHasGivenEvents shares its meta-sequence with some cap of end2, and 0
 * when no such pair of caps exists.
 *
 * Every instance iterator created here is destructed on every return path.
 */
bool endsAreConnected(End *end1, End *end2, stList *eventStrings) {
    if (end_getName(end1) == end_getName(end2)) { //Then the ends are the same and are part of the same chromosome by definition.
        End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
        Cap *cap1;
        while ((cap1 = end_getNext(instanceIterator)) != NULL) {
            if (capHasGivenEvents(cap1, eventStrings)) {
                end_destructInstanceIterator(instanceIterator);
                return 1;
            }
        }
        //No cap matched the events: release the iterator before reporting failure
        //(previously it was leaked on this path).
        end_destructInstanceIterator(instanceIterator);
        return 0;
    }
    End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
    Cap *cap1;
    while ((cap1 = end_getNext(instanceIterator)) != NULL) {
        if (capHasGivenEvents(cap1, eventStrings)) {
            //Compare this cap against every instance of the other end.
            End_InstanceIterator *instanceIterator2 = end_getInstanceIterator(end2);
            Cap *cap2;
            while ((cap2 = end_getNext(instanceIterator2)) != NULL) {
                assert(cap_getName(cap2) != cap_getName(cap1)); //This could only happen if end1 == end2
                if (sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))) {
                    //Caps on the same meta-sequence are expected to share the event header
                    //and to have opposite orientations.
                    assert(strcmp(event_getHeader(cap_getEvent(cap1)),
                                    event_getHeader(cap_getEvent(cap2))) == 0);
                    assert(cap_getPositiveOrientation(cap1)
                            != cap_getPositiveOrientation(cap2));
                    assert(cap_getName(cap1) != cap_getName(cap2));
                    //they could have the same coordinate if they represent two ends of a block of length 1.

                    end_destructInstanceIterator(instanceIterator);
                    end_destructInstanceIterator(instanceIterator2);
                    return 1;
                }
            }
            end_destructInstanceIterator(instanceIterator2);
        }
    }
    end_destructInstanceIterator(instanceIterator);
    return 0;
}
예제 #10
0
/*
 * Removes the flower if it is redundant, i.e. it is not a leaf, has a parent
 * group and contains no blocks.
 *
 * On removal each group of the flower is re-created in the parent flower:
 * a non-leaf group's nested flower is re-parented to a new group in the
 * parent, and a leaf group's ends are assigned (by name) to a new leaf group
 * in the parent. The old parent group is then destructed and the flower is
 * deleted from the cactus disk and destructed.
 *
 * Returns 1 if the flower was removed, 0 if it was left untouched.
 */
bool flower_removeIfRedundant(Flower *flower) {
    if (flower_isLeaf(flower) || flower_getParentGroup(flower) == NULL || flower_getBlockNumber(flower) != 0) {
        return 0; //Not redundant, keep the flower.
    }

    Group *oldParentGroup = flower_getParentGroup(flower); //This group will be destructed
    //Deal with any parent chain..
    if (group_isLink(oldParentGroup)) {
        link_split(group_getLink(oldParentGroup));
    }
    Flower *parentFlower = group_getFlower(oldParentGroup); //Receives replacements for the flower's groups

    //Re-create every group of the flower one level up.
    Group *childGroup;
    Flower_GroupIterator *childGroupIt = flower_getGroupIterator(flower);
    while ((childGroup = flower_getNextGroup(childGroupIt)) != NULL) {
        Group *replacementGroup;
        if (group_isLeaf(childGroup)) {
            //Leaf group: make a new leaf group in the parent and move the matching parent ends into it.
            replacementGroup = group_construct2(parentFlower);
            End *childEnd;
            Group_EndIterator *childEndIt = group_getEndIterator(childGroup);
            while ((childEnd = group_getNextEnd(childEndIt)) != NULL) {
                End *parentEnd = flower_getEnd(parentFlower, end_getName(childEnd));
                assert(parentEnd != NULL);
                end_setGroup(parentEnd, replacementGroup);
            }
            group_destructEndIterator(childEndIt);
        } else {
            //Non-leaf group: attach its nested flower directly to the parent flower.
            Flower *nestedFlower = group_getNestedFlower(childGroup);
            assert(nestedFlower != NULL);
            replacementGroup = group_construct(parentFlower, nestedFlower);
            flower_setParentGroup(nestedFlower, replacementGroup);
        }
        group_constructChainForLink(replacementGroup);
    }
    flower_destructGroupIterator(childGroupIt);

    //The group attached to the flower should now be empty
    assert(group_getEndNumber(oldParentGroup) == 0);
    group_destruct(oldParentGroup);

    //Now wipe the flower out..
    cactusDisk_deleteFlowerFromDisk(flower_getCactusDisk(flower), flower);
    flower_destruct(flower, 0);
    return 1;
}
예제 #11
0
/*
 * Builds an alignment of the adjacency sequences attached to the cap
 * instances of `end`.
 *
 * Steps:
 *  1. For every cap of the end (using the reverse cap when cap_getSide is
 *     true) an AdjacencySequence is built with maxSequenceLength, together
 *     with a SeqFrag tagged with the name of the end on the other side of the
 *     cap's adjacency. The number of caps per such "other end" is counted.
 *  2. makeAlignment is run over the SeqFrags with sM, spanningTrees,
 *     useProgressiveMerging, gapGamma and pairwiseAlignmentBandingParameters.
 *  3. Per-sequence score adjustments are computed, separately for pairwise
 *     alignments between sequences that share the same other end ("common
 *     ends") and those that do not.
 *  4. Each aligned pair is converted to an AlignedPair with reweighted scores;
 *     the pair and its `reverse` are both inserted into the result set.
 *
 * Returns a new stSortedSet ordered by alignedPair_cmpFn and constructed with
 * alignedPair_destruct as its element destructor. All intermediate
 * structures are released before returning.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    //sequences and seqFrags are filled in lock-step, so index i refers to the same cap in both.
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    //Maps an "other end" to a heap-allocated counter of the caps adjacent to it; values are freed with the hash.
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    //NOTE(review): the literal 100000000 is passed here while maxSequenceLength is only used
    //for adjacencySequence_construct above - confirm which limit this argument represents.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        //Tuple positions 1 and 2 hold the indices of the two aligned sequences.
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId
                ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }
    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        //Number of sequences sharing this sequence's other end (including itself) and the number that do not.
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);

        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        //Adjustment = (possible partners) / (partners actually aligned); INT64_MIN marks "no alignments of this kind".
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            //The sequence itself is excluded from its own set of possible common-end partners, hence the -1.
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

	//Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);
    //Pairs are popped off mA->alignedPairs and destructed here one by one.
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        //Tuple layout: 0 = score, 1 = first sequence index, 2 = first offset, 3 = second sequence index, 4 = second offset.
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        //Offsets are added to the sequence start when its strand flag is set, subtracted otherwise.
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        //Both directions of the pair go into the result set.
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
예제 #12
0
/*
 * Checks that the fixture group returns end1 and end2 when they are
 * looked up by their names.
 */
void testGroup_getEnd(CuTest* testCase) {
    cactusGroupTestSetup();
    CuAssertTrue(testCase, end1 == group_getEnd(group, end_getName(end1)));
    CuAssertTrue(testCase, end2 == group_getEnd(group, end_getName(end2)));
    cactusGroupTestTeardown();
}
예제 #13
0
static int flower_constructEndsP(const void *o1, const void *o2) {
    return cactusMisc_nameCompare(end_getName((End *) o1), end_getName((End *) o2));
}
예제 #14
0
/*
 * Tests flower_removeIfRedundant on a small hierarchy:
 *
 *   flower --group--> flower2 --group--> flower3 --group--> flower4
 *                        \--group3b (leaf group, no nested flower)
 *
 * The test checks that the root (flower) and the leaf (flower4) are never
 * removed, then removes flower2 and flower3 in turn and verifies after each
 * removal that groups, nested flowers and ends are re-attached to the root.
 */
void testFlower_removeIfRedundant(CuTest *testCase) {
    /*
     * Do a simple test to see if function can remove a redundant flower.
     */
    cactusFlowerTestSetup();
    endsSetup();

    //First construct a redundant flower from the root.
    Flower *flower2 = flower_construct(cactusDisk);
    Group *group = group_construct(flower, flower2);
    end_setGroup(end, group);
    end_setGroup(end2, group);

    //Now hang a nested flower (flower3) off flower2.
    Flower *flower3 = flower_construct(cactusDisk);
    group_construct(flower2, flower3);

    //Also give flower2 a second group that has no nested flower.
    Group *group3b = group_construct2(flower2);

    //Finally hang one more flower on the end..
    Flower *flower4 = flower_construct(cactusDisk);
    group_construct(flower3, flower4);

    //Copy the ends into the flowers.
    end_copyConstruct(end, flower2);
    end_copyConstruct(end2, flower2);
    end_copyConstruct(end, flower3);
    //The copy of end2 in flower2 goes into the leaf group.
    end_setGroup(flower_getEnd(flower2, end_getName(end2)), group3b);
    end_copyConstruct(end, flower4);

    //st_uglyf("I got %" PRIi64 " %" PRIi64 " %" PRIi64 " %" PRIi64 "\n", flower_getName(flower), flower_getName(flower2), flower_getName(flower3), flower_getName(flower4));

    //Write the mess to disk.
    cactusDisk_write(cactusDisk);

    //Now test the removal function (check we get a negative on this leaf).
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower4));
    //Check we can't remove the root..
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower));

    //We will remove flower2

    //Before
    CuAssertTrue(testCase, flower_getGroupNumber(flower) == 1);
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(flower2)) == flower);

    CuAssertTrue(testCase, flower_removeIfRedundant(flower2));

    //After, check the flower/group connections
    //The root now has two groups: one for flower3 and one replacing the leaf group.
    CuAssertTrue(testCase, flower_getGroupNumber(flower) == 2);
    CuAssertTrue(testCase, !flower_isLeaf(flower));
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(flower3)) == flower);
    //The original group3b lived in the removed flower2, so re-fetch the group through end2.
    group3b = end_getGroup(end2);
    CuAssertTrue(testCase, group_getFlower(group3b) == flower);
    CuAssertTrue(testCase, group_isLeaf(group3b));
    CuAssertTrue(testCase, flower_getGroup(flower, flower_getName(flower3)) == flower_getParentGroup(flower3));
    //Check the ends..
    CuAssertTrue(testCase, flower_getEndNumber(flower) == 2);
    CuAssertTrue(testCase, flower_getEndNumber(flower3) == 1);
    CuAssertTrue(testCase, group_getEndNumber(group3b) == 1);
    CuAssertTrue(testCase, end_getGroup(end) == flower_getParentGroup(flower3));
    CuAssertTrue(testCase, end_getGroup(end2) == group3b);
    CuAssertTrue(testCase, flower_getEnd(flower3, end_getName(end)) != NULL);
    //Check the child of 3 is still okay..
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(flower4)) == flower3);

    //Now do removal of flower3
    //The root and the leaf must still be refused.
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower));
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower4));
    CuAssertTrue(testCase, flower_removeIfRedundant(flower3));
    //Check groups again
    //flower4 now hangs directly off the root; the group count is unchanged.
    CuAssertTrue(testCase, flower_getGroupNumber(flower) == 2);
    CuAssertTrue(testCase, !flower_isLeaf(flower));
    CuAssertTrue(testCase, group_getFlower(flower_getParentGroup(flower4)) == flower);
    CuAssertTrue(testCase, group_getFlower(group3b) == flower);
    CuAssertTrue(testCase, flower_getGroup(flower, flower_getName(flower4)) == flower_getParentGroup(flower4));
    //Check the ends again..
    CuAssertTrue(testCase, flower_getEndNumber(flower) == 2);
    CuAssertTrue(testCase, flower_getEndNumber(flower4) == 1);
    CuAssertTrue(testCase, group_getEndNumber(group3b) == 1);
    CuAssertTrue(testCase, end_getGroup(end) == flower_getParentGroup(flower4));
    CuAssertTrue(testCase, end_getGroup(end2) == group3b);
    CuAssertTrue(testCase, flower_getEnd(flower4, end_getName(end)) != NULL);

    cactusFlowerTestTeardown();
}
예제 #15
0
int main(int argc, char *argv[]) {

    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    int64_t i, j;
    int64_t spanningTrees = 10;
    int64_t maximumLength = 1500;
    bool useProgressiveMerging = 0;
    float matchGamma = 0.5;
    bool useBanding = 0;
    int64_t k;
    stList *listOfEndAlignmentFiles = NULL;
    char *endAlignmentsToPrecomputeOutputFile = NULL;
    bool calculateWhichEndsToComputeSeparately = 0;
    int64_t largeEndSize = 1000000;
    int64_t chainLengthForBigFlower = 1000000;
    int64_t longChain = 2;
    char *ingroupCoverageFilePath = NULL;
    int64_t minimumSizeToRescue = 1;
    double minimumCoverageToRescue = 0.0;

    PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters = pairwiseAlignmentBandingParameters_construct();

    /*
     * Setup the input parameters for cactus core.
     */
    bool pruneOutStubAlignments = 0;

    /*
     * Parse the options.
     */
    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, {
                "help", no_argument, 0, 'h' }, { "spanningTrees", required_argument, 0, 'i' },
                { "maximumLength", required_argument, 0, 'j' }, { "useBanding", no_argument, 0, 'k' },
                { "gapGamma", required_argument, 0, 'l' }, { "matchGamma", required_argument, 0, 'L' },
                { "splitMatrixBiggerThanThis", required_argument, 0, 'o' }, { "anchorMatrixBiggerThanThis",
                        required_argument, 0, 'p' }, { "repeatMaskMatrixBiggerThanThis", required_argument, 0, 'q' }, {
                        "diagonalExpansion", required_argument, 0, 'r' }, { "constraintDiagonalTrim", required_argument, 0, 't' }, {
                        "minimumDegree", required_argument, 0, 'u' }, { "alignAmbiguityCharacters", no_argument, 0, 'w' }, {
                        "pruneOutStubAlignments", no_argument, 0, 'y' }, {
                        "minimumIngroupDegree", required_argument, 0, 'A' }, { "minimumOutgroupDegree", required_argument, 0, 'B' },
                { "precomputedAlignments", required_argument, 0, 'D' }, {
                        "endAlignmentsToPrecomputeOutputFile", required_argument, 0, 'E' }, { "useProgressiveMerging",
                        no_argument, 0, 'F' }, { "calculateWhichEndsToComputeSeparately", no_argument, 0, 'G' }, { "largeEndSize",
                        required_argument, 0, 'I' },
                        {"ingroupCoverageFile", required_argument, 0, 'J'},
                        {"minimumSizeToRescue", required_argument, 0, 'K'},
                        {"minimumCoverageToRescue", required_argument, 0, 'M'},
                        { "minimumNumberOfSpecies", required_argument, 0, 'N' },
                        { 0, 0, 0, 0 } };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:hi:j:kl:o:p:q:r:t:u:wy:A:B:D:E:FGI:J:K:L:M:N:", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                st_setLogLevelFromString(logLevelString);
                break;
            case 'b':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                i = sscanf(optarg, "%" PRIi64 "", &spanningTrees);
                (void) i;
                assert(i == 1);
                assert(spanningTrees >= 0);
                break;
            case 'j':
                i = sscanf(optarg, "%" PRIi64 "", &maximumLength);
                assert(i == 1);
                assert(maximumLength >= 0);
                break;
            case 'k':
                useBanding = !useBanding;
                break;
            case 'l':
                i = sscanf(optarg, "%f", &pairwiseAlignmentBandingParameters->gapGamma);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->gapGamma >= 0.0);
                break;
            case 'L':
                i = sscanf(optarg, "%f", &matchGamma);
                assert(i == 1);
                assert(matchGamma >= 0.0);
                break;
            case 'o':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->splitMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'p':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->anchorMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'q':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->repeatMaskMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'r':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->diagonalExpansion);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion >= 0);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion % 2 == 0);
                break;
            case 't':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->constraintDiagonalTrim);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->constraintDiagonalTrim >= 0);
                break;
            case 'u':
                i = sscanf(optarg, "%" PRIi64 "", &minimumDegree);
                assert(i == 1);
                break;
            case 'w':
                pairwiseAlignmentBandingParameters->alignAmbiguityCharacters = 1;
                break;
            case 'y':
                pruneOutStubAlignments = 1;
                break;
            case 'A':
                i = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree);
                assert(i == 1);
                break;
            case 'B':
                i = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree);
                assert(i == 1);
                break;
            case 'D':
                listOfEndAlignmentFiles = stString_split(optarg);
                break;
            case 'E':
                endAlignmentsToPrecomputeOutputFile = stString_copy(optarg);
                break;
            case 'F':
                useProgressiveMerging = 1;
                break;
            case 'G':
                calculateWhichEndsToComputeSeparately = 1;
                break;
            case 'I':
                i = sscanf(optarg, "%" PRIi64 "", &largeEndSize);
                assert(i == 1);
                break;
            case 'J':
                ingroupCoverageFilePath = stString_copy(optarg);
                break;
            case 'K':
                i = sscanf(optarg, "%" PRIi64, &minimumSizeToRescue);
                assert(i == 1);
                break;
            case 'M':
                i = sscanf(optarg, "%lf", &minimumCoverageToRescue);
                assert(i == 1);
                break;
            case 'N':
                i = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies);
                if (i != 1) {
                    st_errAbort("Error parsing minimumNumberOfSpecies parameter");
                }
                break;
            default:
                usage();
                return 1;
        }
    }

    st_setLogLevelFromString(logLevelString);

    /*
     * Load the flowerdisk
     */
    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); //We precache the sequences
    st_logInfo("Set up the flower disk\n");

    /*
     * Load the hmm
     */
    StateMachine *sM = stateMachine5_construct(fiveState);

    /*
     * For each flower.
     */
    if (calculateWhichEndsToComputeSeparately) {
        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (stList_length(flowers) != 1) {
            st_errAbort("We are breaking up a flower's end alignments for precomputation but we have %" PRIi64 " flowers.\n", stList_length(flowers));
        }
        stSortedSet *endsToAlignSeparately = getEndsToAlignSeparately(stList_get(flowers, 0), maximumLength, largeEndSize);
        assert(stSortedSet_size(endsToAlignSeparately) != 1);
        stSortedSetIterator *it = stSortedSet_getIterator(endsToAlignSeparately);
        End *end;
        while ((end = stSortedSet_getNext(it)) != NULL) {
            fprintf(stdout, "%s\t%" PRIi64 "\t%" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), end_getInstanceNumber(end), getTotalAdjacencyLength(end));
        }
        return 0; //avoid cleanup costs
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(endsToAlignSeparately);
    } else if (endAlignmentsToPrecomputeOutputFile != NULL) {
        /*
         * In this case we will align a set of ends and save the alignments in a file.
         */
        stList *names = flowerWriter_parseNames(stdin);
        Flower *flower = cactusDisk_getFlower(cactusDisk, *((Name *)stList_get(names, 0)));
        FILE *fileHandle = fopen(endAlignmentsToPrecomputeOutputFile, "w");
        for(int64_t i=1; i<stList_length(names); i++) {
            End *end = flower_getEnd(flower, *((Name *)stList_get(names, i)));
            if (end == NULL) {
                st_errAbort("The end %" PRIi64 " was not found in the flower\n", *((Name *)stList_get(names, i)));
            }
            stSortedSet *endAlignment = makeEndAlignment(sM, end, spanningTrees, maximumLength, useProgressiveMerging,
                            matchGamma, pairwiseAlignmentBandingParameters);
            writeEndAlignmentToDisk(end, endAlignment, fileHandle);
            stSortedSet_destruct(endAlignment);
        }
        fclose(fileHandle);
        return 0; //avoid cleanup costs
        stList_destruct(names);
        st_logInfo("Finished precomputing end alignments\n");
    } else {
        /*
         * Compute complete flower alignments, possibly loading some precomputed alignments.
         */
        bedRegion *bedRegions = NULL;
        size_t numBeds = 0;
        if (ingroupCoverageFilePath != NULL) {
            // Pre-load the mmap for the coverage file.
            FILE *coverageFile = fopen(ingroupCoverageFilePath, "rb");
            if (coverageFile == NULL) {
                st_errnoAbort("Opening coverage file %s failed",
                              ingroupCoverageFilePath);
            }
            fseek(coverageFile, 0, SEEK_END);
            int64_t coverageFileLen = ftell(coverageFile);
            assert(coverageFileLen >= 0);
            assert(coverageFileLen % sizeof(bedRegion) == 0);
            if (coverageFileLen == 0) {
                // mmap doesn't like length-0 mappings, for obvious
                // reasons. Pretend that the coverage file doesn't
                // exist in this case, since it contains no data.
                ingroupCoverageFilePath = NULL;
            } else {
                // Establish a memory mapping for the file.
                bedRegions = mmap(NULL, coverageFileLen, PROT_READ, MAP_SHARED,
                                  fileno(coverageFile), 0);
                if (bedRegions == MAP_FAILED) {
                    st_errnoAbort("Failure mapping coverage file");
                }

                numBeds = coverageFileLen / sizeof(bedRegion);
            }
            fclose(coverageFile);
        }

        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (listOfEndAlignmentFiles != NULL && stList_length(flowers) != 1) {
            st_errAbort("We have precomputed alignments but %" PRIi64 " flowers to align.\n", stList_length(flowers));
        }
        cactusDisk_preCacheStrings(cactusDisk, flowers);
        for (j = 0; j < stList_length(flowers); j++) {
            flower = stList_get(flowers, j);
            st_logInfo("Processing a flower\n");

            stSortedSet *alignedPairs = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength,
                    useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters, pruneOutStubAlignments);
            st_logInfo("Created the alignment: %" PRIi64 " pairs\n", stSortedSet_size(alignedPairs));
            stPinchIterator *pinchIterator = stPinchIterator_constructFromAlignedPairs(alignedPairs, getNextAlignedPairAlignment);

            /*
             * Run the cactus caf functions to build cactus.
             */
            stPinchThreadSet *threadSet = stCaf_setup(flower);
            stCaf_anneal(threadSet, pinchIterator, NULL);
            if (minimumDegree < 2) {
                stCaf_makeDegreeOneBlocks(threadSet);
            }
            if (minimumIngroupDegree > 0 || minimumOutgroupDegree > 0 || minimumDegree > 1) {
                stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX);
            }

            if (ingroupCoverageFilePath != NULL) {
                // Rescue any sequence that is covered by outgroups
                // but currently unaligned into single-degree blocks.
                stPinchThreadSetIt pinchIt = stPinchThreadSet_getIt(threadSet);
                stPinchThread *thread;
                while ((thread = stPinchThreadSetIt_getNext(&pinchIt)) != NULL) {
                    Cap *cap = flower_getCap(flower,
                                             stPinchThread_getName(thread));
                    assert(cap != NULL);
                    Sequence *sequence = cap_getSequence(cap);
                    assert(sequence != NULL);
                    rescueCoveredRegions(thread, bedRegions, numBeds,
                                         sequence_getName(sequence),
                                         minimumSizeToRescue,
                                         minimumCoverageToRescue);
                }
                stCaf_joinTrivialBoundaries(threadSet);
            }

            stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, INT64_MAX, INT64_MAX); //Flower now destroyed.
            stPinchThreadSet_destruct(threadSet);
            st_logInfo("Ran the cactus core script.\n");

            /*
             * Cleanup
             */
            //Clean up the sorted set after cleaning up the iterator
            stPinchIterator_destruct(pinchIterator);
            stSortedSet_destruct(alignedPairs);

            st_logInfo("Finished filling in the alignments for the flower\n");
        }
        stList_destruct(flowers);
        //st_errAbort("Done\n");
        /*
         * Write and close the cactusdisk.
         */
        cactusDisk_write(cactusDisk);
        return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.
        if (bedRegions != NULL) {
            // Clean up our mapping.
            munmap(bedRegions, numBeds * sizeof(bedRegion));
        }
    }


    ///////////////////////////////////////////////////////////////////////////
    // Cleanup
    ///////////////////////////////////////////////////////////////////////////

    stateMachine_destruct(sM);
    cactusDisk_destruct(cactusDisk);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    //destructCactusCoreInputParameters(cCIP);
    free(cactusDiskDatabaseString);
    if (listOfEndAlignmentFiles != NULL) {
        stList_destruct(listOfEndAlignmentFiles);
    }
    if (logLevelString != NULL) {
        free(logLevelString);
    }
    st_logInfo("Finished with the flower disk for this flower.\n");

    //while(1);

    return 0;
}