/*
 * Checks that the fixture end has a non-null name, and that looking that name
 * up in the fixture flower returns the end itself, whether the name is taken
 * from the end or from its reverse orientation.
 */
void testEnd_getName(CuTest* testCase) {
    cactusEndTestSetup();

    // The name must be a real name, not the NULL_NAME sentinel.
    CuAssertTrue(testCase, NULL_NAME != end_getName(end));

    // Lookup by the end's own name gives back the end.
    CuAssertTrue(testCase, end == flower_getEnd(flower, end_getName(end)));

    // Lookup by the name of the reversed end gives back the same end.
    CuAssertTrue(testCase, end == flower_getEnd(flower, end_getName(end_getReverse(end))));

    cactusEndTestTeardown();
}
/*
 * Exercises end_setGroup: assigning ends to a group, assigning a second end,
 * un-assigning an end by passing NULL, and re-assigning an end to the group it
 * is already in. After each step the group's end count and the end <-> group
 * links are checked.
 */
void testEnd_setGroup(CuTest* testCase) {
    cactusEndTestSetup();

    Flower *otherFlower = flower_construct(cactusDisk);
    Group *targetGroup = group_construct2(otherFlower);
    End *firstEnd = end_construct(1, otherFlower);
    End *secondEnd = end_construct(1, otherFlower);

    // Initially the group is empty and neither end belongs to a group.
    CuAssertTrue(testCase, 0 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, NULL == end_getGroup(firstEnd));
    CuAssertTrue(testCase, NULL == end_getGroup(secondEnd));

    // Add the first end only.
    end_setGroup(firstEnd, targetGroup);
    CuAssertTrue(testCase, 1 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, targetGroup == end_getGroup(firstEnd));
    CuAssertTrue(testCase, firstEnd == group_getEnd(targetGroup, end_getName(firstEnd)));
    CuAssertTrue(testCase, NULL == end_getGroup(secondEnd));

    // Add the second end as well.
    end_setGroup(secondEnd, targetGroup);
    CuAssertTrue(testCase, 2 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, targetGroup == end_getGroup(firstEnd));
    CuAssertTrue(testCase, firstEnd == group_getEnd(targetGroup, end_getName(firstEnd)));
    CuAssertTrue(testCase, targetGroup == end_getGroup(secondEnd));
    CuAssertTrue(testCase, secondEnd == group_getEnd(targetGroup, end_getName(secondEnd)));

    // Detach the second end and re-set the first end to the same group.
    end_setGroup(secondEnd, NULL);
    end_setGroup(firstEnd, targetGroup);
    CuAssertTrue(testCase, 1 == group_getEndNumber(targetGroup));
    CuAssertTrue(testCase, targetGroup == end_getGroup(firstEnd));
    CuAssertTrue(testCase, firstEnd == group_getEnd(targetGroup, end_getName(firstEnd)));
    CuAssertTrue(testCase, NULL == end_getGroup(secondEnd));

    cactusEndTestTeardown();
}
/*
 * Checks group_updateContainedEnds: after end3 is copied into the nested
 * flower, the group still reports two ends until the update is run, after
 * which it reports three and can return each of end1, end2 and end3 by name.
 */
void testGroup_updateContainedEnds(CuTest* testCase) {
    cactusGroupTestSetup();

    end_copyConstruct(end3, nestedFlower);

    // Copying alone does not change the group's end count.
    CuAssertTrue(testCase, 2 == group_getEndNumber(group));

    group_updateContainedEnds(group);

    // After the update all three ends are reachable through the group.
    CuAssertTrue(testCase, 3 == group_getEndNumber(group));
    CuAssertTrue(testCase, end1 == group_getEnd(group, end_getName(end1)));
    CuAssertTrue(testCase, end2 == group_getEnd(group, end_getName(end2)));
    CuAssertTrue(testCase, end3 == group_getEnd(group, end_getName(end3)));

    cactusGroupTestTeardown();
}
/*
 * Checks end_copyConstruct: the fixture end is copied into a freshly built
 * flower (which first receives a copy of the event tree and a sequence), and
 * the copy must keep the original name, be registered in the new flower, and
 * contain instances with the same names as the root and both leaf caps.
 */
void testEnd_copyConstruct(CuTest* testCase) {
    cactusEndTestSetup();

    // Build the destination flower with the event tree and sequence set up
    // before the end is copied into it.
    Flower *destinationFlower = flower_construct(cactusDisk);
    eventTree_copyConstruct(eventTree, destinationFlower, testEnd_copyConstructP);
    sequence_construct(metaSequence, destinationFlower);

    End *copiedEnd = end_copyConstruct(end, destinationFlower);

    // The copy shares the original's (non-null) name and is found by it.
    CuAssertTrue(testCase, NULL_NAME != end_getName(copiedEnd));
    CuAssertTrue(testCase, end_getName(end) == end_getName(copiedEnd));
    CuAssertTrue(testCase, copiedEnd == flower_getEnd(destinationFlower, end_getName(copiedEnd)));

    // Each original cap has a same-named instance in the copied end.
    CuAssertTrue(testCase,
            cap_getName(rootCap) == cap_getName(end_getInstance(copiedEnd, cap_getName(rootCap))));
    CuAssertTrue(testCase,
            cap_getName(leaf1Cap) == cap_getName(end_getInstance(copiedEnd, cap_getName(leaf1Cap))));
    CuAssertTrue(testCase,
            cap_getName(leaf2Cap) == cap_getName(end_getInstance(copiedEnd, cap_getName(leaf2Cap))));

    cactusEndTestTeardown();
}
/*
 * Finds reference intervals that are book-ended by stubs created in a child
 * flower and promotes them into the given flower.
 *
 * For every group with a nested flower, each stub end of the nested flower
 * that has no counterpart (by name) in `flower` is examined. The cap for the
 * reference event is taken on its positive strand; if that cap is not on the
 * "side" reported by cap_getSide, the thread is traced to the cap at its other
 * end, both caps are copied to the parent via copyCapToParent (which is handed
 * `recoveredCaps`), made adjacent, and given the traced adjacency length.
 */
static void recoverBrokenAdjacencies(Flower *flower, stList *recoveredCaps, Name referenceEventName) {
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        Flower *nestedFlower = group_getNestedFlower(group);
        if (nestedFlower == NULL) {
            continue; // Leaf group, nothing nested to inspect.
        }

        Flower_EndIterator *endIt = flower_getEndIterator(nestedFlower);
        End *childEnd;
        while ((childEnd = flower_getNextEnd(endIt)) != NULL) {
            // Only stub ends that are missing from the parent need promoting.
            if (!end_isStubEnd(childEnd) || flower_getEnd(flower, end_getName(childEnd)) != NULL) {
                continue;
            }

            // The cap in the reference.
            Cap *childCap = getCapForReferenceEvent(childEnd, referenceEventName);
            assert(childCap != NULL);
            assert(!end_isAttached(childEnd));

            // Work with the positive-strand orientation of the cap.
            if (!cap_getStrand(childCap)) {
                childCap = cap_getReverse(childCap);
            }

            // Each thread is handled once, starting from the cap for which
            // cap_getSide is false.
            if (cap_getSide(childCap)) {
                continue;
            }

            Cap *adjacentChildCap = NULL;
            int64_t adjacencyLength = traceThreadLength(childCap, &adjacentChildCap);
            Cap *cap = copyCapToParent(childCap, recoveredCaps);
            assert(adjacentChildCap != NULL);
            assert(!end_isAttached(cap_getEnd(adjacentChildCap)));
            assert(!cap_getSide(cap));
            Cap *adjacentCap = copyCapToParent(adjacentChildCap, recoveredCaps);

            cap_makeAdjacent(cap, adjacentCap);
            setAdjacencyLength(cap, adjacentCap, adjacencyLength);
        }
        flower_destructEndIterator(endIt);
    }
    flower_destructGroupIterator(groupIt);
}
/*
 * Checks that assigning end4 to the initially empty group2 via end_setGroup
 * raises the group's end count to one and links the end and group both ways.
 */
void testGroup_addEnd(CuTest *testCase) {
    cactusGroupTestSetup();

    // group2 starts with no ends.
    CuAssertTrue(testCase, 0 == group_getEndNumber(group2));

    end_setGroup(end4, group2);

    CuAssertTrue(testCase, 1 == group_getEndNumber(group2));
    CuAssertTrue(testCase, group2 == end_getGroup(end4));
    CuAssertTrue(testCase, end4 == group_getEnd(group2, end_getName(end4)));

    cactusGroupTestTeardown();
}
/*
 * Writes an end alignment to an open file handle as text.
 *
 * The first line holds the end's name (as a string) and the number of entries
 * in the set. Each following line describes one aligned pair from the set:
 * subsequence identifier, position, strand and score of the pair, followed by
 * the same four fields of the pair's `reverse` record.
 *
 * end          - the end the alignment belongs to.
 * endAlignment - sorted set of AlignedPair records to write.
 * fileHandle   - destination stream; it is written to but not closed here.
 */
void writeEndAlignmentToDisk(End *end, stSortedSet *endAlignment, FILE *fileHandle) {
    // Header line: end name and pair count.
    fprintf(fileHandle, "%s %" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)),
            stSortedSet_size(endAlignment));

    stSortedSetIterator *pairIt = stSortedSet_getIterator(endAlignment);
    for (AlignedPair *forwardPair = stSortedSet_getNext(pairIt); forwardPair != NULL;
            forwardPair = stSortedSet_getNext(pairIt)) {
        AlignedPair *reversePair = forwardPair->reverse;

        // First half of the line: the pair itself (space terminated).
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 " ", forwardPair->subsequenceIdentifier,
                forwardPair->position, forwardPair->strand, forwardPair->score);

        // Second half of the line: its reverse record (newline terminated).
        fprintf(fileHandle, "%" PRIi64 " %" PRIi64 " %i %" PRIi64 "\n", reversePair->subsequenceIdentifier,
                reversePair->position, reversePair->strand, reversePair->score);
    }
    stSortedSet_destructIterator(pairIt);
}
/*
 * Checks group_makeNestedFlower: a leaf group holding one end is given a
 * nested flower, after which the group is no longer a leaf and the nested
 * flower carries the group's name, points back to the group as its parent,
 * holds a single same-named copy of the end in a single group, and is
 * terminal.
 */
void testGroup_makeNonLeaf(CuTest *testCase) {
    cactusGroupTestSetup();

    // group2 starts out as a leaf.
    CuAssertTrue(testCase, group_isLeaf(group2));

    end_setGroup(end4, group2);
    group_makeNestedFlower(group2);

    // The group now has a nested flower.
    CuAssertTrue(testCase, !group_isLeaf(group2));
    Flower *childFlower = group_getNestedFlower(group2);
    CuAssertTrue(testCase, NULL != childFlower);

    // The fixture flower's build flags are all still unset.
    CuAssertTrue(testCase, !flower_builtBlocks(flower));
    CuAssertTrue(testCase, !flower_builtTrees(flower));
    CuAssertTrue(testCase, !flower_builtFaces(flower));

    // Nested flower <-> group linkage.
    CuAssertTrue(testCase, group_getName(group2) == flower_getName(childFlower));
    CuAssertTrue(testCase, group2 == flower_getParentGroup(childFlower));

    // The single end of the group was copied into the nested flower.
    CuAssertTrue(testCase, 1 == flower_getEndNumber(childFlower));
    End *copiedEnd = flower_getFirstEnd(childFlower);
    CuAssertTrue(testCase, end_getName(copiedEnd) == end_getName(end4));
    CuAssertTrue(testCase, NULL != end_getGroup(copiedEnd));

    CuAssertTrue(testCase, 1 == flower_getGroupNumber(childFlower));
    CuAssertTrue(testCase, flower_isTerminal(childFlower));

    cactusGroupTestTeardown();
}
/*
 * Reports whether two ends are connected with respect to a set of events.
 *
 * If both ends have the same name they are treated as the same end (part of
 * the same chromosome by definition); the result is then true iff at least
 * one cap of end1 satisfies capHasGivenEvents(cap, eventStrings).
 *
 * Otherwise the result is true iff some cap of end1 that satisfies
 * capHasGivenEvents shares its meta-sequence with some cap of end2.
 *
 * end1, end2   - the ends to compare.
 * eventStrings - passed through unchanged to capHasGivenEvents.
 *
 * Returns 1 if connected, 0 otherwise. Every instance iterator created here is
 * destroyed on every return path (the original leaked the iterator when the
 * same-name branch found no matching cap).
 */
bool endsAreConnected(End *end1, End *end2, stList *eventStrings) {
    Cap *cap1;

    if (end_getName(end1) == end_getName(end2)) {
        //Then the ends are the same and are part of the same chromosome by definition.
        bool anyCapHasEvents = 0;
        End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
        while ((cap1 = end_getNext(instanceIterator)) != NULL) {
            if (capHasGivenEvents(cap1, eventStrings)) {
                anyCapHasEvents = 1;
                break;
            }
        }
        // Single exit point so the iterator is released whether or not a cap matched.
        end_destructInstanceIterator(instanceIterator);
        return anyCapHasEvents;
    }

    bool connected = 0;
    End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
    while (!connected && (cap1 = end_getNext(instanceIterator)) != NULL) {
        if (!capHasGivenEvents(cap1, eventStrings)) {
            continue;
        }
        End_InstanceIterator *instanceIterator2 = end_getInstanceIterator(end2);
        Cap *cap2;
        while ((cap2 = end_getNext(instanceIterator2)) != NULL) {
            assert(cap_getName(cap2) != cap_getName(cap1)); //This could only happen if end1 == end2
            if (sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))) {
                assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader(cap_getEvent(cap2))) == 0);
                assert(cap_getPositiveOrientation(cap1) != cap_getPositiveOrientation(cap2));
                //they could have the same coordinate if they represent two ends of a block of length 1.
                assert(cap_getName(cap1) != cap_getName(cap2));
                connected = 1;
                break;
            }
        }
        end_destructInstanceIterator(instanceIterator2);
    }
    end_destructInstanceIterator(instanceIterator);
    return connected;
}
/*
 * Removes a flower from the hierarchy if it is redundant, i.e. it is not a
 * leaf, it has a parent group, and it contains no blocks.
 *
 * When the flower is removed, each of its groups is re-created directly in the
 * parent flower: a non-leaf group's nested flower is re-parented to a new
 * group in the parent, and a leaf group's ends (looked up by name in the
 * parent) are moved to a new leaf group in the parent. The old parent group is
 * then destructed, and the flower is deleted from disk and destructed.
 *
 * Returns 1 if the flower was removed, 0 if it was left untouched.
 */
bool flower_removeIfRedundant(Flower *flower) {
    // Not redundant: leave it alone.
    if (flower_isLeaf(flower) || flower_getParentGroup(flower) == NULL || flower_getBlockNumber(flower) != 0) {
        return 0;
    }

    // This group will be destructed once its contents have been moved.
    Group *parentGroup = flower_getParentGroup(flower);

    // Deal with any parent chain: split the link the parent group sits in.
    if (group_isLink(parentGroup)) {
        link_split(group_getLink(parentGroup));
    }

    // The flower's groups are added to this flower.
    Flower *parentFlower = group_getFlower(parentGroup);

    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        Group *newParentGroup;
        if (group_isLeaf(group)) {
            // Leaf group: build an empty group in the parent and move the
            // parent's copies of the ends into it.
            newParentGroup = group_construct2(parentFlower);
            Group_EndIterator *endIt = group_getEndIterator(group);
            End *end;
            while ((end = group_getNextEnd(endIt)) != NULL) {
                End *parentEnd = flower_getEnd(parentFlower, end_getName(end));
                assert(parentEnd != NULL);
                end_setGroup(parentEnd, newParentGroup);
            }
            group_destructEndIterator(endIt);
        } else {
            // Non-leaf group: hang its nested flower off a new group in the parent.
            Flower *nestedFlower = group_getNestedFlower(group);
            assert(nestedFlower != NULL);
            newParentGroup = group_construct(parentFlower, nestedFlower);
            flower_setParentGroup(nestedFlower, newParentGroup);
        }
        group_constructChainForLink(newParentGroup);
    }
    flower_destructGroupIterator(groupIt);

    // The group attached to the flower should now be empty.
    assert(group_getEndNumber(parentGroup) == 0);
    group_destruct(parentGroup);

    // Now wipe the flower out, on disk and in memory.
    cactusDisk_deleteFlowerFromDisk(flower_getCactusDisk(flower), flower);
    flower_destruct(flower, 0);
    return 1;
}
/*
 * Builds a multiple alignment of the adjacency sequences attached to the caps
 * of an end and returns it as a sorted set of AlignedPair records.
 *
 * Steps, as performed below:
 *  1. For every cap in the end (flipped so that cap_getSide is false), build
 *     an AdjacencySequence and a matching SeqFrag tagged with the name of the
 *     end on the other side of the cap's adjacency; count how many sequences
 *     lead to each such "other end".
 *  2. Call makeAlignment on the SeqFrags.
 *  3. Count, per sequence, how many chosen pairwise alignments it takes part
 *     in, separately for partners with the same other end ("common ends") and
 *     partners with a different other end ("non-common ends").
 *  4. Derive per-sequence score multipliers from those counts.
 *  5. Convert every aligned pair into an AlignedPair (plus its reverse) with
 *     the reweighted scores and insert both into the returned set.
 *
 * Parameters sM, spanningTrees, useProgressiveMerging, gapGamma and
 * pairwiseAlignmentBandingParameters are forwarded unchanged to makeAlignment;
 * maxSequenceLength is forwarded to adjacencySequence_construct.
 *
 * The returned set is constructed with alignedPair_destruct as its element
 * destructor; all temporary structures are freed before returning.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength, bool useProgressiveMerging, float gapGamma, PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    //Maps each "other end" to a heap-allocated count of sequences leading to it (values freed with free).
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        //Normalise the orientation so that cap_getSide(cap) is false.
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        //sequences and seqFrags are appended in lock-step, so index k refers to the same cap in both lists.
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    //NOTE(review): the literal 100000000 is makeAlignment's fourth argument; its meaning is not visible in this file - confirm against makeAlignment's declaration.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));

    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        //Tuple fields 1 and 2 are used as the indices of the two aligned sequences.
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        //Pick the counter array according to whether both sequences lead to the same other end.
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }

    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        //Number of sequences (including this one) sharing this sequence's other end, and the number that do not.
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;
        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);
        //Earlier, disabled variant that used a single combined adjustment for both classes:
        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        //Adjustment = (possible partners in the class) / (partners actually aligned in the class).
        //INT64_MIN marks "no alignments in this class"; the conversion loop below asserts it is never used.
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            //The sequence itself is excluded from its own class, hence commonInstanceNumber-1.
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

    //Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment = stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn, (void (*)(void *))alignedPair_destruct);
    //Pairs are popped off mA->alignedPairs, so that list is empty when the loop finishes.
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        //Tuple layout as read here: 0 = score, 1 = sequence index 1, 2 = offset 1, 3 = sequence index 2, 4 = offset 2.
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        //Offsets are added on the positive strand and subtracted on the negative strand to get positions.
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        //Both directions of the pair are stored in the set.
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);
    return sortedAlignment;
}
/*
 * Checks that group_getEnd returns end1 and end2 when asked for them by name
 * in the fixture group.
 */
void testGroup_getEnd(CuTest* testCase) {
    cactusGroupTestSetup();

    CuAssertTrue(testCase, end1 == group_getEnd(group, end_getName(end1)));
    CuAssertTrue(testCase, end2 == group_getEnd(group, end_getName(end2)));

    cactusGroupTestTeardown();
}
static int flower_constructEndsP(const void *o1, const void *o2) { return cactusMisc_nameCompare(end_getName((End *) o1), end_getName((End *) o2)); }
/*
 * Does a simple test to see if flower_removeIfRedundant can remove a redundant
 * flower.
 *
 * A chain root -> child -> grandchild -> terminal is built (with an extra leaf
 * group in the child). The function must refuse to remove the terminal flower
 * and the root, must remove the child and then the grandchild, and after each
 * removal the surviving flowers, groups and ends must be re-attached to the
 * root as checked below.
 */
void testFlower_removeIfRedundant(CuTest *testCase) {
    cactusFlowerTestSetup();
    endsSetup();

    // First construct a redundant flower hanging from the root, and put both
    // fixture ends into the group that holds it.
    Flower *childFlower = flower_construct(cactusDisk);
    Group *rootGroup = group_construct(flower, childFlower);
    end_setGroup(end, rootGroup);
    end_setGroup(end2, rootGroup);

    // Now hang another flower off that.
    Flower *grandchildFlower = flower_construct(cactusDisk);
    group_construct(childFlower, grandchildFlower);

    // And a leaf group next to it in the child flower.
    Group *leafGroup = group_construct2(childFlower);

    // Finally hang one more flower on the end.
    Flower *terminalFlower = flower_construct(cactusDisk);
    group_construct(grandchildFlower, terminalFlower);

    // Copy the ends into the flowers.
    end_copyConstruct(end, childFlower);
    end_copyConstruct(end2, childFlower);
    end_copyConstruct(end, grandchildFlower);
    end_setGroup(flower_getEnd(childFlower, end_getName(end2)), leafGroup);
    end_copyConstruct(end, terminalFlower);

    // Write the mess to disk.
    cactusDisk_write(cactusDisk);

    // The terminal flower and the root must both be refused.
    CuAssertTrue(testCase, !flower_removeIfRedundant(terminalFlower));
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower));

    // --- Removal of the child flower ---
    // Before.
    CuAssertTrue(testCase, 1 == flower_getGroupNumber(flower));
    CuAssertTrue(testCase, flower == group_getFlower(flower_getParentGroup(childFlower)));
    CuAssertTrue(testCase, flower_removeIfRedundant(childFlower));

    // After: check the flower/group connections.
    CuAssertTrue(testCase, 2 == flower_getGroupNumber(flower));
    CuAssertTrue(testCase, !flower_isLeaf(flower));
    Group *grandchildParent = flower_getParentGroup(grandchildFlower);
    CuAssertTrue(testCase, flower == group_getFlower(grandchildParent));
    leafGroup = end_getGroup(end2);
    CuAssertTrue(testCase, flower == group_getFlower(leafGroup));
    CuAssertTrue(testCase, group_isLeaf(leafGroup));
    CuAssertTrue(testCase, grandchildParent == flower_getGroup(flower, flower_getName(grandchildFlower)));

    // Check the ends.
    CuAssertTrue(testCase, 2 == flower_getEndNumber(flower));
    CuAssertTrue(testCase, 1 == flower_getEndNumber(grandchildFlower));
    CuAssertTrue(testCase, 1 == group_getEndNumber(leafGroup));
    CuAssertTrue(testCase, grandchildParent == end_getGroup(end));
    CuAssertTrue(testCase, leafGroup == end_getGroup(end2));
    CuAssertTrue(testCase, NULL != flower_getEnd(grandchildFlower, end_getName(end)));

    // Check the child of the grandchild is still okay.
    CuAssertTrue(testCase, grandchildFlower == group_getFlower(flower_getParentGroup(terminalFlower)));

    // --- Removal of the grandchild flower ---
    CuAssertTrue(testCase, !flower_removeIfRedundant(flower));
    CuAssertTrue(testCase, !flower_removeIfRedundant(terminalFlower));
    CuAssertTrue(testCase, flower_removeIfRedundant(grandchildFlower));

    // Check groups again.
    CuAssertTrue(testCase, 2 == flower_getGroupNumber(flower));
    CuAssertTrue(testCase, !flower_isLeaf(flower));
    Group *terminalParent = flower_getParentGroup(terminalFlower);
    CuAssertTrue(testCase, flower == group_getFlower(terminalParent));
    CuAssertTrue(testCase, flower == group_getFlower(leafGroup));
    CuAssertTrue(testCase, terminalParent == flower_getGroup(flower, flower_getName(terminalFlower)));

    // Check the ends again.
    CuAssertTrue(testCase, 2 == flower_getEndNumber(flower));
    CuAssertTrue(testCase, 1 == flower_getEndNumber(terminalFlower));
    CuAssertTrue(testCase, 1 == group_getEndNumber(leafGroup));
    CuAssertTrue(testCase, terminalParent == end_getGroup(end));
    CuAssertTrue(testCase, leafGroup == end_getGroup(end2));
    CuAssertTrue(testCase, NULL != flower_getEnd(terminalFlower, end_getName(end)));

    cactusFlowerTestTeardown();
}
int main(int argc, char *argv[]) { char * logLevelString = NULL; char * cactusDiskDatabaseString = NULL; int64_t i, j; int64_t spanningTrees = 10; int64_t maximumLength = 1500; bool useProgressiveMerging = 0; float matchGamma = 0.5; bool useBanding = 0; int64_t k; stList *listOfEndAlignmentFiles = NULL; char *endAlignmentsToPrecomputeOutputFile = NULL; bool calculateWhichEndsToComputeSeparately = 0; int64_t largeEndSize = 1000000; int64_t chainLengthForBigFlower = 1000000; int64_t longChain = 2; char *ingroupCoverageFilePath = NULL; int64_t minimumSizeToRescue = 1; double minimumCoverageToRescue = 0.0; PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters = pairwiseAlignmentBandingParameters_construct(); /* * Setup the input parameters for cactus core. */ bool pruneOutStubAlignments = 0; /* * Parse the options. */ while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "help", no_argument, 0, 'h' }, { "spanningTrees", required_argument, 0, 'i' }, { "maximumLength", required_argument, 0, 'j' }, { "useBanding", no_argument, 0, 'k' }, { "gapGamma", required_argument, 0, 'l' }, { "matchGamma", required_argument, 0, 'L' }, { "splitMatrixBiggerThanThis", required_argument, 0, 'o' }, { "anchorMatrixBiggerThanThis", required_argument, 0, 'p' }, { "repeatMaskMatrixBiggerThanThis", required_argument, 0, 'q' }, { "diagonalExpansion", required_argument, 0, 'r' }, { "constraintDiagonalTrim", required_argument, 0, 't' }, { "minimumDegree", required_argument, 0, 'u' }, { "alignAmbiguityCharacters", no_argument, 0, 'w' }, { "pruneOutStubAlignments", no_argument, 0, 'y' }, { "minimumIngroupDegree", required_argument, 0, 'A' }, { "minimumOutgroupDegree", required_argument, 0, 'B' }, { "precomputedAlignments", required_argument, 0, 'D' }, { "endAlignmentsToPrecomputeOutputFile", required_argument, 0, 'E' }, { "useProgressiveMerging", no_argument, 0, 'F' }, { 
"calculateWhichEndsToComputeSeparately", no_argument, 0, 'G' }, { "largeEndSize", required_argument, 0, 'I' }, {"ingroupCoverageFile", required_argument, 0, 'J'}, {"minimumSizeToRescue", required_argument, 0, 'K'}, {"minimumCoverageToRescue", required_argument, 0, 'M'}, { "minimumNumberOfSpecies", required_argument, 0, 'N' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "a:b:hi:j:kl:o:p:q:r:t:u:wy:A:B:D:E:FGI:J:K:L:M:N:", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = stString_copy(optarg); st_setLogLevelFromString(logLevelString); break; case 'b': cactusDiskDatabaseString = stString_copy(optarg); break; case 'h': usage(); return 0; case 'i': i = sscanf(optarg, "%" PRIi64 "", &spanningTrees); (void) i; assert(i == 1); assert(spanningTrees >= 0); break; case 'j': i = sscanf(optarg, "%" PRIi64 "", &maximumLength); assert(i == 1); assert(maximumLength >= 0); break; case 'k': useBanding = !useBanding; break; case 'l': i = sscanf(optarg, "%f", &pairwiseAlignmentBandingParameters->gapGamma); assert(i == 1); assert(pairwiseAlignmentBandingParameters->gapGamma >= 0.0); break; case 'L': i = sscanf(optarg, "%f", &matchGamma); assert(i == 1); assert(matchGamma >= 0.0); break; case 'o': i = sscanf(optarg, "%" PRIi64 "", &k); assert(i == 1); assert(k >= 0); pairwiseAlignmentBandingParameters->splitMatrixBiggerThanThis = (int64_t) k * k; break; case 'p': i = sscanf(optarg, "%" PRIi64 "", &k); assert(i == 1); assert(k >= 0); pairwiseAlignmentBandingParameters->anchorMatrixBiggerThanThis = (int64_t) k * k; break; case 'q': i = sscanf(optarg, "%" PRIi64 "", &k); assert(i == 1); assert(k >= 0); pairwiseAlignmentBandingParameters->repeatMaskMatrixBiggerThanThis = (int64_t) k * k; break; case 'r': i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->diagonalExpansion); assert(i == 1); assert(pairwiseAlignmentBandingParameters->diagonalExpansion >= 0); 
assert(pairwiseAlignmentBandingParameters->diagonalExpansion % 2 == 0); break; case 't': i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->constraintDiagonalTrim); assert(i == 1); assert(pairwiseAlignmentBandingParameters->constraintDiagonalTrim >= 0); break; case 'u': i = sscanf(optarg, "%" PRIi64 "", &minimumDegree); assert(i == 1); break; case 'w': pairwiseAlignmentBandingParameters->alignAmbiguityCharacters = 1; break; case 'y': pruneOutStubAlignments = 1; break; case 'A': i = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree); assert(i == 1); break; case 'B': i = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree); assert(i == 1); break; case 'D': listOfEndAlignmentFiles = stString_split(optarg); break; case 'E': endAlignmentsToPrecomputeOutputFile = stString_copy(optarg); break; case 'F': useProgressiveMerging = 1; break; case 'G': calculateWhichEndsToComputeSeparately = 1; break; case 'I': i = sscanf(optarg, "%" PRIi64 "", &largeEndSize); assert(i == 1); break; case 'J': ingroupCoverageFilePath = stString_copy(optarg); break; case 'K': i = sscanf(optarg, "%" PRIi64, &minimumSizeToRescue); assert(i == 1); break; case 'M': i = sscanf(optarg, "%lf", &minimumCoverageToRescue); assert(i == 1); break; case 'N': i = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies); if (i != 1) { st_errAbort("Error parsing minimumNumberOfSpecies parameter"); } break; default: usage(); return 1; } } st_setLogLevelFromString(logLevelString); /* * Load the flowerdisk */ stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); //We precache the sequences st_logInfo("Set up the flower disk\n"); /* * Load the hmm */ StateMachine *sM = stateMachine5_construct(fiveState); /* * For each flower. 
*/ if (calculateWhichEndsToComputeSeparately) { stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk); if (stList_length(flowers) != 1) { st_errAbort("We are breaking up a flower's end alignments for precomputation but we have %" PRIi64 " flowers.\n", stList_length(flowers)); } stSortedSet *endsToAlignSeparately = getEndsToAlignSeparately(stList_get(flowers, 0), maximumLength, largeEndSize); assert(stSortedSet_size(endsToAlignSeparately) != 1); stSortedSetIterator *it = stSortedSet_getIterator(endsToAlignSeparately); End *end; while ((end = stSortedSet_getNext(it)) != NULL) { fprintf(stdout, "%s\t%" PRIi64 "\t%" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), end_getInstanceNumber(end), getTotalAdjacencyLength(end)); } return 0; //avoid cleanup costs stSortedSet_destructIterator(it); stSortedSet_destruct(endsToAlignSeparately); } else if (endAlignmentsToPrecomputeOutputFile != NULL) { /* * In this case we will align a set of end and save the alignments in a file. */ stList *names = flowerWriter_parseNames(stdin); Flower *flower = cactusDisk_getFlower(cactusDisk, *((Name *)stList_get(names, 0))); FILE *fileHandle = fopen(endAlignmentsToPrecomputeOutputFile, "w"); for(int64_t i=1; i<stList_length(names); i++) { End *end = flower_getEnd(flower, *((Name *)stList_get(names, i))); if (end == NULL) { st_errAbort("The end %" PRIi64 " was not found in the flower\n", *((Name *)stList_get(names, i))); } stSortedSet *endAlignment = makeEndAlignment(sM, end, spanningTrees, maximumLength, useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters); writeEndAlignmentToDisk(end, endAlignment, fileHandle); stSortedSet_destruct(endAlignment); } fclose(fileHandle); return 0; //avoid cleanup costs stList_destruct(names); st_logInfo("Finished precomputing end alignments\n"); } else { /* * Compute complete flower alignments, possibly loading some precomputed alignments. 
*/ bedRegion *bedRegions = NULL; size_t numBeds = 0; if (ingroupCoverageFilePath != NULL) { // Pre-load the mmap for the coverage file. FILE *coverageFile = fopen(ingroupCoverageFilePath, "rb"); if (coverageFile == NULL) { st_errnoAbort("Opening coverage file %s failed", ingroupCoverageFilePath); } fseek(coverageFile, 0, SEEK_END); int64_t coverageFileLen = ftell(coverageFile); assert(coverageFileLen >= 0); assert(coverageFileLen % sizeof(bedRegion) == 0); if (coverageFileLen == 0) { // mmap doesn't like length-0 mappings, for obvious // reasons. Pretend that the coverage file doesn't // exist in this case, since it contains no data. ingroupCoverageFilePath = NULL; } else { // Establish a memory mapping for the file. bedRegions = mmap(NULL, coverageFileLen, PROT_READ, MAP_SHARED, fileno(coverageFile), 0); if (bedRegions == MAP_FAILED) { st_errnoAbort("Failure mapping coverage file"); } numBeds = coverageFileLen / sizeof(bedRegion); } fclose(coverageFile); } stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk); if (listOfEndAlignmentFiles != NULL && stList_length(flowers) != 1) { st_errAbort("We have precomputed alignments but %" PRIi64 " flowers to align.\n", stList_length(flowers)); } cactusDisk_preCacheStrings(cactusDisk, flowers); for (j = 0; j < stList_length(flowers); j++) { flower = stList_get(flowers, j); st_logInfo("Processing a flower\n"); stSortedSet *alignedPairs = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength, useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters, pruneOutStubAlignments); st_logInfo("Created the alignment: %" PRIi64 " pairs\n", stSortedSet_size(alignedPairs)); stPinchIterator *pinchIterator = stPinchIterator_constructFromAlignedPairs(alignedPairs, getNextAlignedPairAlignment); /* * Run the cactus caf functions to build cactus. 
*/ stPinchThreadSet *threadSet = stCaf_setup(flower); stCaf_anneal(threadSet, pinchIterator, NULL); if (minimumDegree < 2) { stCaf_makeDegreeOneBlocks(threadSet); } if (minimumIngroupDegree > 0 || minimumOutgroupDegree > 0 || minimumDegree > 1) { stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX); } if (ingroupCoverageFilePath != NULL) { // Rescue any sequence that is covered by outgroups // but currently unaligned into single-degree blocks. stPinchThreadSetIt pinchIt = stPinchThreadSet_getIt(threadSet); stPinchThread *thread; while ((thread = stPinchThreadSetIt_getNext(&pinchIt)) != NULL) { Cap *cap = flower_getCap(flower, stPinchThread_getName(thread)); assert(cap != NULL); Sequence *sequence = cap_getSequence(cap); assert(sequence != NULL); rescueCoveredRegions(thread, bedRegions, numBeds, sequence_getName(sequence), minimumSizeToRescue, minimumCoverageToRescue); } stCaf_joinTrivialBoundaries(threadSet); } stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, INT64_MAX, INT64_MAX); //Flower now destroyed. stPinchThreadSet_destruct(threadSet); st_logInfo("Ran the cactus core script.\n"); /* * Cleanup */ //Clean up the sorted set after cleaning up the iterator stPinchIterator_destruct(pinchIterator); stSortedSet_destruct(alignedPairs); st_logInfo("Finished filling in the alignments for the flower\n"); } stList_destruct(flowers); //st_errAbort("Done\n"); /* * Write and close the cactusdisk. */ cactusDisk_write(cactusDisk); return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. if (bedRegions != NULL) { // Clean up our mapping. 
munmap(bedRegions, numBeds * sizeof(bedRegion)); } } /////////////////////////////////////////////////////////////////////////// // Cleanup /////////////////////////////////////////////////////////////////////////// stateMachine_destruct(sM); cactusDisk_destruct(cactusDisk); stKVDatabaseConf_destruct(kvDatabaseConf); //destructCactusCoreInputParameters(cCIP); free(cactusDiskDatabaseString); if (listOfEndAlignmentFiles != NULL) { stList_destruct(listOfEndAlignmentFiles); } if (logLevelString != NULL) { free(logLevelString); } st_logInfo("Finished with the flower disk for this flower.\n"); //while(1); return 0; }