static void splitIntoAdjacenciesStubsAndChains(stList *subCycle,
        stList *adjacencyEdges, stList *stubEdges, stList *chainEdges,
        stList **subAdjacencyEdges, stList **subStubEdges,
        stList **subChainEdges) {
    /*
     * Splits run into cycles and chains..
     */
    *subStubEdges = stList_construct();
    *subChainEdges = stList_construct();
    for (int64_t j = 0; j < stList_length(subCycle); j++) {
        stIntTuple *edge = stList_get(subCycle, j);
        if (stList_contains(stubEdges, edge)) {
            stList_append(*subStubEdges, edge);
        } else if (stList_contains(chainEdges, edge)) {
            stList_append(*subChainEdges, edge);
        }
    }
    *subAdjacencyEdges = stList_construct();
    stSortedSet *nodes = getNodeSetOfEdges(subCycle);
    for (int64_t j = 0; j < stList_length(adjacencyEdges); j++) {
        stIntTuple *edge = stList_get(adjacencyEdges, j);
        if (nodeInSet(nodes, stIntTuple_get(edge, 0)) && nodeInSet(
                nodes, stIntTuple_get(edge, 1))) {
            stList_append(*subAdjacencyEdges, edge);
        }
    }
    stSortedSet_destruct(nodes);
}
static stList *mergeSimpleCycles2(stList *chosenEdges,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges,
        stList *stubEdges, stList *chainEdges) {
    /*
     * Returns a new set of chosen edges, modified by adjacency switches such that every simple cycle
     * contains at least one stub edge.
     */

    /*
     * Calculate components.
     */
    stList *components = getComponents2(chosenEdges, stubEdges, chainEdges);

    /*
     * Divide the components by the presence of one or more stub edges.
     */
    stSortedSet *stubEdgesSet = stList_getSortedSet(stubEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);

    stList *stubContainingComponents = stList_construct();
    stList *stubFreeComponents = stList_construct();

    for (int64_t i = 0; i < stList_length(components); i++) {
        stList *component = stList_get(components, i);
        stList_append(
                intersectionSize(stubEdgesSet, component) > 0 ? stubContainingComponents
                        : stubFreeComponents, component);
    }
    assert(stList_length(stubContainingComponents) > 0);
    stSortedSet_destruct(stubEdgesSet);

    /*
     * Merge the stub containing components into one 'global' component
     */
    stList *globalComponent = stList_join(stubContainingComponents);
    stList_destruct(stubContainingComponents);

    /*
     * Remove the stub/chain edges from the components.
     */
    stList_append(stubFreeComponents, globalComponent);
    stList *adjacencyOnlyComponents = getStubAndChainEdgeFreeComponents(
            stubFreeComponents, stubEdges, chainEdges);

    stList_destruct(stubFreeComponents);
    stList_destruct(globalComponent);
    stList_destruct(components); //We only clean this up now, as this frees the components it contains.

    /*
     * Merge stub free components into the others.
     */
    stList *updatedChosenEdges = mergeSimpleCycles(adjacencyOnlyComponents,
            nonZeroWeightAdjacencyEdges, allAdjacencyEdges);
    stList_destruct(adjacencyOnlyComponents);

    return updatedChosenEdges;
}
stList *getMatchingWithCyclicConstraints(stSortedSet *nodes,
        stList *adjacencyEdges, stList *stubEdges, stList *chainEdges,
        bool makeStubCyclesDisjoint,
        stList *(*matchingAlgorithm)(stList *edges, int64_t nodeNumber)) {
    /*
     * Check the inputs.
     */
    checkInputs(nodes, adjacencyEdges, stubEdges, chainEdges);
    st_logDebug("Checked the inputs\n");

    if (stSortedSet_size(nodes) == 0) { //Some of the following functions assume there are at least 2 nodes.
        return stList_construct();
    }

    stList *chosenEdges = getPerfectMatching(nodes, adjacencyEdges, matchingAlgorithm);

    stSortedSet *allAdjacencyEdges = stList_getSortedSet(adjacencyEdges,
                (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    stList *nonZeroWeightAdjacencyEdges = getEdgesWithGreaterThanZeroWeight(
                    adjacencyEdges);

    stList *updatedChosenEdges = makeMatchingObeyCyclicConstraints(nodes, chosenEdges, allAdjacencyEdges, nonZeroWeightAdjacencyEdges, stubEdges, chainEdges, makeStubCyclesDisjoint);
    stList_destruct(chosenEdges);
    chosenEdges = updatedChosenEdges;

    stList_destruct(nonZeroWeightAdjacencyEdges);
    stSortedSet_destruct(allAdjacencyEdges);

    return chosenEdges;
}
stList *chooseMatching_greedy(stList *edges, int64_t nodeNumber) {
    /*
     * Greedily picks the edge from the list such that each node has at most one edge.
     */
    //First clone the list..
    edges = stList_copy(edges, NULL);

    stSortedSet *seen = getEmptyNodeOrEdgeSetWithCleanup();
    stList *matching = stList_construct();

    //Sort the adjacency pairs..
    stList_sort(edges, chooseMatching_greedyP);

    double strength = INT64_MAX;
    while (stList_length(edges) > 0) {
        stIntTuple *edge = stList_pop(edges);
        double d = stIntTuple_get(edge, 2);
        assert(d <= strength);
        strength = d;
        if(!nodeInSet(seen, stIntTuple_get(edge, 0)) && !nodeInSet(seen, stIntTuple_get(edge, 1))) {
            addNodeToSet(seen, stIntTuple_get(edge, 0));
            addNodeToSet(seen, stIntTuple_get(edge, 1));
            stList_append(matching,edge);
        }
    }
    assert(stList_length(edges) == 0);
    stList_destruct(edges);
    stSortedSet_destruct(seen);

    return matching;
}
static stList *readMatching(FILE *fileHandle, stList *originalEdges) {
    /*
     * Reads the matching created by Blossum.
     */
    stHash *originalEdgesHash = putEdgesInHash(originalEdges);
    char *line = stFile_getLineFromFile(fileHandle);
    assert(line != NULL);
    int64_t nodeNumber, edgeNumber;
    int64_t i = sscanf(line, "%" PRIi64 " %" PRIi64 "\n", &nodeNumber, &edgeNumber);
    assert(i == 2);
    free(line);
    stList *chosenEdges = stList_construct();
    for(int64_t j=0; j<edgeNumber; j++) {
        line = stFile_getLineFromFile(fileHandle);
        int64_t node1, node2;
        i = sscanf(line, "%" PRIi64 " %" PRIi64 "", &node1, &node2);
        assert(i == 2);
        free(line);
        assert(node1 >= 0);
        assert(node1 < nodeNumber);
        assert(node2 >= 0);
        assert(node2 < nodeNumber);
        stIntTuple *edge = constructEdge(node1, node2);
        stIntTuple *originalEdge = stHash_search(originalEdgesHash, edge);
        if(originalEdge != NULL) {
            stList_append(chosenEdges, originalEdge);
        }
        stIntTuple_destruct(edge);
    }
    stHash_destruct(originalEdgesHash);
    return chosenEdges;
}
Пример #6
0
void stCaf_addAdjacencies(Flower *flower) {
    //Build a list of caps.
    stList *list = stList_construct();
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        End_InstanceIterator *instanceIterator = end_getInstanceIterator(end);
        Cap *cap;
        while ((cap = end_getNext(instanceIterator)) != NULL) {
            if (!cap_getStrand(cap)) {
                cap = cap_getReverse(cap);
            }
            stList_append(list, cap);
        }
        end_destructInstanceIterator(instanceIterator);
    }
    flower_destructEndIterator(endIterator);
    assert(stList_length(list) % 2 == 0);
    //Sort the list of caps.
    stList_sort(list, (int(*)(const void *, const void *)) addAdjacenciesPP);
    //Now make the adjacencies.
    for (int64_t i = 1; i < stList_length(list); i += 2) {
        Cap *cap = stList_get(list, i - 1);
        Cap *cap2 = stList_get(list, i);
        cap_makeAdjacent(cap, cap2);
    }
    //Clean up.
    stList_destruct(list);
}
Пример #7
0
static void setup() {
    teardown();
    list = stList_construct();
    int64_t i;
    for(i=0; i<stringNumber; i++) {
        stList_append(list, strings[i]);
    }
}
Пример #8
0
stList *stSet_getKeys(stSet *set) {
    stList *list = stList_construct();
    stSetIterator *iterator = stSet_getIterator(set);
    void *item;
    while ((item = stSet_getNext(iterator)) != NULL) {
        stList_append(list, item);
    }
    stSet_destructIterator(iterator);
    return list;
}
static AdjacencySwitch *getBest2EdgeAdjacencySwitch(stList *components,
        stSortedSet *allAdjacencyEdges) {
    /*
     * Look for the two lowest value adjacency edges in all current edges that are in a separate component and returns them as an adjacency switch
     * with now new adjacency edges.
     */

    /*
     * Get lowest scoring edge for each component.
     */
    stList *lowestScoringEdgeFromEachComponent = stList_construct();
    for (int64_t i = 0; i < stList_length(components); i++) {
        stList_append(lowestScoringEdgeFromEachComponent,
                getLowestScoringEdge(stList_get(components, i)));
    }

    /*
     * Get two lowest scoring edges.
     */
    stList_sort(lowestScoringEdgeFromEachComponent,
            getBest2EdgeAdjacencySwitchP);
    stIntTuple *lowestScoreEdge1 = stList_get(
            lowestScoringEdgeFromEachComponent, 0);
    stIntTuple *lowestScoreEdge2 = stList_get(
            lowestScoringEdgeFromEachComponent, 1);
    assert(lowestScoreEdge1 != lowestScoreEdge2);

    stList_destruct(lowestScoringEdgeFromEachComponent); //Cleanup

    stIntTuple *newEdge1 = getWeightedEdgeFromSet(
            stIntTuple_get(lowestScoreEdge1, 0),
            stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    stIntTuple *newEdge2 = getWeightedEdgeFromSet(
            stIntTuple_get(lowestScoreEdge1, 1),
            stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
    if (newEdge1 == NULL) {
        assert(newEdge2 == NULL);
        newEdge1 = getWeightedEdgeFromSet(
                stIntTuple_get(lowestScoreEdge1, 0),
                stIntTuple_get(lowestScoreEdge2, 1), allAdjacencyEdges);
        newEdge2 = getWeightedEdgeFromSet(
                stIntTuple_get(lowestScoreEdge1, 1),
                stIntTuple_get(lowestScoreEdge2, 0), allAdjacencyEdges);
    }
    assert(newEdge1 != NULL);
    assert(newEdge2 != NULL);

    return adjacencySwitch_construct(
            lowestScoreEdge1,
            lowestScoreEdge2,
            newEdge1,
            newEdge2,
            stIntTuple_get(lowestScoreEdge1, 2)
                    + stIntTuple_get(lowestScoreEdge2, 2));
}
Пример #10
0
/* build a list of blocks, sorted by the root components */
static stList *buildRootSorted(struct malnSet *malnSet) {
    stList *sorted = stList_construct();
    struct malnBlkSetIterator *iter = malnBlkSet_getIterator(malnSet->blks);
    struct malnBlk *blk;
    while ((blk = malnBlkSetIterator_getNext(iter)) != NULL) {
        stList_append(sorted, blk);
    }
    malnBlkSetIterator_destruct(iter);
    stList_sort(sorted, blkCmpRootComp);
    return sorted;
}
static stList *chooseAdjacencyPairing_externalProgram(stList *edges, int64_t nodeNumber, const char *programName) {
    /*
     * We create temp files to hold stuff.
     */
    if(nodeNumber <= 1) {
        assert(stList_length(edges) == 0);
        return stList_construct();
    }

    char *tempInputFile = getTempFile(), *tempOutputFile = getTempFile();

    /*
     * We write the graph to a temp file.
     */
    FILE *fileHandle = fopen(tempInputFile, "w");
    if(strcmp(programName, "blossom5") == 0) { //Must be all connected as
        //generates perfect matchings.
        writeCliqueGraph(fileHandle, edges, nodeNumber, 1);
    }
    else {
        writeGraph(fileHandle, edges, nodeNumber);
    }
    fclose(fileHandle);

    /*
     * We run the external program.
     */
    char *command = stString_print("%s -e %s -w %s >& /dev/null", programName, tempInputFile, tempOutputFile);
    int64_t i = st_system(command);
    if(i != 0) {
        st_errAbort("Something went wrong with the command: %s", command);
        //For some reason this causes a seg fault
        //stThrowNew(MATCHING_EXCEPTION, "Something went wrong with the command: %s", command);
    }
    free(command);

    /*
     * We get back the matching.
     */
    fileHandle = fopen(tempOutputFile, "r");
    stList *matching = readMatching(fileHandle, edges);
    fclose(fileHandle);
    st_logDebug("The adjacency matching for %" PRIi64 " nodes with %" PRIi64 " initial edges contains %" PRIi64 " edges\n", nodeNumber, stList_length(edges), stList_length(matching));

    /*
     * Get rid of the temp files..
     */
    st_system("rm -rf %s %s", tempInputFile, tempOutputFile);
    free(tempInputFile);
    free(tempOutputFile);

    return matching;
}
Пример #12
0
int main(int argc, char *argv[]) {
    //////////////////////////////////////////////
    //Parse the inputs
    //////////////////////////////////////////////

    parseBasicArguments(argc, argv, "linkageStats");

    ///////////////////////////////////////////////////////////////////////////
    // Get the intervals
    ///////////////////////////////////////////////////////////////////////////

    stList *haplotypeEventStrings = getEventStrings(
            treatHaplotype1AsContamination ? NULL : hap1EventString,
            treatHaplotype2AsContamination ? NULL : hap2EventString);
    stList *assemblyEventStringInList = stList_construct();
    stList_append(assemblyEventStringInList, assemblyEventString);

    stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct);
    for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) {
        const char *hapEventString = stList_get(haplotypeEventStrings, i);
        st_logInfo("Getting contig paths for haplotype: %s", hapEventString);
        stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList);
        stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString,
                assemblyEventStringInList);
        stList_destruct(contigPaths);
        st_logInfo("Getting contig paths\n");
        stList_appendAll(intervals, hapIntervals);
        stList_setDestructor(hapIntervals, NULL);
        stList_destruct(hapIntervals);
    }

    st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals));

    ///////////////////////////////////////////////////////////////////////////
    // Write it out.
    ///////////////////////////////////////////////////////////////////////////

    FILE *fileHandle = fopen(outputFile, "w");
    for (int64_t i = 0; i < stList_length(intervals); i++) {
        SequenceInterval *sequenceInterval = stList_get(intervals, i);
        st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
        fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
    }

    st_logInfo("Finished writing out the stats.\n");
    fclose(fileHandle);

    return 0;
}
static stList *getOddToEvenAdjacencyEdges(stSortedSet *oddNodes,
        stList *adjacencyEdges) {
    /*
     * Gets edges that include one node in the set of oddNodes, but not both.
     */
    stList *oddToEvenAdjacencyEdges = stList_construct();
    for (int64_t i = 0; i < stList_length(adjacencyEdges); i++) {
        stIntTuple *edge = stList_get(adjacencyEdges, i);
        if (nodeInSet(oddNodes, stIntTuple_get(edge, 0)) ^ nodeInSet(
                oddNodes, stIntTuple_get(edge, 1))) {
            stList_append(oddToEvenAdjacencyEdges, edge);
        }
    }
    return oddToEvenAdjacencyEdges;
}
Пример #14
0
/* clone the root. */
static stTree *subrangeCloneRoot(stTree *srcRoot, struct malnCompCompMap *srcDestCompMap) {
    // clone root, if deleted, these must only be one child (due to the way
    // the trees are constructed).
    stList *pendingSubtrees = stList_construct();
    stTree *destRoot = subrangeCloneNode(srcRoot, srcDestCompMap, pendingSubtrees);
    if (destRoot == NULL) {
        if (stList_length(pendingSubtrees) > 1) {
            struct mafTreeNodeCompLink *srcNcLink = getNodeCompLink(srcRoot);
            errAbort("deleted tree root %s (component: %s:%d-%d/%c)) has more that one child", stTree_getLabel(srcRoot), srcNcLink->comp->seq->orgSeqName, srcNcLink->comp->start, srcNcLink->comp->end, srcNcLink->comp->strand);
        } else if (stList_length(pendingSubtrees) == 1) {
            destRoot = stList_pop(pendingSubtrees);
        }
    }
    stList_destruct(pendingSubtrees);
    return destRoot;
}
Пример #15
0
static void test_stSet_removeAndFreeKey(CuTest* testCase) {
    stSet *set2 = stSet_construct2(free);
    stList *keys = stList_construct();
    int64_t keyNumber = 1000;
    for (int64_t i = 0; i < keyNumber; i++) {
        int64_t *key = st_malloc(sizeof(*key));
        stList_append(keys, key);
        stSet_insert(set2, key);
    }
    for (int64_t i = 0; i < keyNumber; i++) {
        int64_t *key = stList_get(keys, i);
        CuAssertPtrEquals(testCase, key, stSet_removeAndFreeKey(set2, key));
    }
    CuAssertIntEquals(testCase, 0, stSet_size(set2));
    stSet_destruct(set2);
    stList_destruct(keys);
}
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    /*
     * Get set of adjacency edges that bridge between (have a node in two) components.
     */

    stList *bridgingAdjacencyEdges = stList_construct();

    for (int64_t i = 0; i < stList_length(components); i++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(
                stList_get(components, i));
        stSortedSetIterator *it = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(it)) != NULL) {
            stList *edges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges,
                    node);
            if (edges != NULL) {
                for (int64_t j = 0; j < stList_length(edges); j++) {
                    stIntTuple *edge = stList_get(edges, j);
                    stIntTuple *node1 = stIntTuple_construct1(
                            stIntTuple_get(edge, 0));
                    stIntTuple *node2 = stIntTuple_construct1(
                            stIntTuple_get(edge, 1));
                    assert(
                            stSortedSet_search(componentNodes, node1) != NULL
                                    || stSortedSet_search(componentNodes, node2)
                                            != NULL);
                    if (stSortedSet_search(componentNodes, node1) == NULL
                            || stSortedSet_search(componentNodes, node2)
                                    == NULL) {
                        stList_append(bridgingAdjacencyEdges, edge);
                    }
                    stIntTuple_destruct(node1);
                    stIntTuple_destruct(node2);
                }
            }
        }
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(componentNodes);
    }

    return bridgingAdjacencyEdges;
}
stList *getComponents2(stList *adjacencyEdges, stList *stubEdges,
        stList *chainEdges) {
    /*
     * Gets a list of connected components for a set of adjacency, stub and chain edges.
     * If adjacencyEdges, stubEdges or chainEdges are NULL then they are ignored.
     */
    stList *allEdges = stList_construct(); //Build a concatenated list of all the chain, stub and adjacency edges.
    if (adjacencyEdges != NULL) {
        stList_appendAll(allEdges, adjacencyEdges);
    }
    if (stubEdges != NULL) {
        stList_appendAll(allEdges, stubEdges);
    }
    if (chainEdges != NULL) {
        stList_appendAll(allEdges, chainEdges);
    }
    stList *components = getComponents(allEdges); //Gets the graph components.
    stList_destruct(allEdges); //Cleanup the all edges.
    return components;
}
Пример #18
0
static void getMaximalHaplotypePathsP(Flower *flower,
        stList *maximalHaplotypePaths, stSortedSet *segmentSet,
        const char *eventString,
        stList *eventStrings) {
    /*
     *  Iterate through the segments in this flower.
     */
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (stSortedSet_search(segmentSet, segment) == NULL
                && stSortedSet_search(segmentSet, segment_getReverse(segment))
                        == NULL) { //Check we haven't yet seen this segment
            if (strcmp(event_getHeader(segment_getEvent(segment)), eventString)
                    == 0) { //Check if the segment is in the assembly
                if (hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) { //Is a block in a haplotype segment
                    assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
                    stList *maximalHaplotypePath = stList_construct();
                    stList_append(maximalHaplotypePaths, maximalHaplotypePath);
                    getMaximalHaplotypePathsP2(segment, maximalHaplotypePath,
                            segmentSet, eventStrings);
                } else {
                    assert(!hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));//assert(!isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
                }
            }
        }
    }
    flower_destructSegmentIterator(segmentIt);
    /*
     * Now recurse on the contained flowers.
     */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) != NULL) {
            getMaximalHaplotypePathsP(group_getNestedFlower(group),
                    maximalHaplotypePaths, segmentSet, eventString, eventStrings);
        }
    }
    flower_destructGroupIterator(groupIt);
}
stList *makeMatchingObeyCyclicConstraints(stSortedSet *nodes,
        stList *chosenEdges,
        stSortedSet *allAdjacencyEdges, stList *nonZeroWeightAdjacencyEdges,
        stList *stubEdges, stList *chainEdges,
        bool makeStubCyclesDisjoint) {
    if (stSortedSet_size(nodes) == 0) { //Some of the following functions assume there are at least 2 nodes.
        return stList_construct();
    }

    /*
     * Merge in the stub free components.
     */
    chosenEdges = mergeSimpleCycles2(chosenEdges,
            nonZeroWeightAdjacencyEdges, allAdjacencyEdges, stubEdges,
            chainEdges);

    st_logDebug(
            "After merging in chain only cycles the matching has %" PRIi64 " edges, %" PRIi64 " cardinality and %" PRIi64 " weight\n",
            stList_length(chosenEdges), matchingCardinality(chosenEdges),
            matchingWeight(chosenEdges));

    /*
     * Split stub components.
     */
    if (makeStubCyclesDisjoint) {
        stList *updatedChosenEdges = splitMultipleStubCycles(chosenEdges,
                nonZeroWeightAdjacencyEdges, allAdjacencyEdges, stubEdges,
                chainEdges);
        stList_destruct(chosenEdges);
        chosenEdges = updatedChosenEdges;
        st_logDebug(
                "After making stub cycles disjoint the matching has %" PRIi64 " edges, %" PRIi64 " cardinality and %" PRIi64 " weight\n",
                stList_length(chosenEdges), matchingCardinality(chosenEdges),
                matchingWeight(chosenEdges));
    } else {
        st_logDebug("Not making stub cycles disjoint\n");
    }

    return chosenEdges;
}
Пример #20
0
static stList *getCaps(stList *flowers, Name referenceEventName) {
    stList *caps = stList_construct();
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        //Get list of caps
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (end_isStubEnd(end)) {
                Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
                if(cap != NULL) {
                    cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                    if (!cap_getSide(cap)) {
                        stList_append(caps, cap);
                    }
                }
            }
        }
        flower_destructEndIterator(endIt);
    }
    return caps;
}
stList *getPerfectMatching(stSortedSet *nodes,
        stList *adjacencyEdges,
        stList *(*matchingAlgorithm)(stList *edges, int64_t nodeNumber)) {

    checkEdges(adjacencyEdges, nodes, 1, 1); //Checks edges are clique

    if (stSortedSet_size(nodes) == 0) { //Some of the following functions assume there are at least 2 nodes.
        return stList_construct();
    }

    stList *nonZeroWeightAdjacencyEdges = getEdgesWithGreaterThanZeroWeight(
                adjacencyEdges);
    stList *chosenEdges = getSparseMatching(nodes, nonZeroWeightAdjacencyEdges, matchingAlgorithm);
    stList_destruct(nonZeroWeightAdjacencyEdges);
    makeMatchingPerfect(chosenEdges, adjacencyEdges, nodes);

    st_logDebug(
                "Chosen a perfect matching with %" PRIi64 " edges, %" PRIi64 " cardinality and %" PRIi64 " weight\n",
                stList_length(chosenEdges), matchingCardinality(chosenEdges),
                matchingWeight(chosenEdges));

    return chosenEdges;
}
Пример #22
0
/* Get a list of components that overlap the specified guide range and are in
 * blocks not flagged as dying and matches treeLoc filters.  Return NULL if no
 * overlaps.  List is sorted by ascending width, which helps the merge
 * efficiency.  */
stList *malnSet_getOverlappingComps(struct malnSet *malnSet, struct Seq *seq, int chromStart, int chromEnd, unsigned treeLocFilter) {
    if (malnSet->compRangeMap == NULL) {
        buildRangeTree(malnSet);
    }
    stList *overComps = NULL;
    for (struct range *rng = genomeRangeTreeAllOverlapping(malnSet->compRangeMap, seq->orgSeqName, chromStart, chromEnd); rng != NULL; rng = rng->next) {
        for (struct slRef *compRef = rng->val; compRef != NULL; compRef = compRef->next) {
            struct malnComp *comp = compRef->val;
            if (keepOverlap(comp, seq, chromStart, chromEnd, treeLocFilter)) {
                if (overComps == NULL) { 
                    overComps = stList_construct();
                }
                stList_append(overComps, comp);
            }
        }
    }

    // sort so tests are reproducible
    if (overComps != NULL) {
        stList_sort(overComps, sortCompListCmpFn);
    }
    return overComps;
}
int main(int argc, char *argv[]) {
    /*
     * Script for adding a reference genome to a flower.
     */

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    char * secondaryDatabaseString = NULL;
    char *referenceEventString = (char *) cactusMisc_getDefaultReferenceEventHeader();
    bool bottomUpPhase = 0;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "secondaryDisk", required_argument, 0, 'd' }, { "referenceEventString", required_argument, 0, 'g' }, { "help", no_argument,
                0, 'h' }, { "bottomUpPhase", no_argument, 0, 'j' }, { 0, 0, 0, 0 } };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:c:d:e:g:hi:j", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                break;
            case 'b':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'd':
                secondaryDatabaseString = stString_copy(optarg);
                break;
            case 'g':
                referenceEventString = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'j':
                bottomUpPhase = 1;
                break;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    assert(cactusDiskDatabaseString != NULL);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    st_logInfo("referenceEventString = %s\n", referenceEventString);
    st_logInfo("bottomUpPhase = %i\n", bottomUpPhase);

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, false, true);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logInfo("Set up the flower disk\n");

    stKVDatabase *sequenceDatabase = NULL;
    if (secondaryDatabaseString != NULL) {
        kvDatabaseConf = stKVDatabaseConf_constructFromString(secondaryDatabaseString);
        sequenceDatabase = stKVDatabase_construct(kvDatabaseConf, 0);
        stKVDatabaseConf_destruct(kvDatabaseConf);
    }

    FlowerStream *flowerStream = flowerWriter_getFlowerStream(cactusDisk, stdin);
    Flower *flower;
    while ((flower = flowerStream_getNext(flowerStream)) != NULL) {
        st_logDebug("Processing flower %" PRIi64 "\n", flower_getName(flower));

        ///////////////////////////////////////////////////////////////////////////
        // Get the appropriate event names
        ///////////////////////////////////////////////////////////////////////////

        st_logInfo("%s\n", eventTree_makeNewickString(flower_getEventTree(flower)));
        Event *referenceEvent = eventTree_getEventByHeader(flower_getEventTree(flower), referenceEventString);
        if (referenceEvent == NULL) {
            st_errAbort("Reference event %s not found in tree. Check your "
                        "--referenceEventString option", referenceEventString);
        }
        Name referenceEventName = event_getName(referenceEvent);

        ///////////////////////////////////////////////////////////////////////////
        // Now do bottom up or top down, depending
        ///////////////////////////////////////////////////////////////////////////
        stList *flowers = stList_construct();
        stList_append(flowers, flower);
        preCacheNestedFlowers(cactusDisk, flowers);
        if (bottomUpPhase) {
            assert(sequenceDatabase != NULL);

            cactusDisk_preCacheSegmentStrings(cactusDisk, flowers);
            bottomUp(flowers, sequenceDatabase, referenceEventName, !flower_hasParentGroup(flower), generateJukesCantorMatrix);

            // Unload the nested flowers to save memory. They haven't
            // been changed, so we don't write them to the cactus
            // disk.
            Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
            Group *group;
            while ((group = flower_getNextGroup(groupIt)) != NULL) {
                if (!group_isLeaf(group)) {
                    flower_unload(group_getNestedFlower(group));
                }
            }
            flower_destructGroupIterator(groupIt);
            assert(!flower_isParentLoaded(flower));

            // Write this flower to disk.
            cactusDisk_addUpdateRequest(cactusDisk, flower);
        } else {
            topDown(flower, referenceEventName);

            // We've changed the nested flowers, but not this
            // flower. We write the nested flowers to disk, then
            // unload them to save memory. This flower will be
            // unloaded by the flower-stream code.
            Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
            Group *group;
            while ((group = flower_getNextGroup(groupIt)) != NULL) {
                if (!group_isLeaf(group)) {
                    cactusDisk_addUpdateRequest(cactusDisk, group_getNestedFlower(group));
                    flower_unload(group_getNestedFlower(group));
                }
            }
            flower_destructGroupIterator(groupIt);
        }
        stList_destruct(flowers);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Write the flower(s) back to disk.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_write(cactusDisk);
    st_logInfo("Updated the flower on disk\n");

    ///////////////////////////////////////////////////////////////////////////
    //Clean up.
    ///////////////////////////////////////////////////////////////////////////

    if (sequenceDatabase != NULL) {
        stKVDatabase_destruct(sequenceDatabase);
    }

    cactusDisk_destruct(cactusDisk);

    return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.

    free(cactusDiskDatabaseString);
    free(referenceEventString);
    free(logLevelString);

    st_logInfo("Cleaned stuff up and am finished\n");

    return 0;
}
Пример #24
0
static void cacheSubstringsFromDB(CactusDisk *cactusDisk, stList *substrings) {
    /*
     * Caches the given set of substrings in the cactusDisk cache.
     */
    if (cactusDisk->storeSequencesInAFile) {
        if (cactusDisk->sequencesReadFileHandle == NULL) {
            if(cactusDisk->sequencesWriteFileHandle != NULL) {
                fsync(fileno(cactusDisk->sequencesWriteFileHandle));
                fclose(cactusDisk->sequencesWriteFileHandle);
                cactusDisk->sequencesWriteFileHandle = NULL;
            }
            cactusDisk->sequencesReadFileHandle = fopen(cactusDisk->absSequencesFileName, "r");
            assert(cactusDisk->sequencesReadFileHandle != NULL);
        }
        else {
            assert(cactusDisk->sequencesWriteFileHandle == NULL);
        }
        for (int64_t i = 0; i < stList_length(substrings); i++) {
            Substring *substring = stList_get(substrings, i);
            char *string = getStringFromDisk(cactusDisk->sequencesReadFileHandle, substring->name, substring->start,
                    substring->length);
            stCache_setRecord(cactusDisk->stringCache, substring->name, substring->start, substring->length, string);
#ifndef NDEBUG
            int64_t bytesRead;
            char *string2 = stCache_getRecord(cactusDisk->stringCache, substring->name, substring->start,
                    substring->length, &bytesRead);
            assert(bytesRead == substring->length);
            for (int64_t j = 0; j < substring->length; j++) {
                assert(string2[j] == string[j]);
            }
            free(string2);
#endif
            free(string);
        }
    } else {
        stList *getRequests = stList_construct3(0, free);
        for (int64_t i = 0; i < stList_length(substrings); i++) {
            Substring *substring = stList_get(substrings, i);
            int64_t intervalSize = (substring->length + substring->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE
                    - substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1;
            Name shiftedName = substring->name + substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE;
            for (int64_t j = 0; j < intervalSize; j++) {
                int64_t *k = st_malloc(sizeof(int64_t));
                k[0] = shiftedName + j;
                stList_append(getRequests, k);
            }
        }
        if (stList_length(getRequests) == 0) {
            stList_destruct(getRequests);
            return;
        }
        stList *records = NULL;
        stTry
            {
                records = stKVDatabase_bulkGetRecords(cactusDisk->database, getRequests);
            }
            stCatch(except)
                {
                    stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                            "An unknown database error occurred when getting a sequence string");
                }stTryEnd
        ;
        assert(records != NULL);
        assert(stList_length(records) == stList_length(getRequests));
        stList_destruct(getRequests);
        stListIterator *recordsIt = stList_getIterator(records);
        for (int64_t i = 0; i < stList_length(substrings); i++) {
            Substring *substring = stList_get(substrings, i);
            int64_t intervalSize = (substring->length + substring->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE
                    - substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1;
            stList *strings = stList_construct();
            while (intervalSize-- > 0) {
                int64_t recordSize;
                stKVDatabaseBulkResult *result = stList_getNext(recordsIt);
                assert(result != NULL);
                char *string = stKVDatabaseBulkResult_getRecord(result, &recordSize);
                assert(string != NULL);
                assert(strlen(string) == recordSize - 1);
                stList_append(strings, string);
                assert(recordSize <= CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1);
            }
            assert(stList_length(strings) > 0);
            char *joinedString = stString_join2("", strings);
            stCache_setRecord(cactusDisk->stringCache, substring->name,
                    (substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE) * CACTUS_DISK_SEQUENCE_CHUNK_SIZE,
                    strlen(joinedString), joinedString);
            free(joinedString);
            stList_destruct(strings);
        }
        assert(stList_getNext(recordsIt) == NULL);
        stList_destructIterator(recordsIt);
        stList_destruct(records);
    }
}
Пример #25
0
int main(int argc, char *argv[]) {
    /*
     * Script for adding alignments to cactus tree.
     */
    int64_t startTime;
    stKVDatabaseConf *kvDatabaseConf;
    CactusDisk *cactusDisk;
    int key, k;

    bool (*filterFn)(stPinchSegment *, stPinchSegment *) = NULL;
    stSet *outgroupThreads = NULL;

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * alignmentsFile = NULL;
    char * constraintsFile = NULL;
    char * cactusDiskDatabaseString = NULL;
    char * lastzArguments = "";
    int64_t minimumSequenceLengthForBlast = 1;

    //Parameters for annealing/melting rounds
    int64_t *annealingRounds = NULL;
    int64_t annealingRoundsLength = 0;
    int64_t *meltingRounds = NULL;
    int64_t meltingRoundsLength = 0;

    //Parameters for melting
    float maximumAdjacencyComponentSizeRatio = 10;
    int64_t blockTrim = 0;
    int64_t alignmentTrimLength = 0;
    int64_t *alignmentTrims = NULL;
    int64_t chainLengthForBigFlower = 1000000;
    int64_t longChain = 2;
    int64_t minLengthForChromosome = 1000000;
    float proportionOfUnalignedBasesForNewChromosome = 0.8;
    bool breakChainsAtReverseTandems = 1;
    int64_t maximumMedianSequenceLengthBetweenLinkedEnds = INT64_MAX;
    bool realign = 0;
    char *realignArguments = "";
    bool removeRecoverableChains = false;
    bool (*recoverableChainsFilter)(stCactusEdgeEnd *, Flower *) = NULL;
    int64_t maxRecoverableChainsIterations = 1;
    int64_t maxRecoverableChainLength = INT64_MAX;

    //Parameters for removing ancient homologies
    bool doPhylogeny = false;
    int64_t phylogenyNumTrees = 1;
    enum stCaf_RootingMethod phylogenyRootingMethod = BEST_RECON;
    enum stCaf_ScoringMethod phylogenyScoringMethod = COMBINED_LIKELIHOOD;
    double breakpointScalingFactor = 1.0;
    bool phylogenySkipSingleCopyBlocks = 0;
    int64_t phylogenyMaxBaseDistance = 1000;
    int64_t phylogenyMaxBlockDistance = 100;
    bool phylogenyKeepSingleDegreeBlocks = 0;
    stList *phylogenyTreeBuildingMethods = stList_construct();
    enum stCaf_TreeBuildingMethod defaultMethod = GUIDED_NEIGHBOR_JOINING;
    stList_append(phylogenyTreeBuildingMethods, &defaultMethod);
    double phylogenyCostPerDupPerBase = 0.2;
    double phylogenyCostPerLossPerBase = 0.2;
    const char *debugFileName = NULL;
    const char *referenceEventHeader = NULL;
    double phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce = 1.0;
    int64_t numTreeBuildingThreads = 2;
    int64_t minimumBlockDegreeToCheckSupport = 10;
    double minimumBlockHomologySupport = 0.7;
    double nucleotideScalingFactor = 1.0;
    HomologyUnitType phylogenyHomologyUnitType = BLOCK;
    enum stCaf_DistanceCorrectionMethod phylogenyDistanceCorrectionMethod = JUKES_CANTOR;
    bool sortAlignments = false;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "alignments", required_argument, 0, 'b' }, {
                "cactusDisk", required_argument, 0, 'c' }, { "lastzArguments", required_argument, 0, 'd' },
                { "help", no_argument, 0, 'h' }, { "annealingRounds", required_argument, 0, 'i' }, { "trim", required_argument, 0, 'k' }, {
                        "trimChange", required_argument, 0, 'l', }, { "minimumTreeCoverage", required_argument, 0, 'm' }, { "blockTrim",
                        required_argument, 0, 'n' }, { "deannealingRounds", required_argument, 0, 'o' }, { "minimumDegree",
                        required_argument, 0, 'p' }, { "minimumIngroupDegree", required_argument, 0, 'q' }, {
                        "minimumOutgroupDegree", required_argument, 0, 'r' }, { "alignmentFilter", required_argument, 0, 't' }, {
                        "minimumSequenceLengthForBlast", required_argument, 0, 'v' }, { "maxAdjacencyComponentSizeRatio",
                        required_argument, 0, 'w' }, { "constraints", required_argument, 0, 'x' }, { "minLengthForChromosome",
                        required_argument, 0, 'y' }, { "proportionOfUnalignedBasesForNewChromosome", required_argument, 0, 'z' },
                        { "maximumMedianSequenceLengthBetweenLinkedEnds", required_argument, 0, 'A' },
                        { "realign", no_argument, 0, 'B' }, { "realignArguments", required_argument, 0, 'C' },
                        { "phylogenyNumTrees", required_argument, 0, 'D' },
                        { "phylogenyRootingMethod", required_argument, 0, 'E' },
                        { "phylogenyScoringMethod", required_argument, 0, 'F' },
                        { "phylogenyBreakpointScalingFactor", required_argument, 0, 'G' },
                        { "phylogenySkipSingleCopyBlocks", no_argument, 0, 'H' },
                        { "phylogenyMaxBaseDistance", required_argument, 0, 'I' },
                        { "phylogenyMaxBlockDistance", required_argument, 0, 'J' },
                        { "phylogenyDebugFile", required_argument, 0, 'K' },
                        { "phylogenyKeepSingleDegreeBlocks", no_argument, 0, 'L' },
                        { "phylogenyTreeBuildingMethod", required_argument, 0, 'M' },
                        { "phylogenyCostPerDupPerBase", required_argument, 0, 'N' },
                        { "phylogenyCostPerLossPerBase", required_argument, 0, 'O' },
                        { "referenceEventHeader", required_argument, 0, 'P' },
                        { "phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce", required_argument, 0, 'Q' },
                        { "numTreeBuildingThreads", required_argument, 0, 'R' },
                        { "phylogeny", no_argument, 0, 'S' },
                        { "minimumBlockHomologySupport", required_argument, 0, 'T' },
                        { "phylogenyNucleotideScalingFactor", required_argument, 0, 'U' },
                        { "minimumBlockDegreeToCheckSupport", required_argument, 0, 'V' },
                        { "removeRecoverableChains", required_argument, 0, 'W' },
                        { "minimumNumberOfSpecies", required_argument, 0, 'X' },
                        { "phylogenyHomologyUnitType", required_argument, 0, 'Y' },
                        { "phylogenyDistanceCorrectionMethod", required_argument, 0, 'Z' },
                        { "maxRecoverableChainsIterations", required_argument, 0, '1' },
                        { "maxRecoverableChainLength", required_argument, 0, '2' },
                        { 0, 0, 0, 0 } };

        int option_index = 0;

        key = getopt_long(argc, argv, "a:b:c:hi:k:m:n:o:p:q:r:stv:w:x:y:z:A:BC:D:E:", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                st_setLogLevelFromString(logLevelString);
                break;
            case 'b':
                alignmentsFile = stString_copy(optarg);
                break;
            case 'c':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'd':
                lastzArguments = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                annealingRounds = getInts(optarg, &annealingRoundsLength);
                break;
            case 'o':
                meltingRounds = getInts(optarg, &meltingRoundsLength);
                break;
            case 'k':
                alignmentTrims = getInts(optarg, &alignmentTrimLength);
                break;
            case 'm':
                k = sscanf(optarg, "%f", &minimumTreeCoverage);
                assert(k == 1);
                break;
            case 'n':
                k = sscanf(optarg, "%" PRIi64 "", &blockTrim);
                assert(k == 1);
                break;
            case 'p':
                k = sscanf(optarg, "%" PRIi64 "", &minimumDegree);
                assert(k == 1);
                break;
            case 'q':
                k = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree);
                assert(k == 1);
                break;
            case 'r':
                k = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree);
                assert(k == 1);
                break;
            case 't':
                if (strcmp(optarg, "singleCopyOutgroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_filterByOutgroup;
                } else if (strcmp(optarg, "relaxedSingleCopyOutgroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_relaxedFilterByOutgroup;
                } else if (strcmp(optarg, "singleCopy") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_filterByRepeatSpecies;
                } else if (strcmp(optarg, "relaxedSingleCopy") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_relaxedFilterByRepeatSpecies;
                } else if (strcmp(optarg, "singleCopyChr") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_singleCopyChr;
                } else if (strcmp(optarg, "singleCopyIngroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_singleCopyIngroup;
                } else if (strcmp(optarg, "relaxedSingleCopyIngroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_relaxedSingleCopyIngroup;
                } else if (strcmp(optarg, "none") == 0) {
                    sortAlignments = false;
                    filterFn = NULL;
                } else {
                    st_errAbort("Could not recognize alignmentFilter option %s", optarg);
                }
                break;
            case 'v':
                k = sscanf(optarg, "%" PRIi64 "", &minimumSequenceLengthForBlast);
                assert(k == 1);
                break;
            case 'w':
                k = sscanf(optarg, "%f", &maximumAdjacencyComponentSizeRatio);
                assert(k == 1);
                break;
            case 'x':
                constraintsFile = stString_copy(optarg);
                break;
            case 'y':
                k = sscanf(optarg, "%" PRIi64 "", &minLengthForChromosome);
                assert(k == 1);
                break;
            case 'z':
                k = sscanf(optarg, "%f", &proportionOfUnalignedBasesForNewChromosome);
                assert(k == 1);
                break;
            case 'A':
                k = sscanf(optarg, "%" PRIi64 "", &maximumMedianSequenceLengthBetweenLinkedEnds);
                assert(k == 1);
                break;
            case 'B':
                realign = 1;
                break;
            case 'C':
                realignArguments = stString_copy(optarg);
                break;
            case 'D':
                k = sscanf(optarg, "%" PRIi64, &phylogenyNumTrees);
                assert(k == 1);
                break;
            case 'E':
                if (!strcmp(optarg, "outgroupBranch")) {
                    phylogenyRootingMethod = OUTGROUP_BRANCH;
                } else if (!strcmp(optarg, "longestBranch")) {
                    phylogenyRootingMethod = LONGEST_BRANCH;
                } else if (!strcmp(optarg, "bestRecon")) {
                    phylogenyRootingMethod = BEST_RECON;
                } else {
                    st_errAbort("Invalid tree rooting method: %s", optarg);
                }
                break;
            case 'F':
                if (!strcmp(optarg, "reconCost")) {
                    phylogenyScoringMethod = RECON_COST;
                } else if (!strcmp(optarg, "nucLikelihood")) {
                    phylogenyScoringMethod = NUCLEOTIDE_LIKELIHOOD;
                } else if (!strcmp(optarg, "reconLikelihood")) {
                    phylogenyScoringMethod = RECON_LIKELIHOOD;
                } else if (!strcmp(optarg, "combinedLikelihood")) {
                    phylogenyScoringMethod = COMBINED_LIKELIHOOD;
                } else {
                    st_errAbort("Invalid tree scoring method: %s", optarg);
                }
                break;
            case 'G':
                k = sscanf(optarg, "%lf", &breakpointScalingFactor);
                assert(k == 1);
                break;
            case 'H':
                phylogenySkipSingleCopyBlocks = true;
                break;
            case 'I':
                k = sscanf(optarg, "%" PRIi64, &phylogenyMaxBaseDistance);
                assert(k == 1);
                break;
            case 'J':
                k = sscanf(optarg, "%" PRIi64, &phylogenyMaxBlockDistance);
                assert(k == 1);
                break;
            case 'K':
                debugFileName = stString_copy(optarg);
                break;
            case 'L':
                phylogenyKeepSingleDegreeBlocks = true;
                break;
            case 'M':
                // clear the default setting of the list
                stList_destruct(phylogenyTreeBuildingMethods);
                phylogenyTreeBuildingMethods = stList_construct();
                stList *methodStrings = stString_splitByString(optarg, ",");

                for (int64_t i = 0; i < stList_length(methodStrings); i++) {
                    char *methodString = stList_get(methodStrings, i);
                    enum stCaf_TreeBuildingMethod *method = st_malloc(sizeof(enum stCaf_TreeBuildingMethod));
                    if (strcmp(methodString, "neighborJoining") == 0) {
                        *method = NEIGHBOR_JOINING;
                    } else if (strcmp(methodString, "guidedNeighborJoining") == 0) {
                        *method = GUIDED_NEIGHBOR_JOINING;
                    } else if (strcmp(methodString, "splitDecomposition") == 0) {
                        *method = SPLIT_DECOMPOSITION;
                    } else if (strcmp(methodString, "strictSplitDecomposition") == 0) {
                        *method = STRICT_SPLIT_DECOMPOSITION;
                    } else if (strcmp(methodString, "removeBadChains") == 0) {
                        *method = REMOVE_BAD_CHAINS;
                    } else {
                        st_errAbort("Unknown tree building method: %s", methodString);
                    }
                    stList_append(phylogenyTreeBuildingMethods, method);
                }
                stList_destruct(methodStrings);
                break;
            case 'N':
                k = sscanf(optarg, "%lf", &phylogenyCostPerDupPerBase);
                assert(k == 1);
                break;
            case 'O':
                k = sscanf(optarg, "%lf", &phylogenyCostPerLossPerBase);
                assert(k == 1);
                break;
            case 'P':
                referenceEventHeader = stString_copy(optarg);
                break;
            case 'Q':
                k = sscanf(optarg, "%lf", &phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce);
                assert(k == 1);
                break;
            case 'R':
                k = sscanf(optarg, "%" PRIi64, &numTreeBuildingThreads);
                assert(k == 1);
                break;
            case 'S':
                doPhylogeny = true;
                break;
            case 'T':
                k = sscanf(optarg, "%lf", &minimumBlockHomologySupport);
                assert(k == 1);
                assert(minimumBlockHomologySupport <= 1.0);
                assert(minimumBlockHomologySupport >= 0.0);
                break;
            case 'U':
                k = sscanf(optarg, "%lf", &nucleotideScalingFactor);
                assert(k == 1);
                break;
            case 'V':
                k = sscanf(optarg, "%" PRIi64, &minimumBlockDegreeToCheckSupport);
                assert(k == 1);
                break;
            case 'W':
                if (strcmp(optarg, "1") == 0) {
                    removeRecoverableChains = true;
                    recoverableChainsFilter = NULL;
                } else if (strcmp(optarg, "unequalNumberOfIngroupCopies") == 0) {
                    removeRecoverableChains = true;
                    recoverableChainsFilter = stCaf_chainHasUnequalNumberOfIngroupCopies;
                } else if (strcmp(optarg, "unequalNumberOfIngroupCopiesOrNoOutgroup") == 0) {
                    removeRecoverableChains = true;
                    recoverableChainsFilter = stCaf_chainHasUnequalNumberOfIngroupCopiesOrNoOutgroup;
                } else if (strcmp(optarg, "0") == 0) {
                    removeRecoverableChains = false;
                } else {
                    st_errAbort("Could not parse removeRecoverableChains argument");
                }
                break;
            case 'X':
                k = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies);
                if (k != 1) {
                    st_errAbort("Error parsing the minimumNumberOfSpecies argument");
                }
                break;
            case 'Y':
                if (strcmp(optarg, "chain") == 0) {
                    phylogenyHomologyUnitType = CHAIN;
                } else if (strcmp(optarg, "block") == 0) {
                    phylogenyHomologyUnitType = BLOCK;
                } else {
                    st_errAbort("Could not parse the phylogenyHomologyUnitType argument");
                }
                break;
            case 'Z':
                if (strcmp(optarg, "jukesCantor") == 0) {
                    phylogenyDistanceCorrectionMethod = JUKES_CANTOR;
                } else if (strcmp(optarg, "none") == 0 ) {
                    phylogenyDistanceCorrectionMethod = NONE;
                } else {
                    st_errAbort("Could not parse the phylogenyDistanceCorrectionMethod argument");
                }
                break;
            case '1':
                k = sscanf(optarg, "%" PRIi64, &maxRecoverableChainsIterations);
                if (k != 1) {
                    st_errAbort("Error parsing the maxRecoverableChainsIterations argument");
                }
                break;
            case '2':
                k = sscanf(optarg, "%" PRIi64, &maxRecoverableChainLength);
                if (k != 1) {
                    st_errAbort("Error parsing the maxRecoverableChainLength argument");
                }
                break;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    assert(cactusDiskDatabaseString != NULL);
    assert(minimumTreeCoverage >= 0.0);
    assert(minimumTreeCoverage <= 1.0);
    assert(blockTrim >= 0);
    assert(annealingRoundsLength >= 0);
    for (int64_t i = 0; i < annealingRoundsLength; i++) {
        assert(annealingRounds[i] >= 0);
    }
    assert(meltingRoundsLength >= 0);
    for (int64_t i = 1; i < meltingRoundsLength; i++) {
        assert(meltingRounds[i - 1] < meltingRounds[i]);
        assert(meltingRounds[i - 1] >= 1);
    }
    assert(alignmentTrimLength >= 0);
    for (int64_t i = 0; i < alignmentTrimLength; i++) {
        assert(alignmentTrims[i] >= 0);
    }
    assert(minimumOutgroupDegree >= 0);
    assert(minimumIngroupDegree >= 0);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString);

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    st_logInfo("Set up the flower disk\n");

    ///////////////////////////////////////////////////////////////////////////
    // Sort the constraints
    ///////////////////////////////////////////////////////////////////////////

    stPinchIterator *pinchIteratorForConstraints = NULL;
    if (constraintsFile != NULL) {
        pinchIteratorForConstraints = stPinchIterator_constructFromFile(constraintsFile);
        st_logInfo("Created an iterator for the alignment constaints from file: %s\n", constraintsFile);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Do the alignment
    ///////////////////////////////////////////////////////////////////////////

    startTime = time(NULL);

    stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
    if (alignmentsFile == NULL) {
        cactusDisk_preCacheStrings(cactusDisk, flowers);
    }
    char *tempFile1 = NULL;
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        flower = stList_get(flowers, i);
        if (!flower_builtBlocks(flower)) { // Do nothing if the flower already has defined blocks
            st_logDebug("Processing flower: %lli\n", flower_getName(flower));

            stCaf_setFlowerForAlignmentFiltering(flower);

            //Set up the graph and add the initial alignments
            stPinchThreadSet *threadSet = stCaf_setup(flower);

            //Build the set of outgroup threads
            outgroupThreads = stCaf_getOutgroupThreads(flower, threadSet);

            //Setup the alignments
            stPinchIterator *pinchIterator;
            stList *alignmentsList = NULL;
            if (alignmentsFile != NULL) {
                assert(i == 0);
                assert(stList_length(flowers) == 1);
                if (sortAlignments) {
                    tempFile1 = getTempFile();
                    stCaf_sortCigarsFileByScoreInDescendingOrder(alignmentsFile, tempFile1);
                    pinchIterator = stPinchIterator_constructFromFile(tempFile1);
                } else {
                    pinchIterator = stPinchIterator_constructFromFile(alignmentsFile);
                }
            } else {
                if (tempFile1 == NULL) {
                    tempFile1 = getTempFile();
                }
                alignmentsList = stCaf_selfAlignFlower(flower, minimumSequenceLengthForBlast, lastzArguments, realign, realignArguments, tempFile1);
                if (sortAlignments) {
                    stCaf_sortCigarsByScoreInDescendingOrder(alignmentsList);
                }
                st_logDebug("Ran lastz and have %" PRIi64 " alignments\n", stList_length(alignmentsList));
                pinchIterator = stPinchIterator_constructFromList(alignmentsList);
            }

            for (int64_t annealingRound = 0; annealingRound < annealingRoundsLength; annealingRound++) {
                int64_t minimumChainLength = annealingRounds[annealingRound];
                int64_t alignmentTrim = annealingRound < alignmentTrimLength ? alignmentTrims[annealingRound] : 0;
                st_logDebug("Starting annealing round with a minimum chain length of %" PRIi64 " and an alignment trim of %" PRIi64 "\n", minimumChainLength, alignmentTrim);
                stPinchIterator_setTrim(pinchIterator, alignmentTrim);

                //Add back in the constraints
                if (pinchIteratorForConstraints != NULL) {
                    stCaf_anneal(threadSet, pinchIteratorForConstraints, filterFn);
                }

                //Do the annealing
                if (annealingRound == 0) {
                    stCaf_anneal(threadSet, pinchIterator, filterFn);
                } else {
                    stCaf_annealBetweenAdjacencyComponents(threadSet, pinchIterator, filterFn);
                }

                // Dump the block degree and length distribution to a file
                if (debugFileName != NULL) {
                    dumpBlockInfo(threadSet, stString_print("%s-blockStats-preMelting", debugFileName));
                }

                printf("Sequence graph statistics after annealing:\n");
                printThreadSetStatistics(threadSet, flower, stdout);

                // Check for poorly-supported blocks--those that have
                // been transitively aligned together but with very
                // few homologies supporting the transitive
                // alignment. These "megablocks" can snarl up the
                // graph so that a lot of extra gets thrown away in
                // the first melting step.
                stPinchThreadSetBlockIt blockIt = stPinchThreadSet_getBlockIt(threadSet);
                stPinchBlock *block;
                while ((block = stPinchThreadSetBlockIt_getNext(&blockIt)) != NULL) {
                    if (stPinchBlock_getDegree(block) > minimumBlockDegreeToCheckSupport) {
                        uint64_t supportingHomologies = stPinchBlock_getNumSupportingHomologies(block);
                        uint64_t possibleSupportingHomologies = numPossibleSupportingHomologies(block, flower);
                        double support = ((double) supportingHomologies) / possibleSupportingHomologies;
                        if (support < minimumBlockHomologySupport) {
                            fprintf(stdout, "Destroyed a megablock with degree %" PRIi64
                                    " and %" PRIi64 " supporting homologies out of a maximum "
                                    "of %" PRIi64 " (%lf%%).\n", stPinchBlock_getDegree(block),
                                    supportingHomologies, possibleSupportingHomologies, support);
                            stPinchBlock_destruct(block);
                        }
                    }
                }

                //Do the melting rounds
                for (int64_t meltingRound = 0; meltingRound < meltingRoundsLength; meltingRound++) {
                    int64_t minimumChainLengthForMeltingRound = meltingRounds[meltingRound];
                    st_logDebug("Starting melting round with a minimum chain length of %" PRIi64 " \n", minimumChainLengthForMeltingRound);
                    if (minimumChainLengthForMeltingRound >= minimumChainLength) {
                        break;
                    }
                    stCaf_melt(flower, threadSet, NULL, 0, minimumChainLengthForMeltingRound, 0, INT64_MAX);
                } st_logDebug("Last melting round of cycle with a minimum chain length of %" PRIi64 " \n", minimumChainLength);
                stCaf_melt(flower, threadSet, NULL, 0, minimumChainLength, breakChainsAtReverseTandems, maximumMedianSequenceLengthBetweenLinkedEnds);
                //This does the filtering of blocks that do not have the required species/tree-coverage/degree.
                stCaf_melt(flower, threadSet, blockFilterFn, blockTrim, 0, 0, INT64_MAX);
            }

            if (removeRecoverableChains) {
                stCaf_meltRecoverableChains(flower, threadSet, breakChainsAtReverseTandems, maximumMedianSequenceLengthBetweenLinkedEnds, recoverableChainsFilter, maxRecoverableChainsIterations, maxRecoverableChainLength);
            }
            if (debugFileName != NULL) {
                dumpBlockInfo(threadSet, stString_print("%s-blockStats-postMelting", debugFileName));
            }

            printf("Sequence graph statistics after melting:\n");
            printThreadSetStatistics(threadSet, flower, stdout);

            // Build a tree for each block, then use each tree to
            // partition the homologies between the ingroups sequences
            // into those that occur before the speciation with the
            // outgroup and those which occur late.

            if (stSet_size(outgroupThreads) > 0 && doPhylogeny) {
                st_logDebug("Starting to build trees and partition ingroup homologies\n");
                stHash *threadStrings = stCaf_getThreadStrings(flower, threadSet);
                st_logDebug("Got sets of thread strings and set of threads that are outgroups\n");
                stCaf_PhylogenyParameters params;
                params.distanceCorrectionMethod = phylogenyDistanceCorrectionMethod;
                params.treeBuildingMethods = phylogenyTreeBuildingMethods;
                params.rootingMethod = phylogenyRootingMethod;
                params.scoringMethod = phylogenyScoringMethod;
                params.breakpointScalingFactor = breakpointScalingFactor;
                params.nucleotideScalingFactor = nucleotideScalingFactor;
                params.skipSingleCopyBlocks = phylogenySkipSingleCopyBlocks;
                params.keepSingleDegreeBlocks = phylogenyKeepSingleDegreeBlocks;
                params.costPerDupPerBase = phylogenyCostPerDupPerBase;
                params.costPerLossPerBase = phylogenyCostPerLossPerBase;
                params.maxBaseDistance = phylogenyMaxBaseDistance;
                params.maxBlockDistance = phylogenyMaxBlockDistance;
                params.numTrees = phylogenyNumTrees;
                params.ignoreUnalignedBases = 1;
                params.onlyIncludeCompleteFeatureBlocks = 0;
                params.doSplitsWithSupportHigherThanThisAllAtOnce = phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce;
                params.numTreeBuildingThreads = numTreeBuildingThreads;

                assert(params.numTreeBuildingThreads >= 1);

                stCaf_buildTreesToRemoveAncientHomologies(
                    threadSet, phylogenyHomologyUnitType, threadStrings, outgroupThreads, flower, &params,
                    debugFileName == NULL ? NULL : stString_print("%s-phylogeny", debugFileName), referenceEventHeader);
                stHash_destruct(threadStrings);
                st_logDebug("Finished building trees\n");

                if (removeRecoverableChains) {
                    // We melt recoverable chains after splitting, as
                    // well as before, to alleviate coverage loss
                    // caused by bad splits.
                    stCaf_meltRecoverableChains(flower, threadSet, breakChainsAtReverseTandems, maximumMedianSequenceLengthBetweenLinkedEnds, recoverableChainsFilter, maxRecoverableChainsIterations, maxRecoverableChainLength);
                }

                // Enforce the block constraints on minimum degree,
                // etc. after splitting.
                stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX);
            }

            //Sort out case when we allow blocks of degree 1
            if (minimumDegree < 2) {
                st_logDebug("Creating degree 1 blocks\n");
                stCaf_makeDegreeOneBlocks(threadSet);
                stCaf_melt(flower, threadSet, blockFilterFn, blockTrim, 0, 0, INT64_MAX);
            } else if (maximumAdjacencyComponentSizeRatio < INT64_MAX) { //Deal with giant components
                st_logDebug("Breaking up components greedily\n");
                stCaf_breakupComponentsGreedily(threadSet, maximumAdjacencyComponentSizeRatio);
            }

            //Finish up
            stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, minLengthForChromosome,
                    proportionOfUnalignedBasesForNewChromosome); //Flower is then destroyed at this point.
            st_logInfo("Ran the cactus core script\n");

            //Cleanup
            stPinchThreadSet_destruct(threadSet);
            stPinchIterator_destruct(pinchIterator);
            stSet_destruct(outgroupThreads);

            if (alignmentsList != NULL) {
                stList_destruct(alignmentsList);
            }
            st_logInfo("Cleaned up from main loop\n");
        } else {
            st_logInfo("We've already built blocks / alignments for this flower\n");
        }
    }
    stList_destruct(flowers);
    if (tempFile1 != NULL) {
        st_system("rm %s", tempFile1);
    }

    if (constraintsFile != NULL) {
        stPinchIterator_destruct(pinchIteratorForConstraints);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Write the flower to disk.
    ///////////////////////////////////////////////////////////////////////////
    st_logDebug("Writing the flowers to disk\n");
    cactusDisk_write(cactusDisk);
    st_logInfo("Updated the flower on disk and %" PRIi64 " seconds have elapsed\n", time(NULL) - startTime);

    ///////////////////////////////////////////////////////////////////////////
    // Clean up.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_destruct(cactusDisk);
}