stList *splitMultipleStubCycles(stList *chosenEdges,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges,
        stList *stubEdges, stList *chainEdges) {
    /*
     * Produces an updated list of adjacency edges in which each stub edge
     * belongs to exactly one cycle.
     *
     * The components formed by chosenEdges together with the stub and chain
     * edges are each passed through splitMultipleStubCycle; the stub and chain
     * edges are then stripped from the resulting cycles and the remaining
     * adjacency edges are joined into the single list that is returned.
     */

    // Components built from the chosen adjacency edges plus the stub/chain edges.
    stList *components = getComponents2(chosenEdges, stubEdges, chainEdges);

    // Collects every cycle produced by the splitting step; this list is the
    // one that destroys those cycles (destructor is stList_destruct).
    stList *oneStubCycles = stList_construct3(0,
            (void(*)(void *)) stList_destruct);

    int64_t componentIndex = 0;
    while (componentIndex < stList_length(components)) {
        stList *component = stList_get(components, componentIndex);

        // Partition the edges of this component by kind.
        stList *componentAdjacencyEdges;
        stList *componentStubEdges;
        stList *componentChainEdges;
        splitIntoAdjacenciesStubsAndChains(component,
                nonZeroWeightAdjacencyEdges, stubEdges, chainEdges,
                &componentAdjacencyEdges, &componentStubEdges,
                &componentChainEdges);

        stList *pieces = splitMultipleStubCycle(component,
                componentAdjacencyEdges, allAdjacencyEdges,
                componentStubEdges, componentChainEdges);
        stList_appendAll(oneStubCycles, pieces);

        // The cycles are now referenced from oneStubCycles, so drop the
        // container without destroying the lists it points to.
        stList_setDestructor(pieces, NULL);
        stList_destruct(pieces);

        stList_destruct(componentAdjacencyEdges);
        stList_destruct(componentStubEdges);
        stList_destruct(componentChainEdges);

        componentIndex++;
    }
    stList_destruct(components);

    // Strip the stub and chain edges out of every cycle.
    stSortedSet *nonAdjacencyEdges = getSetOfMergedLists(stubEdges,
            chainEdges);
    stList *adjacencyOnlyCycles = filterListsToExclude(oneStubCycles,
            nonAdjacencyEdges);
    stList_destruct(oneStubCycles);
    stSortedSet_destruct(nonAdjacencyEdges);

    // Flatten the per-cycle adjacency edges into one list.
    stList *mergedAdjacencyEdges = stList_join(adjacencyOnlyCycles);
    stList_destruct(adjacencyOnlyCycles);

    return mergedAdjacencyEdges;
}
コード例 #2
0
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName,
              bool isTop, stMatrix *(*generateSubstitutionMatrix)(double)) {
    /*
     * A reference thread between the two caps in each flower f may be broken
     * into two in the children of f. So, for each flower f, first identify the
     * attached stub ends present in the children of f that are not present in
     * f and copy them into f, reattaching the reference caps as needed.
     *
     * Afterwards the recursive threads are built from the caps. When isTop is
     * set, the resulting thread strings are additionally turned into meta
     * sequences and coordinates are assigned along each thread.
     */
    stList *caps = getCaps(flowers, referenceEventName);

    // Iterate from the back of the list towards the front, because the callee
    // receives the list itself and adds to it.
    for (int64_t remaining = stList_length(caps); remaining > 0; remaining--) {
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, remaining - 1), caps);
    }

    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        recoverBrokenAdjacencies(stList_get(flowers, flowerIndex), caps, referenceEventName);
    }

    // Build the phylogenetic event trees used for base calling, one per flower,
    // rooted at the reference event. The hash destroys its values with
    // cleanupPhylogeneticTree.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL, (void (*)(void *))cleanupPhylogeneticTree);
    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *currentFlower = stList_get(flowers, flowerIndex);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(currentFlower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, currentFlower,
                getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (!isTop) {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    } else {
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        // Separate counters are used to name non-trivial and trivial sequences:
        // non-trivial indices count up from zero, trivial ones from the number
        // of thread strings.
        int64_t nonTrivialSeqIndex = 0;
        int64_t trivialSeqIndex = stList_length(threadStrings);

        for (int64_t threadIndex = 0; threadIndex < stList_length(threadStrings); threadIndex++) {
            Cap *cap = stList_get(caps, threadIndex);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *capFlower = end_getFlower(cap_getEnd(cap));

            char *threadString = stList_get(threadStrings, threadIndex);
            bool isTrivial = isTrivialString(&threadString); // This alters the original string

            int64_t sequenceIndex;
            if (isTrivial) {
                sequenceIndex = trivialSeqIndex++;
            } else {
                sequenceIndex = nonTrivialSeqIndex++;
            }
            MetaSequence *metaSequence = addMetaSequence(capFlower, cap, sequenceIndex,
                    threadString, isTrivial);
            free(threadString);

            int64_t endCoordinate = setCoordinates(capFlower, metaSequence, cap,
                    metaSequence_getStart(metaSequence) - 1);
            (void) endCoordinate; // Only consumed by the assert below.
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }

        // Every string was freed inside the loop, so the list must not free them again.
        stList_setDestructor(threadStrings, NULL);
        stList_destruct(threadStrings);
    }

    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}
コード例 #3
0
int main(int argc, char *argv[]) {
    /*
     * Collects the split contig path intervals for each haplotype event string
     * and writes them to outputFile, one interval per line in the form
     * "<sequenceName> <start> <end>".
     *
     * Returns 0 on success and 1 if the output file cannot be opened or cannot
     * be flushed and closed.
     */

    //////////////////////////////////////////////
    //Parse the inputs
    //////////////////////////////////////////////

    parseBasicArguments(argc, argv, "linkageStats");

    ///////////////////////////////////////////////////////////////////////////
    // Get the intervals
    ///////////////////////////////////////////////////////////////////////////

    // A haplotype flagged as contamination is passed as NULL instead of its event string.
    stList *haplotypeEventStrings = getEventStrings(
            treatHaplotype1AsContamination ? NULL : hap1EventString,
            treatHaplotype2AsContamination ? NULL : hap2EventString);
    stList *assemblyEventStringInList = stList_construct();
    stList_append(assemblyEventStringInList, assemblyEventString);

    // This list takes over the intervals produced for every haplotype.
    stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct);
    for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) {
        const char *hapEventString = stList_get(haplotypeEventStrings, i);
        st_logInfo("Getting contig paths for haplotype: %s", hapEventString);
        stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList);
        stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString,
                assemblyEventStringInList);
        stList_destruct(contigPaths);
        st_logInfo("Getting contig paths\n");
        stList_appendAll(intervals, hapIntervals);
        // The intervals are now referenced from 'intervals'; drop only the container.
        stList_setDestructor(hapIntervals, NULL);
        stList_destruct(hapIntervals);
    }

    st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals));

    ///////////////////////////////////////////////////////////////////////////
    // Write it out.
    ///////////////////////////////////////////////////////////////////////////

    FILE *fileHandle = fopen(outputFile, "w");
    if (fileHandle == NULL) {
        // Without this check a failed open would hand NULL to fprintf/fclose below.
        fprintf(stderr, "Unable to open output file %s for writing\n", outputFile);
        return 1;
    }
    for (int64_t i = 0; i < stList_length(intervals); i++) {
        SequenceInterval *sequenceInterval = stList_get(intervals, i);
        st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
        fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName,
                sequenceInterval->start, sequenceInterval->end);
    }

    st_logInfo("Finished writing out the stats.\n");
    // fclose flushes buffered output, so a failure here means the file may be incomplete.
    if (fclose(fileHandle) != 0) {
        fprintf(stderr, "Failed to flush and close output file %s\n", outputFile);
        return 1;
    }

    return 0;
}
コード例 #4
0
static stList *getRecords(CactusDisk *cactusDisk, stList *objectNames, char *type) {
    /*
     * Fetches the records for all of objectNames in one bulk database request
     * and returns them as a list in the same order as objectNames.
     *
     * objectNames holds pointers to int64_t names. 'type' is only used in the
     * message of the exception raised when the bulk request fails.
     *
     * For a name the cache does not already hold, the database record is
     * decompressed, stored in the cache and returned; otherwise the cached
     * record is returned instead of the database one.
     *
     * The returned list has 'free' as its element destructor, except for the
     * empty-input case, where an empty list without a destructor is returned.
     */
    if (stList_length(objectNames) == 0) {
        return stList_construct3(0, NULL);
    }

    stList *records = NULL;
    stTry
    {
        records = stKVDatabase_bulkGetRecords(cactusDisk->database, objectNames);
    }
    stCatch(except)
    {
        stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                "An unknown database error occurred when getting a bulk set of %s", type);
    }
    stTryEnd
    ;
    assert(records != NULL);
    assert(stList_length(objectNames) == stList_length(records));

    // From here on each slot is overwritten with a plain record buffer, so the
    // list is switched to freeing its elements with free().
    stList_setDestructor(records, free);

    for (int64_t slot = 0; slot < stList_length(objectNames); slot++) {
        Name objectName = *((int64_t *) stList_get(objectNames, slot));
        stKVDatabaseBulkResult *bulkResult = stList_get(records, slot);
        assert(bulkResult != NULL);

        int64_t recordSize;
        void *record;
        if (stCache_containsRecord(cactusDisk->cache, objectName, 0, INT64_MAX)) {
            // Already cached: use the cached record rather than the database one.
            record = stCache_getRecord(cactusDisk->cache, objectName, 0, INT64_MAX, &recordSize);
            assert(recordSize >= 0);
            assert(record != NULL);
        } else {
            // Not cached yet: decompress the database record and add it to the cache.
            record = stKVDatabaseBulkResult_getRecord(bulkResult, &recordSize);
            assert(recordSize >= 0);
            assert(record != NULL);
            record = decompress(record, &recordSize);
            stCache_setRecord(cactusDisk->cache, objectName, 0, recordSize, record);
        }

        // The bulk result wrapper is finished with; replace it in the list by the record.
        stKVDatabaseBulkResult_destruct(bulkResult);
        stList_set(records, slot, record);
    }
    return records;
}
static stList *splitMultipleStubCycle(stList *cycle,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges,
        stList *stubEdges, stList *chainEdges) {
    /*
     * Takes a simple cycle containing k stub edges and splits it into k
     * cycles, each containing one stub edge.
     *
     * The returned list holds the resulting cycles and destroys them with
     * stList_destruct. When the cycle has a single stub-free path it is not
     * split: the result then holds one copy of 'cycle'.
     */

    // Sub-components of the cycle made up of adjacency and chain edges only.
    stSortedSet *nonAdjacencyEdges = getSetOfMergedLists(stubEdges,
            chainEdges);
    // Keep only the adjacency edges of the cycle.
    stList *cycleAdjacencyEdges = stList_filterToExclude(cycle,
            nonAdjacencyEdges);
    // No stub edges are supplied here, so the components are stub free.
    stList *stubFreePaths = getComponents2(cycleAdjacencyEdges, NULL,
            chainEdges);
    stList_destruct(cycleAdjacencyEdges);
    assert(stList_length(stubFreePaths) >= 1);

    // The list to return.
    stList *result = stList_construct3(0,
            (void(*)(void *)) stList_destruct);

    if (stList_length(stubFreePaths) <= 1) {
        // Nothing to split: hand back a copy of the cycle as it is.
        stList_append(result, stList_copy(cycle, NULL));
    } else {
        // Build the adjacency edges that are acceptable in the merge.
        stSortedSet *oddNodes = getOddNodes(cycle);
        stList *oddToEvenNonZeroWeightEdges = getOddToEvenAdjacencyEdges(
                oddNodes, nonZeroWeightAdjacencyEdges);
        stSortedSet *oddToEvenAllEdges = getOddToEvenAdjacencyEdges2(oddNodes,
                allAdjacencyEdges);

        // Merge together the best two components; the merge works in place
        // on the list of paths.
        stList *adjacencyOnlyPaths = filterListsToExclude(stubFreePaths,
                nonAdjacencyEdges);
        doBestMergeOfTwoSimpleCycles(adjacencyOnlyPaths,
                oddToEvenNonZeroWeightEdges, oddToEvenAllEdges);
        stList *mergedAdjacencyEdges = stList_join(adjacencyOnlyPaths);
        stList_destruct(adjacencyOnlyPaths);

        // Recompute the components with the stub and chain edges put back.
        stList *halves = getComponents2(mergedAdjacencyEdges, stubEdges,
                chainEdges);
        assert(stList_length(halves) == 2);
        stList_destruct(mergedAdjacencyEdges);

        // The merge inputs are no longer needed.
        stSortedSet_destruct(oddNodes);
        stList_destruct(oddToEvenNonZeroWeightEdges);
        stSortedSet_destruct(oddToEvenAllEdges);

        // Recurse on each resulting component.
        int64_t halfIndex = 0;
        while (halfIndex < stList_length(halves)) {
            stList *half = stList_get(halves, halfIndex);

            // Split into adjacency edges, stub edges and chain edges.
            stList *halfAdjacencyEdges;
            stList *halfStubEdges;
            stList *halfChainEdges;
            splitIntoAdjacenciesStubsAndChains(half,
                    nonZeroWeightAdjacencyEdges, stubEdges, chainEdges,
                    &halfAdjacencyEdges, &halfStubEdges, &halfChainEdges);

            stList *halfCycles = splitMultipleStubCycle(half,
                    halfAdjacencyEdges, allAdjacencyEdges, halfStubEdges,
                    halfChainEdges);
            stList_appendAll(result, halfCycles);

            // The cycles are now referenced from 'result'; release only the container.
            stList_setDestructor(halfCycles, NULL);
            stList_destruct(halfCycles);
            stList_destruct(halfAdjacencyEdges);
            stList_destruct(halfStubEdges);
            stList_destruct(halfChainEdges);

            halfIndex++;
        }
        stList_destruct(halves);
    }

    stSortedSet_destruct(nonAdjacencyEdges);
    stList_destruct(stubFreePaths);

    return result;
}