/*
 * Reports whether observedHash and expectedHash contain the same keys and
 * whether rowsAreEqual() accepts the pair of rows stored under every key.
 *
 * Two passes are made, first over the keys of observedHash and then over the
 * keys of expectedHash, so that a key missing from either side is detected.
 * On the first mismatch both hashes are printed with printBlockHash() (labelled
 * "observed" and "expected") and false is returned; otherwise returns true.
 *
 * The iterator is always destructed before returning, including on mismatch.
 */
static bool hashesAreEqual(stHash *observedHash, stHash *expectedHash) {
    stHash *keySources[2] = { observedHash, expectedHash };
    bool equal = true;

    for (int pass = 0; equal && pass < 2; pass++) {
        stHashIterator *hit = stHash_getIterator(keySources[pass]);
        char *key;
        while ((key = stHash_getNext(hit)) != NULL) {
            void *observedRow = stHash_search(observedHash, key);
            void *expectedRow = stHash_search(expectedHash, key);
            // A key absent from either hash is a mismatch; only compare rows
            // when both lookups succeeded.
            if (observedRow == NULL || expectedRow == NULL
                || !rowsAreEqual(observedRow, expectedRow)) {
                equal = false;
                break;
            }
        }
        // Single release point: runs whether the loop finished or broke out.
        stHash_destructIterator(hit);
    }

    if (!equal) {
        printBlockHash(observedHash, "observed");
        printBlockHash(expectedHash, "expected");
    }
    return equal;
}
// Example #2
// 0
/*
 * Builds an adjacency list from the given pairs (via buildAdjacencyList) and
 * runs dfs() from position 0 of each of the sequenceNumber sequences, attempting
 * to create a valid topological sort. Returns non-zero if the graph contains a
 * cycle, zero otherwise.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *graph = buildAdjacencyList(pairs, sequenceNumber);

    // Bookkeeping sets shared by every dfs() call; they are built with no
    // element destructor (NULL).
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    for (int64_t seq = 0; seq < sequenceNumber; seq++) {
        // Temporary lookup key; the tuple actually stored in the column is
        // the one handed to dfs(), so the probe can be freed straight after.
        stIntTuple *probe = stIntTuple_construct2(seq, 0);
        stSortedSet *startColumn = stHash_search(graph, probe);
        assert(startColumn != NULL);
        stIntTuple *storedPos = stSortedSet_search(startColumn, probe);
        assert(storedPos != NULL);
        // Once a cycle has been found, dfs() is not invoked again.
        if (!cyclic) {
            cyclic = dfs(graph, storedPos, started, done) ? 1 : 0;
        }
        stIntTuple_destruct(probe);
    }

    // Cleanup: gather every column reachable from the hash into a sorted set
    // whose element destructor is stSortedSet_destruct, then destroy that set.
    // NOTE(review): presumably this de-duplicates columns shared by several
    // keys so each is destructed exactly once - confirm against buildAdjacencyList.
    stSortedSet *columnsToFree = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    stHashIterator *graphIt = stHash_getIterator(graph);
    for (stIntTuple *pos = stHash_getNext(graphIt); pos != NULL; pos = stHash_getNext(graphIt)) {
        stSortedSet *column = stHash_search(graph, pos);
        assert(column != NULL);
        stSortedSet_insert(columnsToFree, column);
    }
    stHash_destructIterator(graphIt);
    stHash_destruct(graph);
    stSortedSet_destruct(columnsToFree);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);

    return cyclic;
}
/*
 * Writes the contents of a block hash to stdout: first "<title>:" on its own
 * line, then one line per entry showing the row's name, start, length, strand,
 * sourceLength and sequence. Entries appear in the hash iterator's order.
 */
static void printBlockHash(stHash *hash, const char *title) {
    printf("%s:\n", title);

    stHashIterator *entries = stHash_getIterator(hash);
    for (char *rowKey = stHash_getNext(entries); rowKey != NULL; rowKey = stHash_getNext(entries)) {
        row_t *row = stHash_search(hash, rowKey);
        printf("%20s %6"PRIu64" %6"PRIu64" %c %9"PRIu64" %s\n",
               row->name, row->start, row->length,
               row->strand, row->sourceLength, row->sequence);
    }
    stHash_destructIterator(entries);
}
// Compute the connected components, if they haven't been computed
// already since the last modification.
//
// The result is stored in connectivity->connectedComponentCache as a
// singly linked list of stNaiveConnectedComponent, each holding the set
// of nodes in that component. If the cache is already non-NULL this is
// a no-op. Aborts the process if a list entry cannot be allocated.
static void computeConnectedComponents(stNaiveConnectivity *connectivity) {
    if (connectivity->connectedComponentCache != NULL) {
        // Already computed the connected components.
        return;
    }

    stHashIterator *nodeIt = stHash_getIterator(connectivity->nodesToAdjList);
    void *node;
    stNaiveConnectedComponent *componentsHead = NULL;
    while ((node = stHash_getNext(nodeIt)) != NULL) {
        // Seed set: this node plus every node on its adjacency list.
        stSet *myNodeSet = stSet_construct();
        stSet_insert(myNodeSet, node);
        for (struct adjacency *adj = stHash_search(connectivity->nodesToAdjList, node);
             adj != NULL; adj = adj->next) {
            stSet_insert(myNodeSet, adj->toNode);
        }

        // Now go through the existing connected components and see
        // whether this set overlaps any of them. On a partial overlap
        // the set becomes the union of the two, the old component is
        // removed, and we keep looking for further overlaps; whatever
        // is left at the end becomes a new connected component. If the
        // set is a subset of an existing component we can quit early,
        // since it cannot add to that component or to any other.
        stNaiveConnectedComponent *curComponent = componentsHead;
        while (curComponent != NULL) {
            // Fetch the successor up front: curComponent may be removed
            // from the list below.
            stNaiveConnectedComponent *next = curComponent->next;

            // Find out whether our node set is a subset of this
            // connected component, or if it shares any overlap.
            bool isSubset = true;
            bool overlap = false;
            stSetIterator *myNodeIt = stSet_getIterator(myNodeSet);
            void *member;
            while ((member = stSet_getNext(myNodeIt)) != NULL) {
                if (stSet_search(curComponent->nodes, member)) {
                    overlap = true;
                } else {
                    isSubset = false;
                }
            }
            stSet_destructIterator(myNodeIt);

            if (isSubset) {
                assert(overlap == true);
                // Quit early.
                stSet_destruct(myNodeSet);
                myNodeSet = NULL;
                break;
            } else if (overlap) {
                stSet *newNodeSet = stSet_getUnion(myNodeSet, curComponent->nodes);
                stSet_destruct(myNodeSet);
                removeComponent(&componentsHead, curComponent);
                myNodeSet = newNodeSet;
            }

            curComponent = next;
        }
        if (myNodeSet != NULL) {
            // We have a new (or possibly merged) connected component to
            // add to the front of the list.
            stNaiveConnectedComponent *newComponent = malloc(sizeof *newComponent);
            if (newComponent == NULL) {
                // No way to report failure from a void function, and a
                // partially built cache would be wrong; fail loudly
                // instead of dereferencing NULL.
                abort();
            }
            newComponent->nodes = myNodeSet;
            newComponent->next = componentsHead;
            componentsHead = newComponent;
        }
    }

    stHash_destructIterator(nodeIt);

    connectivity->connectedComponentCache = componentsHead;
}
// Example #5
// 0
/*
 * Destroys a set iterator: the wrapped hash iterator is destructed first,
 * then the iterator structure itself is freed.
 */
void stSet_destructIterator(stSetIterator *iterator) {
    stHashIterator *wrapped = iterator->hashIterator;
    stHash_destructIterator(wrapped);
    free(iterator);
}
// Example #6
// 0
int main(int argc, char *argv[]) {
    // Parse arguments
    if (argc != 3) {
        usage(argv);
        return 1;
    }

    // You would load a custom HMM here if you wanted using
    // hmm_getStateMachine (see the realign code)
    StateMachine *stateMachine  = stateMachine5_construct(fiveState);

    PairwiseAlignmentParameters *parameters = pairwiseAlignmentBandingParameters_construct();

    stHash *targetSequences = readFastaFile(argv[1]);
    stHash *querySequences = readFastaFile(argv[2]);

    // For each query sequence, align it against all target sequences.
    stHashIterator *queryIt = stHash_getIterator(querySequences);
    char *queryHeader;
    while ((queryHeader = stHash_getNext(queryIt)) != NULL) {
        char *querySeq = stHash_search(querySequences, queryHeader);
        stHashIterator *targetIt = stHash_getIterator(targetSequences);
        char *targetHeader;
        while ((targetHeader = stHash_getNext(targetIt)) != NULL) {
            char *targetSeq = stHash_search(targetSequences, targetHeader);
            // Here we should try both the target sequence and its
            // reverse-complemented version


            // Aligns the sequences.
            // If you have alignment constraints (anchors) you should
            // replace this with getAlignedPairsUsingAnchors.
            stList *alignedPairs = getAlignedPairs(stateMachine, targetSeq,
                                                   querySeq, parameters,
                                                   true, true);
            // Takes into account the probability of aligning to a
            // gap, by transforming the posterior probability into the
            // AMAP objective function (see Schwartz & Pachter, 2007).
            alignedPairs = reweightAlignedPairs2(alignedPairs, strlen(targetSeq),
                                                 strlen(querySeq),
                                                 parameters->gapGamma);
            // I think this calculates the optimal ordered set of
            // alignments from the unordered set of aligned pairs, not
            // completely sure.
            alignedPairs = filterPairwiseAlignmentToMakePairsOrdered(alignedPairs,
                                                                     targetSeq,
                                                                     querySeq,
                                                                     // This parameter says that the minimum posterior probability we will accept has to be at least 0.9.
                                                                     0.9);

            // After this the "aligned pairs" data structure changes,
            // which is a little sketchy. It's just so that the
            // alignment can be printed properly.
            stList_mapReplace(alignedPairs, convertToAnchorPair, NULL);
            stList_sort(alignedPairs, (int (*)(const void *, const void *)) stIntTuple_cmpFn);
            struct PairwiseAlignment *alignment = convertAlignedPairsToPairwiseAlignment(targetHeader, queryHeader,
                                                                                  0, strlen(targetSeq), strlen(querySeq), alignedPairs);
            // Output the cigar string
            cigarWrite(stdout, alignment, 0);

            stList_destruct(alignedPairs);
            destructPairwiseAlignment(alignment);
        }
        stHash_destructIterator(targetIt);
    }
    stHash_destructIterator(queryIt);

    // Clean up
    stHash_destruct(targetSequences);
    stHash_destruct(querySequences);

    pairwiseAlignmentBandingParameters_destruct(parameters);
    stateMachine_destruct(stateMachine);
}