コード例 #1
0
/*
 * Adds an undirected edge between node1 and node2.
 *
 * Two adjacency records are allocated, one per endpoint. Each records the
 * node it points to and its twin (via "inverse"), and each is pushed onto the
 * front of its endpoint's adjacency list stored in connectivity->nodesToAdjList.
 */
void stNaiveConnectivity_addEdge(stNaiveConnectivity *connectivity, void *node1, void *node2) {
    invalidateCache(connectivity);

    struct adjacency *forward = malloc(sizeof(struct adjacency));
    struct adjacency *backward = malloc(sizeof(struct adjacency));

    // Half-edge stored under node1, pointing at node2.
    forward->toNode = node2;
    forward->inverse = backward;
    forward->prev = NULL;

    // Half-edge stored under node2, pointing at node1.
    backward->toNode = node1;
    backward->inverse = forward;
    backward->prev = NULL;

    // Push the forward half onto the head of node1's list (the old head may be NULL).
    struct adjacency *oldHead1 = stHash_search(connectivity->nodesToAdjList, node1);
    forward->next = oldHead1;
    if (oldHead1 != NULL) {
        oldHead1->prev = forward;
    }
    stHash_remove(connectivity->nodesToAdjList, node1);
    stHash_insert(connectivity->nodesToAdjList, node1, forward);

    // node2's head is looked up only after node1's list was updated, so for
    // node1 == node2 this sees the head that was just installed.
    struct adjacency *oldHead2 = stHash_search(connectivity->nodesToAdjList, node2);
    backward->next = oldHead2;
    if (oldHead2 != NULL) {
        oldHead2->prev = backward;
    }
    stHash_remove(connectivity->nodesToAdjList, node2);
    stHash_insert(connectivity->nodesToAdjList, node2, backward);
}
コード例 #2
0
ファイル: sonLibKVDatabaseConf.c プロジェクト: adderan/sonLib
/* Parse XML string into a hash.  This parses all attributes of all tags
 * into values.  The st_kv_database_conf type is stored as conf_type and the
 * database tag is stored as db_tag.  This does minimal error checking
 * and is really lame.
 *
 * The markup sequences "</", "<", "/>", ">" and "=" are each replaced by a
 * single space, after which the text is consumed token by token.
 *
 * Returns a newly constructed string-keyed hash; the hash frees both its keys
 * and its values with free().
 *
 * Throws ST_KV_DATABASE_EXCEPTION_ID when a key has no value, when a key is
 * repeated, or when the token stream does not end with the closing
 * st_kv_database_conf token (including when it simply runs out).
 */
static stHash *hackParseXmlString(const char *xmlString) {
    stHash *hash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, free);
    char *toReplace[5] = { "</", "<", "/>", ">", "=" };
    char *cA = stString_replace(xmlString, toReplace[0], " "), *cA2;
    for (int64_t i = 1; i < 5; i++) {
        cA2 = stString_replace(cA, toReplace[i], " ");
        free(cA);
        cA = cA2;
    }
    // cA keeps the start of the cleaned buffer so it can be freed at the end;
    // cA2 is the cursor that is handed (by address) to the token helpers.
    cA2 = cA;
    getExpectedToken(&cA2, "st_kv_database_conf");
    stHash_insert(hash, stString_copy("conf_type"), getKeyValue(&cA2, "type"));
    stHash_insert(hash, stString_copy("db_tag"), getNextToken(&cA2));

    char *key;
    while (((key = getNextToken(&cA2)) != NULL) && !stString_eq(key, "st_kv_database_conf")) {
        char *value = getNextToken(&cA2);
        if (value == NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "failed to to get value for key \"%s\"", key);
        }
        if (stHash_search(hash, key) != NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got a duplicate entry in the database conf string \"%s\"", key);
        }
        stHash_insert(hash, key, value);
    }
    if (key == NULL) {
        // The loop ended because the tokens ran out: there is no final token
        // to compare or print, so report that without dereferencing NULL.
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID,
                "database conf string ended without a closing \"%s\" tag", "st_kv_database_conf");
    } else if (!stString_eq(key, "st_kv_database_conf")) {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got an unexpected final entry \"%s\"", key);
    }
    free(key);
    free(cA);
    return hash;
}
コード例 #3
0
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A kind of stupid reimplementation of the greedy function, done just to trap typos.
     *
     * Every entry of "nodes" starts out in its own single-element set. For each
     * edge, the sets holding the nodes built from elements 1 and 2 of the edge
     * tuple are replaced by their union when they differ.
     * Returns the hash from node to the set it ended up in.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    // Seed one singleton component per node.
    for (int64_t n = 0; n < stList_length(nodes); n++) {
        stIntTuple *seed = stList_get(nodes, n);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, seed);
        stHash_insert(nodesToComponents, seed, singleton);
    }

    // Merge the components joined by each edge.
    for (int64_t e = 0; e < stList_length(filteredEdges); e++) {
        stIntTuple *edge = stList_get(filteredEdges, e);
        // Temporary lookup keys; destroyed at the bottom of the iteration.
        stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *setA = stHash_search(nodesToComponents, endA);
        stSortedSet *setB = stHash_search(nodesToComponents, endB);
        assert(setA != NULL && setB != NULL);
        if (setA != setB) {
            stSortedSet *merged = stSortedSet_getUnion(setA, setB);
            stSortedSetIterator *members = stSortedSet_getIterator(merged);
            // Re-point every member of the union at the merged set.
            for (stIntTuple *member = stSortedSet_getNext(members); member != NULL;
                    member = stSortedSet_getNext(members)) {
                stHash_insert(nodesToComponents, member, merged);
            }
            stSortedSet_destructIterator(members);
            stSortedSet_destruct(setA);
            stSortedSet_destruct(setB);
        }
        stIntTuple_destruct(endA);
        stIntTuple_destruct(endB);
    }
    return nodesToComponents;
}
コード例 #4
0
/*
 * Test of addMafBlockToRowHash: an observed row hash is built from a first
 * block, a second block is added to it, and the resulting hash and name list
 * are compared against a hand-built expected hash and list.
 */
static void test_addBlockToHash_3(CuTest *testCase) {
    // concatenation with 2 bases of interstitial and a sequence length breakpoint
    options_t *options = options_construct();
    options->breakpointPenalty = 10;
    options->interstitialSequence = 5;
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);
    // State before the new block is added.
    stHash *observedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name3.chr1     0 13 +       100 GCAGCTGAAAACA\n",
                                                     observedList
                                                     );
    // The block that gets added to the observed hash.
    mafBlock_t *mb = maf_newMafBlockListFromString("a score=0 test\n"
                                                   "s reference.chr0 13  5 + 158545518 ACGTA\n"
                                                   "s name.chr1      12  5 +       100 gtcGG\n"
                                                   "s name2.chr1     10  5 +       100 ATGTg\n"
                                                   "s name3.chr1     50  5 +       100 CCCCC\n"
                                                   , 3);
    stHash *expectedHash = NULL;
    expectedHash = createBlockHashFromString("a score=0\n"
                                             "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
                                             "s name.chr1      0 17 +       100 ATGT---ATGCCGac----------gtcGG\n"
                                             "s name2.chr1     0 15 +       100 ATGT---ATGCCG------------ATGTg\n"
                                             "s name3          0 28 +        28 GCAGCTGAAAACA--NNNNNNNNNNCCCCC\n",
                                             expectedList
                                             );
    // The expected name3 row is created under the bare key "name3" and then
    // patched by hand: prevRightPos, prevName and multipleNames are set directly.
    row_t *r = stHash_search(expectedHash, "name3");
    r->prevRightPos = 54;
    free(r->prevName);
    r->prevName = stString_copy("name3.chr1");
    r->multipleNames = true;
    // Sequence hash passed to addMafBlockToRowHash, one entry per sequence name.
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGacgtc"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *mtfs = newMtfseqFromString("gcagctgaaaacaACGTA"
                                         "tttttttttttttttttttttttttttttttt"
                                         "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), mtfs);
    mtfs = newMtfseqFromString("ATGTATGCCGATGTg"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), mtfs);
    mtfs = newMtfseqFromString("GCAGCTGAAAACAggggggggggggggggggggggggggggggggggggg"
                               "CCCCCaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                               );
    stHash_insert(seqHash, stString_copy("name3.chr1"), mtfs);
    // Function under test, followed by the two comparisons.
    addMafBlockToRowHash(observedHash, seqHash, observedList, mb, options);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(mb);
    destroyOptions(options);
}
コード例 #5
0
/*
 * Test of addMafBlockToRowHash where the added block contains a row
 * (name3.chr@) that is absent from the initial observed hash. The resulting
 * hash and name list are compared against hand-built expected values.
 */
static void test_addBlockToHash_2(CuTest *testCase) {
    // concatenation with 2 bases of interstitial AND a previously unobserved sequence
    options_t *options = options_construct();
    options->breakpointPenalty = 10;
    options->interstitialSequence = 5;
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);
    // State before the new block is added (no name3 row yet).
    stHash *observedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
                                                     observedList);
    // The block that gets added to the observed hash.
    mafBlock_t *mb = maf_newMafBlockListFromString("a score=0 test\n"
                                                   "s reference.chr0 13  5 + 158545518 ACGTA\n"
                                                   "s name.chr1      12  5 +       100 gTcGG\n"
                                                   "s name2.chr1     10  5 +       100 ATGTg\n"
                                                   "s name3.chr@      0  5 +        20 aaccg\n"
                                                   , 3);
    stHash *expectedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 18 + 158545518 gcagctgaaaaca--ACGTA\n"
                                                     "s name.chr1      0 17 +       100 ATGT---ATGCCGacgTcGG\n"
                                                     "s name2.chr1     0 15 +       100 ATGT---ATGCCG--ATGTg\n"
                                                     "s name3.chr@     0  5 +        20 ---------------aaccg\n",
                                                     expectedList
                                                     );
    // Sequence hash passed to addMafBlockToRowHash, one entry per sequence name.
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGacgTc"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *mtfs = newMtfseqFromString("gcagctgaaaacaACGTA"
                                         "tttttttttttttttttttttttttttttttt"
                                         "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), mtfs);
    mtfs = newMtfseqFromString("ATGTATGCCGATGTg"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), mtfs);
    mtfs = newMtfseqFromString("aaccgTTTTTTTTTTTTTTT");
    stHash_insert(seqHash, stString_copy("name3.chr@"), mtfs);
    // Function under test, followed by the two comparisons.
    addMafBlockToRowHash(observedHash, seqHash, observedList, mb, options);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));
    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(mb);
    destroyOptions(options);
}
コード例 #6
0
/*
 * Builds a string-keyed hash holding exactly one entry: a copy of name mapped
 * to a new mtfseq_t sized by strlen(input) and filled from input.
 * The hash is constructed with free as key destructor and destroyMtfseq as
 * value destructor.
 */
static stHash *createSeqHashFromString(char *name, char *input) {
    stHash *seqHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyMtfseq);
    mtfseq_t *sequence = newMtfseq(strlen(input));
    seq_copyIn(sequence, input);
    stHash_insert(seqHash, stString_copy(name), sequence);
    return seqHash;
}
コード例 #7
0
ファイル: cPecanAlign.c プロジェクト: adderan/cPecan
// Returns a hash mapping from sequence header to sequence data.
//
// Only the first space-delimited token of each header is used as the key.
// The sequence strings read by fastaRead are handed over to the returned hash
// (the temporary "seqs" list has no element destructor, while the hash frees
// both keys and values with free()). Aborts via st_errnoAbort if the file
// cannot be opened.
static stHash *readFastaFile(char *filename) {
    FILE *fasta = fopen(filename, "r");
    if (fasta == NULL) {
        st_errnoAbort("Could not open fasta file %s", filename);
    }
    stHash *headerToData = stHash_construct3(stHash_stringKey,
                                             stHash_stringEqualKey,
                                             free,
                                             free);
    struct List *seqs = constructEmptyList(0, NULL);
    struct List *seqLengths = constructEmptyList(0, free);
    struct List *headers = constructEmptyList(0, free);
    fastaRead(fasta, seqs, seqLengths, headers);
    // The stream is not used again after this point; close it so the file
    // handle is not leaked on every call.
    fclose(fasta);

    for (int64_t i = 0; i < seqs->length; i++) {
        char *fullHeader = headers->list[i];
        stList *headerTokens = stString_splitByString(fullHeader, " ");
        // Copy the first token: headerTokens is destructed below.
        char *usableHeader = stString_copy(stList_get(headerTokens, 0));
        stHash_insert(headerToData, usableHeader, seqs->list[i]);
        stList_destruct(headerTokens);
    }
    destructList(seqs);
    destructList(seqLengths);
    destructList(headers);

    return headerToData;
}
コード例 #8
0
/*
 * Indexes a list of edge tuples: for every edge, a key built by constructEdge
 * from elements 0 and 1 of the tuple is mapped to the edge itself.
 * The hash destroys its keys with stIntTuple_destruct; values have no destructor.
 */
static stHash *putEdgesInHash(stList *edges) {
    stHash *intsToEdgesHash = stHash_construct3(
            (uint64_t (*)(const void *))stIntTuple_hashKey,
            (int (*)(const void *, const void *))stIntTuple_equalsFn,
            (void (*)(void *))stIntTuple_destruct,
            NULL);
    int64_t edgeIndex = 0;
    while (edgeIndex < stList_length(edges)) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        stHash_insert(intsToEdgesHash,
                constructEdge(stIntTuple_get(edge, 0), stIntTuple_get(edge, 1)),
                edge);
        edgeIndex++;
    }
    return intsToEdgesHash;
}
コード例 #9
0
/*
 * Maps every path in maximalHaplotypePaths to a newly constructed one-element
 * stIntTuple holding the value contigPathLength returns for that path.
 */
stHash *buildContigPathToContigPathLengthHash(
        stList *maximalHaplotypePaths) {
    stHash *pathToLength = stHash_construct();
    int64_t pathIndex = 0;
    while (pathIndex < stList_length(maximalHaplotypePaths)) {
        stList *path = stList_get(maximalHaplotypePaths, pathIndex);
        int64_t length = contigPathLength(path);
        stIntTuple *lengthTuple = stIntTuple_construct1(length);
        stHash_insert(pathToLength, path, lengthTuple);
        pathIndex++;
    }
    return pathToLength;
}
コード例 #10
0
/*
 * This builds an adjacency list structure for the sequences. Every sequence-position
 * has a column in the hash with which it can be aligned with.
 *
 * pairs: list of 4-element tuples; elements (0, 1) and (2, 3) are each used to
 *        build a (sequence, position) key.
 * sequenceNumber: number of sequences; positions 0..MAX_SEQUENCE_SIZE-1 are
 *        created for each of them.
 * Returns a hash from (sequence, position) tuple to the sorted set ("column")
 * containing it. Keys are destroyed by the hash (stIntTuple_destruct); values
 * have no destructor, and one column may be the value of several keys.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
                                     (int (*)(const void *, const void *))stIntTuple_equalsFn,
                                     (void (*)(void *))stIntTuple_destruct, NULL);
    // Start with every sequence-position alone in its own column.
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2( seq, position);
            stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            // The same tuple object is both the hash key and the set member.
            stSortedSet_insert(column, seqPos);
            stHash_insert(hash, seqPos, column);
        }
    }
    stListIterator *it = stList_getIterator(pairs);
    stIntTuple *pair;
    while((pair = stList_getNext(it)) != NULL) {
        // Temporary lookup keys for the two ends of the pair; freed at the end of the iteration.
        stIntTuple *seqPos1 = stIntTuple_construct2( stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *seqPos2 = stIntTuple_construct2( stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *column1 = stHash_search(hash, seqPos1);
        assert(column1 != NULL);
        stSortedSet *column2 = stHash_search(hash, seqPos2);
        assert(column2 != NULL);
        if(column1 != column2) { //Merge the columns
            // Move every member of column2 into column1 and re-point its hash entry.
            stSortedSetIterator *it2 = stSortedSet_getIterator(column2);
            stIntTuple *seqPos3;
            while((seqPos3 = stSortedSet_getNext(it2)) != NULL) {
                assert(stSortedSet_search(column1, seqPos3) == NULL);
                stSortedSet_insert(column1, seqPos3);
                assert(stHash_search(hash, seqPos3) == column2);
                stHash_insert(hash, seqPos3, column1);
                assert(stHash_search(hash, seqPos3) == column1);
            }
            stSortedSet_destructIterator(it2);
            // column2 was constructed without an element destructor, so only the set itself is released.
            stSortedSet_destruct(column2);
        }
        //Cleanup loop.
        stIntTuple_destruct(seqPos1);
        stIntTuple_destruct(seqPos2);
    }
    stList_destructIterator(it);
    return hash;
}
コード例 #11
0
/*
 * Builds a hashtable that associates a list of lifted edges with
 * the cap returned by cap_getTopCap for each connected bottom cap.
 *
 * The table has no key destructor; its values are released with
 * buildFaces_destructValue.
 */
static stHash *buildFaces_computeLiftedEdges(Flower * flower) {
    stHash *liftedEdgesTable = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Flower_CapIterator *capIt = flower_getCapIterator(flower);
    Cap *bottom;

    // Every cap of the flower is a potential bottom node
    while ((bottom = flower_getNextCap(capIt))) {
        // Caps without an adjacency produce no lifted edge
        Cap *partner = cap_getAdjacency(bottom);
        if (!partner) {
            continue;
        }

        // Lift both ends of the adjacency
        Cap *topOfBottom = cap_getTopCap(bottom);
        Cap *topOfPartner = cap_getTopCap(cap_getPositiveOrientation(partner));

#ifndef NDEBUG
        assert((topOfBottom && topOfPartner) || (!topOfBottom && !topOfPartner));
#endif

        // Root node: nothing to lift to
        if (topOfBottom == NULL) {
            continue;
        }

        // Record the lifted edge
        LiftedEdge *edge = st_malloc(sizeof(LiftedEdge));
        edge->destination = topOfPartner;
        edge->bottomNode = bottom;

#ifndef NDEBUG
        // Self loop
        if (topOfPartner == topOfBottom) {
            abort();
        }
#endif

        // File it under its top cap, creating that cap's list on first use
        stList *edgesAtTop = stHash_search(liftedEdgesTable, topOfBottom);
        if (!edgesAtTop) {
            edgesAtTop = stList_construct3(2, buildFaces_stList_destructElem);
            stHash_insert(liftedEdgesTable, topOfBottom, edgesAtTop);
        }
        stList_append(edgesAtTop, edge);
    }

    flower_destructCapIterator(capIt);
    return liftedEdgesTable;
}
コード例 #12
0
/*
 * Processes a list of flowers for the reference event named referenceEventName.
 *
 * flowers: the flowers to process.
 * sequenceDatabase: passed through to buildRecursiveThreads / buildRecursiveThreadsInList.
 * referenceEventName: name used to select the caps and to look up the reference event in each flower's event tree.
 * isTop: when true, the thread strings are collected in a list and turned into
 *        meta sequences with coordinates; otherwise buildRecursiveThreads is called.
 * generateSubstitutionMatrix: passed to getPhylogeneticTreeRootedAtGivenEvent.
 *
 * NOTE(review): segmentWriteFn_flowerToPhylogeneticTreeHash is not declared in
 * this function; it is presumably file-scope state read by segmentWriteFn while
 * the threads are built - confirm.
 */
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName,
              bool isTop, stMatrix *(*generateSubstitutionMatrix)(double)) {
    /*
     * A reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    stList *caps = getCaps(flowers, referenceEventName);
    for (int64_t i = stList_length(caps) - 1; i >= 0; i--) { //Start from end, as we add to this list.
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, i), caps);
    }
    for(int64_t i=0; i<stList_length(flowers); i++) {
        recoverBrokenAdjacencies(stList_get(flowers, i), caps, referenceEventName);
    }

    //Build the phylogenetic event trees for base calling.
    // One tree per flower, keyed by the flower pointer; values are released with cleanupPhylogeneticTree.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL, (void (*)(void *))cleanupPhylogeneticTree);
    for(int64_t i=0; i<stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower, getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (isTop) {
        // The i-th thread string corresponds to the i-th cap (lengths are asserted equal).
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        int64_t nonTrivialSeqIndex = 0, trivialSeqIndex = stList_length(threadStrings); //These are used as indices for the names of trivial and non-trivial sequences.
        for (int64_t i = 0; i < stList_length(threadStrings); i++) {
            Cap *cap = stList_get(caps, i);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *flower = end_getFlower(cap_getEnd(cap));
            char *threadString = stList_get(threadStrings, i);
            bool trivialString = isTrivialString(&threadString); //This alters the original string
            MetaSequence *metaSequence = addMetaSequence(flower, cap, trivialString ? trivialSeqIndex++ : nonTrivialSeqIndex++,
                    threadString, trivialString);
            free(threadString);
            int64_t endCoordinate = setCoordinates(flower, metaSequence, cap, metaSequence_getStart(metaSequence) - 1);
            // Silences the unused-variable warning when asserts are compiled out.
            (void) endCoordinate;
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }
        stList_setDestructor(threadStrings, NULL); //The strings are already cleaned up by the above loop
        stList_destruct(threadStrings);
    } else {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    }
    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}
コード例 #13
0
// Unlinks adj from node's doubly linked adjacency list and frees it.
// When adj is the head of the list, the entry stored for node in
// connectivity->nodesToAdjList is replaced by adj's successor (which may be NULL).
static void removeEdgeFromAdjList(stNaiveConnectivity *connectivity, void *node, struct adjacency *adj) {
    invalidateCache(connectivity);

    struct adjacency *successor = adj->next;
    struct adjacency *predecessor = adj->prev;

    if (successor != NULL) {
        successor->prev = predecessor;
    }

    if (predecessor == NULL) {
        // adj was the list head: re-point the hash entry at the new head.
        stHash_remove(connectivity->nodesToAdjList, node);
        stHash_insert(connectivity->nodesToAdjList, node, successor);
    } else {
        predecessor->next = successor;
    }
    free(adj);
}
コード例 #14
0
/*
 * Maps every segment found in any of the given paths to the path containing it.
 * Asserts that each path is non-empty and that neither a segment nor its
 * reverse has already been entered.
 */
stHash *buildSegmentToContigPathHash(stList *maximalHaplotypePaths) {
    stHash *segmentToPath = stHash_construct();
    for (int64_t pathIndex = 0; pathIndex < stList_length(maximalHaplotypePaths); pathIndex++) {
        stList *path = stList_get(maximalHaplotypePaths, pathIndex);
        assert(stList_length(path) > 0);
        for (int64_t segmentIndex = 0; segmentIndex < stList_length(path); segmentIndex++) {
            Segment *segment = stList_get(path, segmentIndex);
            assert(stHash_search(segmentToPath, segment) == NULL);
            assert(stHash_search(segmentToPath, segment_getReverse(segment)) == NULL);
            stHash_insert(segmentToPath, segment, path);
        }
    }
    return segmentToPath;
}
コード例 #15
0
/*
 * Recursive function which fills a given list with the
 * connected nodes within a module and fills their lifted
 * edges in the same pass.
 *
 * cap: starting cap; it is normalised with cap_getPositiveOrientation first.
 * list: accumulates the visited caps; a cap already in the list ends the recursion.
 * liftedEdgesTable: receives, for each visited cap that has at least one
 *                   lifted edge, the list of those edges keyed by the cap.
 */
static void buildFaces_fillTopNodeList2(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    int64_t index;

    // Orientation check
    cap = cap_getPositiveOrientation(cap);

    // Limit of recursion
    if (stList_contains(list, cap))
        return;

    // Actual filling
    st_logInfo("Adding cap %p to face\n", (void *) cap);
    stList_append(list, cap);

    // Allocate the edge list only once we know the cap is new; creating it
    // before the recursion check leaked one list per revisited cap.
    stList *liftedEdges = stList_construct3(2,
                        buildFaces_stList_destructElem);

    // Compute lifted edges
    for (index = 0; index < cap_getChildNumber(cap); index++)
        buildFaces_computeLiftedEdgesAtTopNode(cap_getChild(cap, index), liftedEdges);

    // If empty list...
    if (stList_length(liftedEdges) == 0)
        stList_destruct(liftedEdges);
    // Recursion through lifted edges
    else {
        stHash_insert(liftedEdgesTable, cap, liftedEdges);
        for (index = 0; index < stList_length(liftedEdges); index++)
            buildFaces_fillTopNodeList2(
                    ((LiftedEdge *) stList_get(liftedEdges, index))->destination,
                   list, liftedEdgesTable);
    }

    // Recursion through adjacency
    if (cap_getAdjacency(cap))
        buildFaces_fillTopNodeList2(cap_getAdjacency(cap),list,
                liftedEdgesTable);
}
コード例 #16
0
/*
 * Registers node with an empty adjacency list, stored as a NULL value in
 * connectivity->nodesToAdjList. Asserts that no entry is found for the node
 * beforehand.
 */
void stNaiveConnectivity_addNode(stNaiveConnectivity *connectivity, void *node) {
    invalidateCache(connectivity);

    // A NULL value cannot be told apart from "absent" by stHash_search, so this
    // only catches nodes that already have at least one edge.
    assert(NULL == stHash_search(connectivity->nodesToAdjList, node));

    stHash_insert(connectivity->nodesToAdjList, node, NULL);
}
コード例 #17
0
ファイル: sonLibSet.c プロジェクト: adderan/sonLib
/*
 * Inserts key into the set. The set is backed by set->hash, where each key is
 * stored as its own value.
 */
void stSet_insert(stSet *set, void *key) {
    // Drop any entry already found for this key so the set never holds duplicate keys.
    int alreadyPresent = (stSet_search(set, key) != NULL);
    if (alreadyPresent) {
        stSet_remove(set, key);
    }
    stHash_insert(set->hash, key, key);
}
コード例 #18
0
/*
 * Groups haplotype paths into scaffold buckets.
 *
 * Every haplotype path starts in its own bucket (a sorted set stored in
 * haplotypePathToScaffoldPathHash). For each path whose 5' cap code is
 * SCAFFOLD_GAP or AMBIGUITY_GAP, the bucket of that path and the bucket of the
 * adjacent haplotype path are replaced by their union, and every member of the
 * union gets the summed length (j + k) recorded for it.
 *
 * haplotypePaths: list of haplotype paths (each a non-empty list of segments).
 * haplotypePathToScaffoldPathHash: filled in by this function; maps each path to its bucket.
 * haplotypeEventStrings, contaminationEventStrings, capCodeParameters: passed to getCapCode
 *        and the other helper predicates.
 *
 * Returns the hash from haplotype path to a one-element stIntTuple holding the
 * length recorded for its bucket.
 */
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    // Start with one singleton bucket per haplotype path.
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        // Pick the segment at the 5' end of the path on the positive strand.
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            // Walk along 5' adjacencies until a segment whose 5' end has a cap in the haplotype events is reached.
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            // The segment hash holds only one orientation of each segment, so try both.
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
            // Every path that pointed at bucket1 or bucket2 now points at
            // bucket3, so the superseded sets are unreachable: release them
            // (they were built without an element destructor, so the paths
            // themselves are untouched). The guard avoids a double free if the
            // two lookups ever returned the same set in a build without asserts.
            stSortedSet_destruct(bucket1);
            if (bucket2 != bucket1) {
                stSortedSet_destruct(bucket2);
            }
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}
コード例 #19
0
ファイル: endAligner.c プロジェクト: benedictpaten/cactus
/*
 * Aligns the adjacency sequences attached to the caps of the given end and
 * returns the result as a sorted set of AlignedPair objects.
 *
 * sM, spanningTrees, useProgressiveMerging, gapGamma and
 * pairwiseAlignmentBandingParameters are passed straight through to
 * makeAlignment(); maxSequenceLength is passed to adjacencySequence_construct().
 *
 * Every aligned pair produced by makeAlignment() is inserted together with its
 * reverse. The two scores of each pair are scaled by a per-sequence weight,
 * which differs depending on whether the two sequences involved have the same
 * rightEndId ("common ends") or not.
 *
 * The returned set is constructed with alignedPair_destruct as its element
 * destructor; everything else allocated here is released before returning.
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    /*
     * Step 1: gather one adjacency sequence and one seqFrag per cap of the end,
     * and count how many caps are adjacent to each (positively oriented) end.
     */
    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free); //End -> heap allocated int64_t counter
    for(Cap *rawCap = end_getNext(capIt); rawCap != NULL; rawCap = end_getNext(capIt)) {
        //Work with the reverse of any cap for which cap_getSide() is true.
        Cap *cap = cap_getSide(rawCap) ? cap_getReverse(rawCap) : rawCap;

        AdjacencySequence *adjSeq = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjSeq);

        assert(cap_getAdjacency(cap) != NULL);
        End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        //The name of the adjacent end is handed to the seqFrag (presumably what
        //is later read back as rightEndId - confirm against seqFrag_construct).
        stList_append(seqFrags, seqFrag_construct(adjSeq->string, 0, end_getName(adjacentEnd)));

        //Bump the number of seqFrags seen for this adjacent end.
        int64_t *counter = stHash_search(endInstanceNumbers, adjacentEnd);
        if(counter == NULL) {
            counter = st_calloc(1, sizeof(int64_t));
            assert(*counter == 0);
            stHash_insert(endInstanceNumbers, adjacentEnd, counter);
        }
        *counter += 1;
    }
    end_destructInstanceIterator(capIt);

    /*
     * Step 2: compute the multiple alignment of the seqFrags.
     */
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    /*
     * Step 3: build the per-sequence weights used to rescale pair scores.
     *
     * First count, for each sequence, the chosen pairwise alignments it takes
     * part in, keeping alignments between sequences with equal rightEndId
     * separate from the rest.
     */
    int64_t seqFragNumber = stList_length(seqFrags);
    int64_t *nonCommonEndCounts = st_calloc(seqFragNumber, sizeof(int64_t));
    int64_t *commonEndCounts = st_calloc(seqFragNumber, sizeof(int64_t));
    int64_t chosenNumber = stList_length(mA->chosenPairwiseAlignments);
    for(int64_t k=0; k<chosenNumber; k++) {
        stIntTuple *chosen = stList_get(mA->chosenPairwiseAlignments, k);
        int64_t seqA = stIntTuple_get(chosen, 1);
        int64_t seqB = stIntTuple_get(chosen, 2);
        assert(seqA != seqB);
        SeqFrag *fragA = stList_get(seqFrags, seqA);
        SeqFrag *fragB = stList_get(seqFrags, seqB);
        if(fragA->rightEndId == fragB->rightEndId) {
            commonEndCounts[seqA]++;
            commonEndCounts[seqB]++;
        }
        else {
            nonCommonEndCounts[seqA]++;
            nonCommonEndCounts[seqB]++;
        }
    }

    //Then turn the counts into multiplicative adjustments. INT64_MIN marks
    //"no alignment of this class was chosen for this sequence".
    //(A single pooled adjustment across both classes was used here at one point.)
    double *nonCommonEndAdjustments = st_malloc(seqFragNumber * sizeof(double));
    double *commonEndAdjustments = st_malloc(seqFragNumber * sizeof(double));
    for(int64_t k=0; k<seqFragNumber; k++) {
        SeqFrag *frag = stList_get(seqFrags, k);
        End *adjacentEnd = flower_getEnd(end_getFlower(end), frag->rightEndId);
        assert(adjacentEnd != NULL);
        int64_t *counter = stHash_search(endInstanceNumbers, adjacentEnd);
        assert(counter != NULL);
        int64_t commonInstanceNumber = *counter; //Sequences sharing this adjacent end (including this one)
        int64_t nonCommonInstanceNumber = seqFragNumber - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(nonCommonEndCounts[k] <= nonCommonInstanceNumber);
        assert(nonCommonEndCounts[k] >= 0);
        assert(commonEndCounts[k] < commonInstanceNumber);
        assert(commonEndCounts[k] >= 0);

        if(nonCommonEndCounts[k] <= 0) {
            nonCommonEndAdjustments[k] = INT64_MIN;
        }
        else {
            //Possible non-common partners divided by those actually aligned.
            nonCommonEndAdjustments[k] = ((double)nonCommonInstanceNumber)/nonCommonEndCounts[k];
            assert(nonCommonEndAdjustments[k] >= 1.0);
            assert(nonCommonEndAdjustments[k] <= nonCommonInstanceNumber);
        }

        if(commonEndCounts[k] <= 0) {
            commonEndAdjustments[k] = INT64_MIN;
        }
        else {
            //Possible common partners (excluding the sequence itself) divided by those actually aligned.
            commonEndAdjustments[k] = ((double)commonInstanceNumber-1)/commonEndCounts[k];
            assert(commonEndAdjustments[k] >= 1.0);
            assert(commonEndAdjustments[k] <= commonInstanceNumber-1);
        }
    }

    /*
     * Step 4: drain the aligned pairs of the multiple alignment, converting
     * each 5-tuple (score, seq1, offset1, seq2, offset2) into an AlignedPair.
     */
    stSortedSet *sortedAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *pairTuple = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(pairTuple) == 5);
        int64_t seqIndex1 = stIntTuple_get(pairTuple, 1);
        int64_t seqIndex2 = stIntTuple_get(pairTuple, 3);
        AdjacencySequence *adjSeq1 = stList_get(sequences, seqIndex1);
        AdjacencySequence *adjSeq2 = stList_get(sequences, seqIndex2);
        assert(adjSeq1 != adjSeq2);
        int64_t offset1 = stIntTuple_get(pairTuple, 2);
        int64_t offset2 = stIntTuple_get(pairTuple, 4);

        //Non-positive scores happen when indel probs are included; 1 is the minimum.
        int64_t rawScore = stIntTuple_get(pairTuple, 0);
        int64_t score = rawScore <= 0 ? 1 : rawScore;
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);

        SeqFrag *frag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *frag2 = stList_get(seqFrags, seqIndex2);
        assert(frag1 != frag2);

        //Select the weight table matching the class of this pair.
        double *adjustments;
        if(frag1->rightEndId == frag2->rightEndId) {
            adjustments = commonEndAdjustments;
        }
        else {
            adjustments = nonCommonEndAdjustments;
        }
        assert(adjustments[seqIndex1] != INT64_MIN);
        assert(adjustments[seqIndex2] != INT64_MIN);

        //Offsets are added on one strand and subtracted on the other; the
        //reweighting of the score happens in the last two arguments.
        AlignedPair *converted = alignedPair_construct(
                adjSeq1->subsequenceIdentifier, adjSeq1->start + (adjSeq1->strand ? offset1 : -offset1), adjSeq1->strand,
                adjSeq2->subsequenceIdentifier, adjSeq2->start + (adjSeq2->strand ? offset2 : -offset2), adjSeq2->strand,
                score*adjustments[seqIndex1], score*adjustments[seqIndex2]);
        assert(stSortedSet_search(sortedAlignment, converted) == NULL);
        assert(stSortedSet_search(sortedAlignment, converted->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, converted);
        stSortedSet_insert(sortedAlignment, converted->reverse);
        stIntTuple_destruct(pairTuple);
    }

    /*
     * Step 5: release all intermediate structures.
     */
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(nonCommonEndCounts);
    free(commonEndCounts);
    free(nonCommonEndAdjustments);
    free(commonEndAdjustments);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
コード例 #20
0
/*
 * Checks addMafBlockToRowHash() for a block that is concatenated onto the
 * existing rows while one row (name3) changes strand.
 */
static void test_addBlockToHash_4(CuTest *testCase) {
    // Concatenation with a sequence breakpoint caused by *strand* alone.
    // name3 lies well within the interstitial boundary, so the two blocks
    // essentially look like >>>>>>>>>>>>> <<<<< (they differ only in strand).
    options_t *opts = options_construct();
    opts->breakpointPenalty = 10;
    opts->interstitialSequence = 5;
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);

    // Starting state of the row hash.
    stHash *observedHash = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
            "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
            "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n"
            "s name3.chr1     0 13 +       100 GCAGCTGAAAACA\n",
            observedList);

    // Block to be added; the name3 row is on the '-' strand.
    mafBlock_t *incomingBlock = maf_newMafBlockListFromString(
            "a score=0 test\n"
            "s reference.chr0 13  5 + 158545518 ACGTA\n"
            "s name.chr1      12  5 +       100 gtcGG\n"
            "s name2.chr1     10  5 +       100 ATGTg\n"
            "s name3.chr1     82  5 -       100 GGGGG\n",
            3);

    // Row hash expected once the block has been added.
    stHash *expectedHash = createBlockHashFromString(
            "a score=0\n"
            "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
            "s name.chr1      0 17 +       100 ATGT---ATGCCGac----------gtcGG\n"
            "s name2.chr1     0 15 +       100 ATGT---ATGCCG------------ATGTg\n"
            "s name3.chr1     0 28 +       100 GCAGCTGAAAACA--NNNNNNNNNNGGGGG\n",
            expectedList);
    // These fields of the expected name3 row are set by hand on top of what
    // createBlockHashFromString() produced.
    row_t *name3Row = stHash_search(expectedHash, "name3");
    name3Row->prevRightPos = 86;
    name3Row->strand = '*';
    name3Row->prevStrand = '-';

    // Underlying sequences for all four rows.
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGacgtc"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *referenceSeq = newMtfseqFromString("gcagctgaaaacaACGTA"
                                                 "tttttttttttttttttttttttttttttttt"
                                                 "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), referenceSeq);
    mtfseq_t *name2Seq = newMtfseqFromString("ATGTATGCCGATGTg"
                                             "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
                                             "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), name2Seq);
    mtfseq_t *name3Seq = newMtfseqFromString("GCAGCTGAAAACACCCCCgggggggggggggggggggggggggggggggg"
                                             "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                                             );
    stHash_insert(seqHash, stString_copy("name3.chr1"), name3Seq);

    // Exercise the function under test and compare against the expectation.
    addMafBlockToRowHash(observedHash, seqHash, observedList, incomingBlock, opts);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));

    // Release everything created by this test.
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(incomingBlock);
    destroyOptions(opts);
}