예제 #1
0
/*
 * Decides whether the aligned pairs admit no consistent ordering.
 *
 * An adjacency list (sequence position -> column of aligned positions) is built from
 * the pairs with buildAdjacencyList, and a depth first search is started from
 * position 0 of each of the sequenceNumber sequences.
 *
 * Returns non-zero if any search reports a cycle, otherwise zero.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *positionToColumn = buildAdjacencyList(pairs, sequenceNumber);

    //Book-keeping sets for the search, both constructed without an element destructor
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        //Temporary key, used only to find the tuple actually stored in the adjacency list
        stIntTuple *probe = stIntTuple_construct2(seq, 0);
        stSortedSet *column = stHash_search(positionToColumn, probe);
        assert(column != NULL);
        stIntTuple *firstPosition = stSortedSet_search(column, probe);
        assert(firstPosition != NULL);
        if(!cyclic) { //Once a cycle has been found no further searches are started
            cyclic = dfs(positionToColumn, firstPosition, started, done) ? 1 : 0;
        }
        stIntTuple_destruct(probe);
    }

    //Cleanup. Several keys can map to the same column, so the columns are first gathered
    //in a set that was constructed with stSortedSet_destruct as its element destructor.
    stSortedSet *distinctColumns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    stHashIterator *keyIt = stHash_getIterator(positionToColumn);
    for(stIntTuple *key = stHash_getNext(keyIt); key != NULL; key = stHash_getNext(keyIt)) {
        stSortedSet *column = stHash_search(positionToColumn, key);
        assert(column != NULL);
        stSortedSet_insert(distinctColumns, column);
    }
    stHash_destructIterator(keyIt);
    stHash_destruct(positionToColumn);
    stSortedSet_destruct(distinctColumns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);

    return cyclic;
}
예제 #2
0
/*
 * Test helper that validates the components induced by filteredEdges.
 *
 * Asserts that every component holds between 1 and maxComponentSize nodes, and that
 * every edge of the file-level list "edges" which is missing from filteredEdges joins
 * two distinct components whose combined size exceeds maxComponentSize.
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);

    //Size bounds on every component
    stList *components = stHash_getValues(nodesToComponents);
    int64_t componentIndex = 0;
    while (componentIndex < stList_length(components)) {
        stSortedSet *component = stList_get(components, componentIndex);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
        componentIndex++;
    }

    //No rejected edge could have been added without breaking the size limit
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges, (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(edges); edgeIndex++) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        if (stSortedSet_search(keptEdges, edge) != NULL) {
            continue; //The edge was kept, so there is nothing to check
        }
        //The two endpoints are read from tuple indices 1 and 2 of the edge
        stIntTuple *endpointA = stIntTuple_construct1( stIntTuple_get(edge, 1));
        stIntTuple *endpointB = stIntTuple_construct1( stIntTuple_get(edge, 2));
        stSortedSet *componentA = stHash_search(nodesToComponents, endpointA);
        stSortedSet *componentB = stHash_search(nodesToComponents, endpointB);
        CuAssertTrue(testCase, componentA != NULL && componentB != NULL);
        CuAssertTrue(testCase, componentA != componentB);
        CuAssertTrue(testCase, stSortedSet_size(componentA) + stSortedSet_size(componentB) > maxComponentSize);
        stIntTuple_destruct(endpointA);
        stIntTuple_destruct(endpointB);
    }
    stSortedSet_destruct(keptEdges);

    //Cleanup: the value list can name the same component many times, so the components
    //are put in a set first and destructed through the set's element destructor.
    stSortedSet *distinctComponents = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(distinctComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(distinctComponents);
    stHash_destruct(nodesToComponents);
}
/*
 * Makes edge the new head of node's adjacency list and points the hash entry for
 * node at it.
 */
static void stNaiveConnectivity_prependAdjacency(stNaiveConnectivity *connectivity, void *node,
        struct adjacency *edge) {
    struct adjacency *head = stHash_search(connectivity->nodesToAdjList, node);
    edge->next = head; // NULL when the node had no edges yet
    if (head != NULL) {
        head->prev = edge;
    }
    stHash_remove(connectivity->nodesToAdjList, node);
    stHash_insert(connectivity->nodesToAdjList, node, edge);
}

/*
 * Adds an undirected edge between node1 and node2.
 *
 * The edge is stored as two adjacency records, one in the list of each endpoint,
 * linked to one another through their "inverse" fields. Each record is placed at
 * the front of its endpoint's doubly-linked list. The connectivity cache is
 * invalidated before the edge is added.
 *
 * Aborts if the memory for the two records cannot be allocated.
 */
void stNaiveConnectivity_addEdge(stNaiveConnectivity *connectivity, void *node1, void *node2) {
    invalidateCache(connectivity);

    struct adjacency *newEdge1 = malloc(sizeof *newEdge1);
    struct adjacency *newEdge2 = malloc(sizeof *newEdge2);
    if (newEdge1 == NULL || newEdge2 == NULL) {
        // Writing through a NULL pointer below would be undefined behaviour, so stop here.
        free(newEdge1);
        free(newEdge2);
        abort();
    }
    newEdge1->toNode = node2;
    newEdge2->toNode = node1;
    newEdge1->inverse = newEdge2;
    newEdge2->inverse = newEdge1;
    newEdge1->prev = NULL;
    newEdge2->prev = NULL;

    // node1 is handled completely before node2, so that for a self loop the second
    // record is prepended in front of the first.
    stNaiveConnectivity_prependAdjacency(connectivity, node1, newEdge1);
    stNaiveConnectivity_prependAdjacency(connectivity, node2, newEdge2);
}
예제 #4
0
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A deliberately naive reimplementation of the greedy function, done just to trap typos.
     *
     * Every node of the file-level list "nodes" starts in a component of its own. Each
     * filtered edge, whose endpoints are read from tuple indices 1 and 2, then merges the
     * components of its two endpoints. Returns a hash from node to the component (a sorted
     * set) containing it; the hash is constructed without key or value destructors.
     */
    stHash *nodeToComponent = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    //Singleton components
    int64_t nodeCount = stList_length(nodes);
    for (int64_t nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) {
        stIntTuple *node = stList_get(nodes, nodeIndex);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, node);
        stHash_insert(nodeToComponent, node, singleton);
    }

    //Merge the components joined by each edge
    int64_t edgeCount = stList_length(filteredEdges);
    for (int64_t edgeIndex = 0; edgeIndex < edgeCount; edgeIndex++) {
        stIntTuple *edge = stList_get(filteredEdges, edgeIndex);
        stIntTuple *endpointA = stIntTuple_construct1( stIntTuple_get(edge, 1));
        stIntTuple *endpointB = stIntTuple_construct1( stIntTuple_get(edge, 2));
        stSortedSet *componentA = stHash_search(nodeToComponent, endpointA);
        stSortedSet *componentB = stHash_search(nodeToComponent, endpointB);
        assert(componentA != NULL && componentB != NULL);
        if (componentA != componentB) {
            stSortedSet *merged = stSortedSet_getUnion(componentA, componentB);
            //Re-point every member of the union at the merged component
            stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
            for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                stHash_insert(nodeToComponent, member, merged);
            }
            stSortedSet_destructIterator(memberIt);
            //The two old components are no longer referenced by the hash
            stSortedSet_destruct(componentA);
            stSortedSet_destruct(componentB);
        }
        stIntTuple_destruct(endpointA);
        stIntTuple_destruct(endpointB);
    }
    return nodeToComponent;
}
/*
 * Writes a single-record fasta file (50 bases per line), loads it with
 * addSequencesToHash and checks that the record can be found under its name,
 * without the leading '>', and that the stored sequence equals the input.
 * The temporary file testFasta.fa is removed afterwards.
 */
static void test_readingFasta_0(CuTest *testCase) {
    char inputName[] = ">simChimp.chrA";
    char inputSequence[] = "ATAATACTTGCACACTTCTGCTATTACTTGATGTGTTTTCTATGGGGTGT"
        "CTTTCAGTGCTATGGGCAAGGCCATGGATTAATGGTGCCATAATTGCTCT"
        "AGGCAGTGACTAGAAACAGTTCACAAGTTTTTACTGTATCAAACTATGTT"
        "TTATAGTACGATTCACCCTCCAGGGGACCATCCCAAACTACTGGCCTAAA"
        "AGGACCTGCCATGTTGTAACTCCCCAGCTTAGAAATATAGACGGGAGGAA"
        "TGACaaaaagaagaaaaaaaaaaaaagaaaaaataaaaaaaaaacaaaaa"
        "agatagagaaaaaaaaaagtaaaaacaaaaaaaaataaaaaagggaaaaa"
        "aaataacaaaggaacaaaaaaaaaaaaaaaaaaataaaaagaaaaaCAAG"
        "ATAACCTTCATGCCATTGGAGCTATCTATTATTGTCTTGACCTATGCTTT"
        "ATCAATTTCTTCCTTCCTAGGAAGACATTTTTCTAGAAAGCTAAACGTTT"
        "TTGTAGGCTTGCATGTTCTGTCTGGGCTTGAATGGTTGTGCGTCTACAAG"
        "CCTCATTTACCATAGCACCATGCTTGGGTGGTATCTATCATCATTATCAA"
        "TAGTCAAGTCATTATAATGTTTTGGTGATCAGGCCAGATCCCTTGCACCA"
        "GTGACTTTCTAAATAGCACCTCCTCCATCATTTAAGGATCTCTAGCAACT"
        "TTAATCTGACTCACCTTGCCATGCAGAGTGCATGTTCCTTTTTAACACCC"
        "TGTGATTATGGGTTGGGTCTATTTGTATTTGTTTGATTACATCAGACGAC"
        "CAGGCCAGAGACAGATAAACACAACAGCCACTGGAACCTAAAGCTGTGTT"
        "CAGAATGTCACGGAATGTCTCATTGCACCCAGAGCTAGGGTGGGTATGAG"
        "TATGATCTTCTACATAAGGTACCCCAGGAAAATTAACTTAACAACCAATC"
        "AATTACAGAAGATGAATTCTGCTGTTGTCTCTTATTAGTTGGACTATTCA"
        "GCCTAATGGTTGGCCACTTAGCTTGTCATGAGCATTACTGTACTACTATG"
        "TCTAGTGTTTCCAGTTATTAGTTAGCCCACTGGATAGACAGTTTTGGCTT"
        "GTTTTCTTTCATTTGTATTGCCCACTCACCTAGCAAATCAGACAAAGGGG"
        "CATGTGAAAACTACCTTAGACTCTGCAGTTAGACAAACCATACTTTCCAC"
        "ATAGACCTCAGACATTTGGACATGAATAATTTCCTTCCTCCGGAGTGGTG"
        "GTTCCTCAACACTTATCACTTTCTTCTTCTTTTACCCGTATCACTGTCAA";
    const size_t sequenceLength = strlen(inputSequence);

    // Write the record: header line, then the sequence wrapped at 50 columns
    FILE *fastaFile = de_fopen("testFasta.fa", "w");
    fprintf(fastaFile, "%s\n", inputName);
    for (size_t column = 1; column <= sequenceLength; ++column) {
        fputc(inputSequence[column - 1], fastaFile);
        if ((column % 50) == 0) {
            fputc('\n', fastaFile);
        }
    }
    fputc('\n', fastaFile);
    fclose(fastaFile);

    // Read it back and compare
    stHash *sequenceHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyMtfseq);
    addSequencesToHash(sequenceHash, "testFasta.fa");
    mtfseq_t *value = NULL;
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "not in there")) == NULL);
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "simChimp.chrA")) != NULL);
    if (value != NULL) {
        CuAssertTrue(testCase, strlen(value->seq) == sequenceLength);
        CuAssertTrue(testCase, strcmp(value->seq, inputSequence) == 0);
    }

    if (remove("testFasta.fa") != 0) {
        fprintf(stderr, "Error, unable to remove temporary file testFasta.fa\n");
        exit(EXIT_FAILURE);
    }
    stHash_destruct(sequenceHash);
}
예제 #6
0
/*
 * One step of the depth first search used for cycle detection.
 *
 * seqPos is recorded in "started" on entry and in "done" on exit. From seqPos the
 * search moves to the following position on the same sequence and, if the adjacency
 * list has a column for it, recurses into every member of that column.
 *
 * Returns 1 if a position is reached that was started but is not yet done (a cycle),
 * otherwise 0.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos,
                   stSortedSet *started, stSortedSet *done) {
    if(stSortedSet_search(started, seqPos) != NULL) {
        //Seen before: still being explored means a cycle, already done means nothing new
        return stSortedSet_search(done, seqPos) == NULL ? 1 : 0;
    }
    stSortedSet_insert(started, seqPos);

    int64_t foundCycle = 0;
    stIntTuple *successor = stIntTuple_construct2( stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *successorColumn = stHash_search(adjacencyList, successor);
    if(successorColumn != NULL) { //The successor is in the adjacency list, so recurse
        assert(stSortedSet_search(successorColumn, successor) != NULL);
        stSortedSetIterator *memberIt = stSortedSet_getIterator(successorColumn);
        for(stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                member = stSortedSet_getNext(memberIt)) {
            //No further recursion is made once a cycle has been found
            if(!foundCycle && dfs(adjacencyList, member, started, done)) {
                foundCycle = 1;
            }
        }
        stSortedSet_destructIterator(memberIt);
    }
    stIntTuple_destruct(successor);
    stSortedSet_insert(done, seqPos);
    return foundCycle;
}
예제 #7
0
/*
 * Recursively appends to list every cap reachable from cap, following the
 * destinations of its lifted edges and then its adjacency. A cap that is
 * already in the list ends the recursion.
 */
static void buildFaces_fillTopNodeList(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    // Already collected: stop here
    if (stList_contains(list, cap)) {
        return;
    }

    st_logInfo("Adding cap %p to face\n", cap);
    stList_append(list, cap);

    // Follow every lifted edge recorded for this cap
    stList *edgesFromCap = stHash_search(liftedEdgesTable, cap);
    if (edgesFromCap != NULL) {
        for (int64_t i = 0; i < stList_length(edgesFromCap); i++) {
            LiftedEdge *edge = stList_get(edgesFromCap, i);
            buildFaces_fillTopNodeList(edge->destination, list, liftedEdgesTable);
        }
    }

    // Follow the adjacency, if the cap has one
    Cap *adjacentCap = cap_getAdjacency(cap);
    if (adjacentCap != NULL) {
        buildFaces_fillTopNodeList(adjacentCap, list, liftedEdgesTable);
    }
}
static stList *readMatching(FILE *fileHandle, stList *originalEdges) {
    /*
     * Reads the matching written by the external matching program (Blossom).
     *
     * The first line holds the node count and the edge count. It is followed by one line
     * per edge giving the two node indices, each of which must lie in [0, node count).
     * Every edge read is looked up in a hash of originalEdges; edges that are not found
     * there are dropped.
     *
     * Returns a new list holding, for every edge found, the value stored for it in the
     * hash built by putEdgesInHash.
     */
    stHash *originalEdgesHash = putEdgesInHash(originalEdges);

    //Header line: "<nodeNumber> <edgeNumber>"
    char *line = stFile_getLineFromFile(fileHandle);
    assert(line != NULL);
    int64_t nodeNumber, edgeNumber;
    int64_t i = sscanf(line, "%" PRIi64 " %" PRIi64 "\n", &nodeNumber, &edgeNumber);
    assert(i == 2);
    free(line);

    stList *chosenEdges = stList_construct();
    for(int64_t j=0; j<edgeNumber; j++) {
        line = stFile_getLineFromFile(fileHandle);
        //A file with fewer edge lines than the header announces must not reach sscanf
        assert(line != NULL);
        int64_t node1, node2;
        i = sscanf(line, "%" PRIi64 " %" PRIi64 "", &node1, &node2);
        assert(i == 2);
        free(line);
        assert(node1 >= 0);
        assert(node1 < nodeNumber);
        assert(node2 >= 0);
        assert(node2 < nodeNumber);
        //The temporary edge is only a lookup key
        stIntTuple *edge = constructEdge(node1, node2);
        stIntTuple *originalEdge = stHash_search(originalEdgesHash, edge);
        if(originalEdge != NULL) {
            stList_append(chosenEdges, originalEdge);
        }
        stIntTuple_destruct(edge);
    }
    stHash_destruct(originalEdgesHash);
    return chosenEdges;
}
예제 #9
0
/* Parse XML string into a hash.  This parses all attributes of all tags
 * into values.
 *
 * The markup tokens "</", "<", "/>", ">" and "=" are each replaced by a space
 * and the result is read token by token.  The st_kv_database_conf type is
 * stored as conf_type, the database tag is stored as db_tag.  The remaining
 * tokens are read as key/value pairs until the closing st_kv_database_conf
 * token.  This does minimal error checking and is really lame.
 *
 * Throws ST_KV_DATABASE_EXCEPTION_ID if a key has no value, if a key occurs
 * twice, or if the string ends before the closing st_kv_database_conf token.
 *
 * The returned hash is constructed with free as key and value destructor.
 */
static stHash *hackParseXmlString(const char *xmlString) {
    stHash *hash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, free);
    char *toReplace[5] = { "</", "<", "/>", ">", "=" };
    char *cA = stString_replace(xmlString, toReplace[0], " "), *cA2;
    for (int64_t i = 1; i < 5; i++) {
        cA2 = stString_replace(cA, toReplace[i], " ");
        free(cA);
        cA = cA2;
    }
    // cA keeps the start of the buffer so it can be freed; cA2 is the read
    // position handed by address to the token helpers.
    cA2 = cA;
    getExpectedToken(&cA2, "st_kv_database_conf");
    stHash_insert(hash, stString_copy("conf_type"), getKeyValue(&cA2, "type"));
    stHash_insert(hash, stString_copy("db_tag"), getNextToken(&cA2));

    char *key;
    while (((key = getNextToken(&cA2)) != NULL) && !stString_eq(key, "st_kv_database_conf")) {
        char *value = getNextToken(&cA2);
        if (value == NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "failed to to get value for key \"%s\"", key);
        }
        if (stHash_search(hash, key) != NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got a duplicate entry in the database conf string \"%s\"", key);
        }
        stHash_insert(hash, key, value);
    }
    // The loop ends either on the closing token or because the tokens ran out.
    // In the second case key is NULL and must not be handed to stString_eq or "%s".
    if (key == NULL) {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got an unexpected final entry \"%s\"", "<end of string>");
    }
    free(key);
    free(cA);
    return hash;
}
예제 #10
0
/* Returns the value stored under key in the parsed XML hash.  Throws
 * ST_KV_DATABASE_EXCEPTION_ID when the hash has no value for the key.
 */
static const char *getXmlValueRequired(stHash *hash, const char *key) {
    const char *found = stHash_search(hash, (char*)key);
    if (found != NULL) {
        return found;
    }
    stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "did not find a \"%s\" value in the database XML string", key);
    return found;
}
예제 #11
0
/* Builds a database configuration from its XML string form.
 *
 * The string is parsed with hackParseXmlString.  The type attribute of the
 * st_kv_database_conf tag must equal the database tag, and selects which
 * constructor is called: "tokyo_cabinet", "kyoto_tycoon" or "mysql".
 *
 * Throws ST_KV_DATABASE_EXCEPTION_ID if the type and tag differ, if the type
 * is not one of the three above, or if a required value is missing.
 */
static stKVDatabaseConf *constructFromString(const char *xmlString) {
    stHash *hash = hackParseXmlString(xmlString);
    stKVDatabaseConf *databaseConf = NULL;
    const char *type = getXmlValueRequired(hash, "conf_type");
    const char *dbTag = getXmlValueRequired(hash, "db_tag");
    if (!stString_eq(type, dbTag)) {
        // Both values are reported; the format used to have a single %s for two arguments.
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "Database XML tag \"%s\" did not match st_kv_database_conf type attribute \"%s\"", dbTag, type);
    }
    if (stString_eq(type, "tokyo_cabinet")) {
        databaseConf = stKVDatabaseConf_constructTokyoCabinet(getXmlValueRequired(hash, "database_dir"));
    } else if (stString_eq(type, "kyoto_tycoon")) {
        // database_name is optional here: a missing entry is passed on as NULL.
        databaseConf = stKVDatabaseConf_constructKyotoTycoon(getXmlValueRequired(hash, "host"), 
                                                        getXmlPort(hash), 
                                                        getXmlTimeout(hash), 
                                                        getXMLMaxKTRecordSize(hash),
                                                        getXMLMaxKTBulkSetSize(hash),
                                                        getXMLMaxKTBulkSetNumRecords(hash),
                                                        getXmlValueRequired(hash, "database_dir"),
                                                        stHash_search(hash, "database_name"));
    } else if (stString_eq(type, "mysql")) {
        databaseConf = stKVDatabaseConf_constructMySql(getXmlValueRequired(hash, "host"), getXmlPort(hash),
                                                       getXmlValueRequired(hash, "user"), getXmlValueRequired(hash, "password"),
                                                       getXmlValueRequired(hash, "database_name"), getXmlValueRequired(hash, "table_name"));
    } else {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "invalid database type \"%s\"", type);
    }
    stHash_destruct(hash);
    return databaseConf;
}
/*
 * Concatenation with 2 bases of interstitial and a sequence length breakpoint.
 *
 * A second maf block is added to an existing row hash with breakpointPenalty 10 and
 * interstitialSequence 5, and the resulting hash and name list are compared with
 * hand-written expectations.
 */
static void test_addBlockToHash_3(CuTest *testCase) {
    options_t *options = options_construct();
    options->breakpointPenalty = 10;
    options->interstitialSequence = 5;
    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);

    // Rows already present before the new block is added
    stHash *observedHash = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name3.chr1     0 13 +       100 GCAGCTGAAAACA\n",
                                                     observedList
                                                     );
    // The block to be appended
    mafBlock_t *incomingBlock = maf_newMafBlockListFromString("a score=0 test\n"
                                                   "s reference.chr0 13  5 + 158545518 ACGTA\n"
                                                   "s name.chr1      12  5 +       100 gtcGG\n"
                                                   "s name2.chr1     10  5 +       100 ATGTg\n"
                                                   "s name3.chr1     50  5 +       100 CCCCC\n"
                                                   , 3);
    // Rows expected after the addition
    stHash *expectedHash = createBlockHashFromString("a score=0\n"
                                             "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
                                             "s name.chr1      0 17 +       100 ATGT---ATGCCGac----------gtcGG\n"
                                             "s name2.chr1     0 15 +       100 ATGT---ATGCCG------------ATGTg\n"
                                             "s name3          0 28 +        28 GCAGCTGAAAACA--NNNNNNNNNNCCCCC\n",
                                             expectedList
                                             );
    // The expected name3 row additionally carries these bookkeeping values
    row_t *expectedName3Row = stHash_search(expectedHash, "name3");
    expectedName3Row->prevRightPos = 54;
    free(expectedName3Row->prevName);
    expectedName3Row->prevName = stString_copy("name3.chr1");
    expectedName3Row->multipleNames = true;

    // Full sequences that the rows refer to
    stHash *seqHash = createSeqHashFromString("name.chr1", "ATGTATGCCGacgtc"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
                                              "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *referenceSeq = newMtfseqFromString("gcagctgaaaacaACGTA"
                                         "tttttttttttttttttttttttttttttttt"
                                         "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), referenceSeq);
    mtfseq_t *name2Seq = newMtfseqFromString("ATGTATGCCGATGTg"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
                               "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), name2Seq);
    mtfseq_t *name3Seq = newMtfseqFromString("GCAGCTGAAAACAggggggggggggggggggggggggggggggggggggg"
                               "CCCCCaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                               );
    stHash_insert(seqHash, stString_copy("name3.chr1"), name3Seq);

    addMafBlockToRowHash(observedHash, seqHash, observedList, incomingBlock, options);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));

    // clean up
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(incomingBlock);
    destroyOptions(options);
}
예제 #13
0
/*
 * Builds a hash from every segment to the maximal haplotype path (an stList of
 * segments) that contains it.
 *
 * Asserts that no path is empty and that neither a segment nor its reverse is
 * already a key when the segment is inserted. The new hash is returned to the caller.
 */
stHash *buildSegmentToContigPathHash(stList *maximalHaplotypePaths) {
    stHash *segmentToPath = stHash_construct();
    int64_t pathCount = stList_length(maximalHaplotypePaths);
    for (int64_t pathIndex = 0; pathIndex < pathCount; pathIndex++) {
        stList *path = stList_get(maximalHaplotypePaths, pathIndex);
        int64_t segmentCount = stList_length(path);
        assert(segmentCount > 0);
        for (int64_t segmentIndex = 0; segmentIndex < segmentCount; segmentIndex++) {
            Segment *segment = stList_get(path, segmentIndex);
            //A segment may appear in one path only, in one orientation only
            assert(stHash_search(segmentToPath, segment) == NULL);
            assert(stHash_search(segmentToPath, segment_getReverse(segment)) == NULL);
            stHash_insert(segmentToPath, segment, path);
        }
    }
    return segmentToPath;
}
예제 #14
0
/* Returns the "max_record_size" value of the parsed XML hash, or 10000000
 * when the hash has no such entry.
 *
 * The default used to be 175M, but since noticing problems in bulk *get*
 * within cactus secondary dbs it was cranked way down.  Unlike bulk set, in
 * the get we don't know the total size of the requested records.  To prevent
 * big data transfers (which are problematic), the max size of individual
 * records is reduced, hoping for the best.
 *
 * NOTE(review): this comment previously gave the default as 50M, but the
 * code returns 10000000 -- confirm which value is intended.
 */
static int64_t getXMLMaxKTRecordSize(stHash *hash) {
    const char *configured = stHash_search(hash, "max_record_size");
    return configured == NULL ? (int64_t) 10000000 : stSafeStrToInt64(configured);
}
예제 #15
0
/* Returns the "port" value of the parsed XML hash converted with
 * stSafeStrToUInt32, or 0 when the hash has no such entry.
 */
static int getXmlPort(stHash *hash) {
    const char *configured = stHash_search(hash, "port");
    if (configured != NULL) {
        return stSafeStrToUInt32(configured);
    }
    return 0;
}
예제 #16
0
/* Returns the "max_bulkset_num_records" value of the parsed XML hash, or the
 * tried-and-true default of 10000 when the hash has no such entry.
 */
static int64_t getXMLMaxKTBulkSetNumRecords(stHash *hash) {
    const char *configured = stHash_search(hash, "max_bulkset_num_records");
    return configured == NULL ? (int64_t) 10000 : stSafeStrToInt64(configured);
}
예제 #17
0
/* Returns the "max_bulkset_size" value of the parsed XML hash.  When the hash
 * has no such entry the default is 175M (183500800 bytes), which seems to be
 * about where the kyoto tycoon network error danger zone starts.
 */
static int64_t getXMLMaxKTBulkSetSize(stHash *hash) {
    const char *configured = stHash_search(hash, "max_bulkset_size");
    if (configured != NULL) {
        return stSafeStrToInt64(configured);
    }
    return (int64_t) 175 * 1024 * 1024;
}
예제 #18
0
/* Returns the "timeout" value of the parsed XML hash converted with
 * stSafeStrToUInt32, or -1 (meaning no timeout) when the hash has no such entry.
 */
static int getXmlTimeout(stHash *hash) {
    const char *configured = stHash_search(hash, "timeout");
    if (configured != NULL) {
        return stSafeStrToUInt32(configured);
    }
    // default to -1 -- meaning no timeout
    return -1;
}
예제 #19
0
/*
 * Constructs a face from a given Cap.
 *
 * The top nodes of the face are the caps collected by buildFaces_fillTopNodeList
 * starting from startingCap. For every top node the bottom nodes of its lifted
 * edges are added to the face. The derived destination of a bottom node is the
 * top cap of the positive orientation of its adjacency, or NULL when that cap is
 * the top node's own adjacency.
 */
static void buildFaces_constructFromCap(Cap * startingCap,
        stHash *liftedEdgesTable, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);
    stList *liftedEdges;
    Cap *cap, *bottomNode, *ancestor;
    int64_t index, index2;

    // Goes through the logger, as the rest of the face building does, instead of
    // writing an unterminated line to stdout
    st_logInfo("Constructing new face\n");

    // Establish list of top nodes
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // What, no top nodes!?
    if (stList_length(topNodes) == 0)
        abort();
#endif

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    // For every top node
    for (index = 0; index < stList_length(topNodes); index++) {
        cap = stList_get(topNodes, index);
        face_setTopNode(face, index, cap);
        liftedEdges = stHash_search(liftedEdgesTable, cap);

        // A top node without lifted edges has no bottom nodes
        if (!liftedEdges) {
            face_setBottomNodeNumber(face, index, 0);
            continue;
        }

        face_setBottomNodeNumber(face, index, stList_length(liftedEdges));
        // For every bottom node of that top node
        for (index2 = 0; index2 < stList_length(liftedEdges); index2++) {
            bottomNode
                    = ((LiftedEdge *) stList_get(liftedEdges, index2))->bottomNode;
            face_addBottomNode(face, index, bottomNode);
            ancestor = cap_getTopCap(cap_getPositiveOrientation(
                    cap_getAdjacency(bottomNode)));
            // No derived destination is recorded when it coincides with the
            // top node's own adjacency
            if (cap_getAdjacency(cap) != ancestor)
                face_setDerivedDestination(face, index, index2, ancestor);
            else
                face_setDerivedDestination(face, index, index2, NULL);

            // A bottom node must not also be one of the top nodes
            assert(!stList_contains(topNodes, cap_getPositiveOrientation(bottomNode)));
        }
    }

    // Clean up
    stList_destruct(topNodes);
}
예제 #20
0
/*
 * Returns a newly allocated string for the segment: the maximum likelihood string of
 * its block, followed by a flag character and a space.
 *
 * The phylogenetic tree is looked up by the flower of the segment's block in
 * segmentWriteFn_flowerToPhylogeneticTreeHash and must be present.
 */
static char *segmentWriteFn(Segment *segment) {
    stTree *tree = stHash_search(segmentWriteFn_flowerToPhylogeneticTreeHash, block_getFlower(segment_getBlock(segment)));
    assert(tree != NULL);
    char *mlString = getMaximumLikelihoodString(tree, segment_getBlock(segment));
    //We append a zero to a segment string if it is part of block containing only a reference segment, else we append a 1.
    //We use these boolean values to determine if a sequence contains only these trivial strings, and is therefore trivial.
    char trivialityFlag = '1';
    if (block_getInstanceNumber(segment_getBlock(segment)) == 1) {
        trivialityFlag = '0';
    }
    char *flaggedString = stString_print("%s%c ", mlString, trivialityFlag);
    free(mlString);
    return flaggedString;
}
예제 #21
0
/*
 * Returns true if the adjacency list stored for node1 has a record whose toNode
 * is node2, and false otherwise (including when node1 has no adjacency list).
 */
bool stNaiveConnectivity_hasEdge(stNaiveConnectivity *connectivity, void *node1, void *node2) {
	struct adjacency *record;
	for (record = stHash_search(connectivity->nodesToAdjList, node1); record != NULL; record = record->next) {
		if (record->toNode == node2) {
			return true;
		}
	}
	return false;
}
예제 #22
0
/*
 * Builds the adjacency list structure for the sequences: a hash from every
 * sequence-position to the column (a sorted set of positions) it is aligned in.
 *
 * Every position below MAX_SEQUENCE_SIZE of every sequence starts in a column of its
 * own. Each pair (seq1, pos1, seq2, pos2) then merges the column of the second
 * position into the column of the first, unless they already share a column.
 *
 * The hash is constructed with stIntTuple_destruct as key destructor and without a
 * value destructor; one column can be the value of several keys.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *positionToColumn = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
                                     (int (*)(const void *, const void *))stIntTuple_equalsFn,
                                     (void (*)(void *))stIntTuple_destruct, NULL);

    //A singleton column for every position; the same tuple is hash key and column member
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *key = stIntTuple_construct2( seq, position);
            stSortedSet *singleton = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(singleton, key);
            stHash_insert(positionToColumn, key, singleton);
        }
    }

    //Merge the columns joined by each aligned pair
    stListIterator *pairIt = stList_getIterator(pairs);
    for(stIntTuple *pair = stList_getNext(pairIt); pair != NULL; pair = stList_getNext(pairIt)) {
        //Temporary lookup keys for the two ends of the pair
        stIntTuple *firstKey = stIntTuple_construct2( stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *secondKey = stIntTuple_construct2( stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *keptColumn = stHash_search(positionToColumn, firstKey);
        assert(keptColumn != NULL);
        stSortedSet *absorbedColumn = stHash_search(positionToColumn, secondKey);
        assert(absorbedColumn != NULL);
        if(keptColumn != absorbedColumn) {
            //Move every member of the absorbed column over and re-point its hash entry
            stSortedSetIterator *memberIt = stSortedSet_getIterator(absorbedColumn);
            for(stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                assert(stSortedSet_search(keptColumn, member) == NULL);
                stSortedSet_insert(keptColumn, member);
                assert(stHash_search(positionToColumn, member) == absorbedColumn);
                stHash_insert(positionToColumn, member, keptColumn);
                assert(stHash_search(positionToColumn, member) == keptColumn);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(absorbedColumn);
        }
        stIntTuple_destruct(firstKey);
        stIntTuple_destruct(secondKey);
    }
    stList_destructIterator(pairIt);
    return positionToColumn;
}
/*
 * Debugging aid: prints the title followed by one line per entry of the hash, giving
 * the name, start, length, strand, source length and sequence of the row stored there.
 */
static void printBlockHash(stHash *hash, const char *title) {
    printf("%s:\n", title);
    stHashIterator *keyIt = stHash_getIterator(hash);
    for (char *rowKey = stHash_getNext(keyIt); rowKey != NULL; rowKey = stHash_getNext(keyIt)) {
        row_t *row = stHash_search(hash, rowKey);
        printf("%20s %6"PRIu64" %6"PRIu64" %c %9"PRIu64" %s\n", row->name ,row->start, row->length, 
               row->strand, row->sourceLength, row->sequence);
    }
    stHash_destructIterator(keyIt);
}
예제 #24
0
/*
 * Fills in a hashtable which associates with every top cap the list of
 * lifted edges that start from it.
 *
 * For each cap of the flower that has an adjacency, the cap and the positive
 * orientation of its adjacency are both lifted to their top caps. Caps without a
 * top cap are skipped. Otherwise a LiftedEdge is recorded under the cap's top cap,
 * with the adjacency's top cap as destination and the cap itself as bottom node.
 */
static stHash *buildFaces_computeLiftedEdges(Flower * flower) {
    stHash *table = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Flower_CapIterator *capIt = flower_getCapIterator(flower);
    Cap *bottomCap;

    // Iterate through potential bottom nodes
    while ((bottomCap = flower_getNextCap(capIt)) != NULL) {
        // Unconnected caps give no lifted edge
        Cap *adjacentCap = cap_getAdjacency(bottomCap);
        if (adjacentCap == NULL) {
            continue;
        }

        // Lift both ends
        Cap *sourceTop = cap_getTopCap(bottomCap);
        Cap *destinationTop = cap_getTopCap(cap_getPositiveOrientation(adjacentCap));

        // Either both ends can be lifted or neither can
        assert((sourceTop && destinationTop) || (!sourceTop && !destinationTop));

        // If root node
        if (sourceTop == NULL) {
            continue;
        }

        // Create the lifted edge
        LiftedEdge *edge = st_malloc(sizeof(LiftedEdge));
        edge->destination = destinationTop;
        edge->bottomNode = bottomCap;

#ifndef NDEBUG
        // Self loop
        if (destinationTop == sourceTop) {
            abort();
        }
#endif

        // Append to the list of the top cap, creating the list on first use
        stList *edgesOfTop = stHash_search(table, sourceTop);
        if (edgesOfTop == NULL) {
            edgesOfTop = stList_construct3(2, buildFaces_stList_destructElem);
            stList_append(edgesOfTop, edge);
            stHash_insert(table, sourceTop, edgesOfTop);
        } else {
            stList_append(edgesOfTop, edge);
        }
    }

    flower_destructCapIterator(capIt);
    return table;
}
예제 #25
0
/*
 * Assertion-only sanity check for one end (cap) of a haplotype path.
 *
 * When getCapCode classifies the adjacency at cap as a SCAFFOLD_GAP or an
 * AMBIGUITY_GAP, this walks along adjacent segments (direction chosen by
 * capDir) until it reaches one whose end has a cap in haplotypeEventStrings,
 * then asserts that the haplotype path containing that segment:
 *   - exists and is distinct from haplotypePath,
 *   - has the same first value in haplotypeToMaximalHaplotypeLengthHash, and
 *   - maps to the same entry in haplotypePathToScaffoldPathHash.
 * For any other cap code the function does nothing.
 */
static void debugScaffoldPathsP(Cap *cap, stList *haplotypePath,
        stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash,
        stHash *segmentToMaximalHaplotypePathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters, bool capDir) {
    int64_t insertLen;
    int64_t deleteLen;
    Cap *farCap;
    enum CapCode code = getCapCode(cap, &farCap, haplotypeEventStrings, contaminationEventStrings, &insertLen, &deleteLen, capCodeParameters);

    // Only gap adjacencies are checked
    if (code != SCAFFOLD_GAP && code != AMBIGUITY_GAP) {
        return;
    }

    // Step across segments until one ends in a haplotype event
    Segment *neighbour = getAdjacentCapsSegment(cap);
    assert(neighbour != NULL);
    for (;;) {
        Cap *boundaryCap = capDir ? segment_get5Cap(neighbour) : segment_get3Cap(neighbour);
        if (hasCapInEvents(cap_getEnd(boundaryCap), haplotypeEventStrings)) {
            break;
        }
        Cap *steppingCap = capDir ? segment_get5Cap(neighbour) : segment_get3Cap(neighbour);
        neighbour = getAdjacentCapsSegment(steppingCap);
        assert(neighbour != NULL);
    }
    assert(neighbour != NULL);
    assert(hasCapInEvents(cap_getEnd(segment_get5Cap(neighbour)), haplotypeEventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(neighbour))));

    // Length record of the path we started from
    stIntTuple *ownLength = stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath);
    (void)ownLength; // referenced only by assertions
    assert(ownLength != NULL);

    // The neighbouring path may be indexed by either orientation of the segment
    stList *neighbourPath = stHash_search(segmentToMaximalHaplotypePathHash, neighbour);
    if (neighbourPath == NULL) {
        neighbourPath = stHash_search(segmentToMaximalHaplotypePathHash,
                segment_getReverse(neighbour));
    }
    assert(neighbourPath != NULL);
    assert(neighbourPath != haplotypePath);

    // Both paths must agree on length and on scaffold path
    stIntTuple *neighbourLength = stHash_search(haplotypeToMaximalHaplotypeLengthHash, neighbourPath);
    (void)neighbourLength; // referenced only by assertions
    assert(neighbourLength != NULL);
    assert(stIntTuple_get(ownLength, 0) == stIntTuple_get(neighbourLength, 0));
    assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) ==
            stHash_search(haplotypePathToScaffoldPathHash, neighbourPath));
}
/*
 * Compares two block hashes key by key.
 *
 * Returns true when every key of observedHash is present in expectedHash
 * with an equal row (per rowsAreEqual) and vice versa. On the first
 * mismatch both hashes are printed (observed first, then expected) and
 * false is returned.
 *
 * The iterator is always destroyed before returning, including on the
 * mismatch paths.
 */
static bool hashesAreEqual(stHash *observedHash, stHash *expectedHash) {
    // Pass 0 walks the observed keys, pass 1 walks the expected keys.
    stHash *walked[2] = { observedHash, expectedHash };
    stHash *probed[2] = { expectedHash, observedHash };
    bool equal = true;

    for (int pass = 0; pass < 2 && equal; pass++) {
        stHashIterator *hit = stHash_getIterator(walked[pass]);
        char *key;
        while ((key = stHash_getNext(hit)) != NULL) {
            // The key must exist on the other side, and the rows must match.
            if (stHash_search(probed[pass], key) == NULL
                    || !rowsAreEqual(stHash_search(observedHash, key), stHash_search(expectedHash, key))) {
                equal = false;
                break;
            }
        }
        // Single release point for the iterator, reached on every path.
        stHash_destructIterator(hit);
    }

    if (!equal) {
        printBlockHash(observedHash, "observed");
        printBlockHash(expectedHash, "expected");
    }
    return equal;
}
예제 #27
0
/*
 * Removes a node from the connectivity structure: invalidates the cache,
 * removes every edge in the node's adjacency list, and finally drops the
 * node's entry from nodesToAdjList.
 */
void stNaiveConnectivity_removeNode(stNaiveConnectivity *connectivity, void *node) {
    invalidateCache(connectivity);

    struct adjacency *entry = stHash_search(connectivity->nodesToAdjList, node);
    struct adjacency *successor;

    for (; entry != NULL; entry = successor) {
        // Grab the successor first: removing the edge frees the current entry.
        successor = entry->next;
        stNaiveConnectivity_removeEdge(connectivity, node, entry->toNode);
    }

    stHash_remove(connectivity->nodesToAdjList, node);
}
예제 #28
0
/*
 * Removes the edge between node1 and node2.
 *
 * Invalidates the cache, locates the entry for node2 in node1's adjacency
 * list, follows its inverse pointer to the matching entry in node2's list,
 * and removes (and frees) both entries.
 *
 * Both nodes are asserted to have adjacency lists, and the edge is asserted
 * to exist. In builds where assertions are compiled out, a missing edge is
 * a no-op instead of a NULL dereference.
 */
void stNaiveConnectivity_removeEdge(stNaiveConnectivity *connectivity, void *node1, void *node2) {
    invalidateCache(connectivity);

    struct adjacency *adjList1 = stHash_search(connectivity->nodesToAdjList, node1);
    assert(adjList1 != NULL);
    assert(stHash_search(connectivity->nodesToAdjList, node2) != NULL);

    // Scan node1's list for the entry pointing at node2.
    while (adjList1 != NULL && adjList1->toNode != node2) {
        adjList1 = adjList1->next;
    }

    // The scan ends with NULL when no such edge exists; never dereference that.
    assert(adjList1 != NULL);
    if (adjList1 == NULL) {
        return;
    }

    // We can find the link in the other node's adjacency list easily
    struct adjacency *adjList2 = adjList1->inverse;
    assert(adjList2->inverse == adjList1);

    // Now remove the links from the lists, and free them.
    removeEdgeFromAdjList(connectivity, node1, adjList1);
    removeEdgeFromAdjList(connectivity, node2, adjList2);
}
예제 #29
0
/*
 * Rebuilds the faces of a flower: destroys the existing faces, computes the
 * table of lifted edges, and calls buildFaces_constructFromCap for every cap
 * that has at least one lifted edge recorded in that table.
 */
void flower_reconstructFaces(Flower * flower) {
    flower_destructFaces(flower);

    stHash *liftedByCap = buildFaces_computeLiftedEdges(flower);
    Flower_CapIterator *capIt = flower_getCapIterator(flower);
    Cap *cap;

    while ((cap = flower_getNextCap(capIt))) {
        stList *lifted = stHash_search(liftedByCap, cap);
        // Skip caps with no entry in the table
        if (!lifted) {
            continue;
        }
        // Skip caps whose list is empty
        if (stList_length(lifted) < 1) {
            continue;
        }
        buildFaces_constructFromCap(cap, liftedByCap, flower);
    }

    stHash_destruct(liftedByCap);
    flower_destructCapIterator(capIt);
}
/*
 * Get the adjacency edges that bridge between components, i.e. edges that
 * have exactly one of their two nodes inside a given component.
 *
 * For each component, every node of the component is looked up in
 * nodesToNonZeroWeightedAdjacencyEdges; each edge found there is appended
 * to the result when one of its end nodes is not in the component's node
 * set. The returned list is built with stList_construct and holds the edge
 * tuples found in the hash (they are not copied).
 */
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    stList *bridges = stList_construct();

    for (int64_t c = 0; c < stList_length(components); c++) {
        stSortedSet *members = getNodeSetOfEdges(stList_get(components, c));
        stSortedSetIterator *memberIt = stSortedSet_getIterator(members);
        stIntTuple *member;

        while ((member = stSortedSet_getNext(memberIt)) != NULL) {
            stList *incident = stHash_search(nodesToNonZeroWeightedAdjacencyEdges,
                    member);
            if (incident == NULL) {
                continue; // node has no non-zero weighted adjacency edges
            }

            for (int64_t e = 0; e < stList_length(incident); e++) {
                stIntTuple *edge = stList_get(incident, e);

                // Temporary single-value tuples used as lookup keys for the two ends
                stIntTuple *endA = stIntTuple_construct1(stIntTuple_get(edge, 0));
                stIntTuple *endB = stIntTuple_construct1(stIntTuple_get(edge, 1));

                void *foundA = stSortedSet_search(members, endA);
                void *foundB = stSortedSet_search(members, endB);

                // The edge came from a member node, so at least one end is inside
                assert(foundA != NULL || foundB != NULL);

                // Bridging means the two ends are not both inside this component
                if (!(foundA != NULL && foundB != NULL)) {
                    stList_append(bridges, edge);
                }

                stIntTuple_destruct(endA);
                stIntTuple_destruct(endB);
            }
        }

        stSortedSet_destructIterator(memberIt);
        stSortedSet_destruct(members);
    }

    return bridges;
}