コード例 #1
0
ファイル: cPecanAlign.c プロジェクト: adderan/cPecan
// Returns a hash mapping from sequence header to sequence data.
static stHash *readFastaFile(char *filename) {
    FILE *fasta = fopen(filename, "r");
    if (fasta == NULL) {
        st_errnoAbort("Could not open fasta file %s", filename);
    }
    stHash *headerToData = stHash_construct3(stHash_stringKey,
                                             stHash_stringEqualKey,
                                             free,
                                             free);
    struct List *seqs = constructEmptyList(0, NULL);
    struct List *seqLengths = constructEmptyList(0, free);
    struct List *headers = constructEmptyList(0, free);
    fastaRead(fasta, seqs, seqLengths, headers);

    for (int64_t i = 0; i < seqs->length; i++) {
        char *fullHeader = headers->list[i];
        stList *headerTokens = stString_splitByString(fullHeader, " ");
        char *usableHeader = stString_copy(stList_get(headerTokens, 0));
        stHash_insert(headerToData, usableHeader, seqs->list[i]);
        stList_destruct(headerTokens);
    }
    destructList(seqs);
    destructList(seqLengths);
    destructList(headers);

    return headerToData;
}
コード例 #2
0
int main(int argc, char **argv) {
  options_t *options = options_construct();
  stHash *sequenceHash = NULL; // keyed on fasta headers, valued with mtfseq_t pointers
  stHash *alignmentHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyRow); // keyed on species names, valued with row_t pointers
  stList *rowOrder = stList_construct3(0, free); // when adding keys to alignmentHash, append to this list
  parseOptions(argc, argv, options);
  // read fastas, populate sequenceHash
  de_verbose("Creating sequence hash.\n");
  sequenceHash = createSequenceHash(options->seqs);
  mafFileApi_t *mfapi = maf_newMfa(options->maf, "r");
  de_verbose("Creating alignment hash.\n");
  buildAlignmentHash(mfapi, alignmentHash, sequenceHash, rowOrder, options);
  if (options->outMfa != NULL) {
    // fasta output
    de_verbose("Writing fasta output.\n");
    writeFastaOut(alignmentHash, rowOrder, options);
  }
  if (options->outMaf != NULL) {
    // maf output
    de_verbose("Writing maf output.\n");
    writeMafOut(alignmentHash, rowOrder, options);
  }
  // cleanup
  maf_destroyMfa(mfapi);
  stHash_destruct(alignmentHash);
  stHash_destruct(sequenceHash);
  stList_destruct(rowOrder);
  destroyOptions(options);
  return(EXIT_SUCCESS);
}
コード例 #3
0
ファイル: sonLibKVDatabaseConf.c プロジェクト: adderan/sonLib
/* Parse XML string into a hash.  This parses all attributes of all tags
 * into values.  st_kv_database_conf type is stored as conf_type,
 * database tag is stores as db_tag.  This does minimal error checking
 * and is really lame.
 */
static stHash *hackParseXmlString(const char *xmlString) {
    stHash *hash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, free);
    char *toReplace[5] = { "</", "<", "/>", ">", "=" };
    char *cA = stString_replace(xmlString, toReplace[0], " "), *cA2;
    for (int64_t i = 1; i < 5; i++) {
        cA2 = stString_replace(cA, toReplace[i], " ");
        free(cA);
        cA = cA2;
    }
    getExpectedToken(&cA2, "st_kv_database_conf");
    stHash_insert(hash, stString_copy("conf_type"), getKeyValue(&cA2, "type"));
    stHash_insert(hash, stString_copy("db_tag"), getNextToken(&cA2));

    char *key;
    while (((key = getNextToken(&cA2)) != NULL) && !stString_eq(key, "st_kv_database_conf")) {
        char *value = getNextToken(&cA2);
        if (value == NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "failed to to get value for key \"%s\"", key);
        }
        if (stHash_search(hash, key) != NULL) {
            stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got a duplicate entry in the database conf string \"%s\"", key);
        }
        stHash_insert(hash, key, value);
    }
    if(!stString_eq(key, "st_kv_database_conf")) {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "got an unexpected final entry \"%s\"", key);
    }
    free(key);
    free(cA);
    return hash;
}
コード例 #4
0
static stHash *createSeqHashFromString(char *name, char *input) {
    mtfseq_t *mtfs = newMtfseq(strlen(input));
    stHash *hash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyMtfseq);
    seq_copyIn(mtfs, input);
    stHash_insert(hash, stString_copy(name), mtfs);
    return hash;
}
コード例 #5
0
static stHash *getComponents(stList *filteredEdges) {
    /*
     * A kind of stupid reimplementation of the greedy function, done just to trap typos.
     */
    stHash *nodesToComponents = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);
    for (int64_t i = 0; i < stList_length(nodes); i++) {
        stIntTuple *node = stList_get(nodes, i);
        stSortedSet *component = stSortedSet_construct();
        stSortedSet_insert(component, node);
        stHash_insert(nodesToComponents, node, component);
    }
    for (int64_t i = 0; i < stList_length(filteredEdges); i++) {
        stIntTuple *edge = stList_get(filteredEdges, i);
        stIntTuple *node1 = stIntTuple_construct1( stIntTuple_get(edge, 1));
        stIntTuple *node2 = stIntTuple_construct1( stIntTuple_get(edge, 2));
        stSortedSet *component1 = stHash_search(nodesToComponents, node1);
        stSortedSet *component2 = stHash_search(nodesToComponents, node2);
        assert(component1 != NULL && component2 != NULL);
        if (component1 != component2) {
            stSortedSet *component3 = stSortedSet_getUnion(component1, component2);
            stSortedSetIterator *setIt = stSortedSet_getIterator(component3);
            stIntTuple *node3;
            while ((node3 = stSortedSet_getNext(setIt)) != NULL) {
                stHash_insert(nodesToComponents, node3, component3);
            }
            stSortedSet_destructIterator(setIt);
            stSortedSet_destruct(component1);
            stSortedSet_destruct(component2);
        }
        stIntTuple_destruct(node1);
        stIntTuple_destruct(node2);
    }
    return nodesToComponents;
}
コード例 #6
0
static stHash *putEdgesInHash(stList *edges) {
    stHash *intsToEdgesHash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey, (int (*)(const void *, const void *))stIntTuple_equalsFn, (void (*)(void *))stIntTuple_destruct, NULL);
    for(int64_t i=0; i<stList_length(edges); i++) {
        stIntTuple *edge = stList_get(edges, i);
        stHash_insert(intsToEdgesHash, constructEdge(stIntTuple_get(edge, 0), stIntTuple_get(edge, 1)), edge);
    }
    return intsToEdgesHash;
}
コード例 #7
0
static void test_readingFasta_0(CuTest *testCase) {
    char inputName[] = ">simChimp.chrA";
    char inputSequence[] = "ATAATACTTGCACACTTCTGCTATTACTTGATGTGTTTTCTATGGGGTGT"
        "CTTTCAGTGCTATGGGCAAGGCCATGGATTAATGGTGCCATAATTGCTCT"
        "AGGCAGTGACTAGAAACAGTTCACAAGTTTTTACTGTATCAAACTATGTT"
        "TTATAGTACGATTCACCCTCCAGGGGACCATCCCAAACTACTGGCCTAAA"
        "AGGACCTGCCATGTTGTAACTCCCCAGCTTAGAAATATAGACGGGAGGAA"
        "TGACaaaaagaagaaaaaaaaaaaaagaaaaaataaaaaaaaaacaaaaa"
        "agatagagaaaaaaaaaagtaaaaacaaaaaaaaataaaaaagggaaaaa"
        "aaataacaaaggaacaaaaaaaaaaaaaaaaaaataaaaagaaaaaCAAG"
        "ATAACCTTCATGCCATTGGAGCTATCTATTATTGTCTTGACCTATGCTTT"
        "ATCAATTTCTTCCTTCCTAGGAAGACATTTTTCTAGAAAGCTAAACGTTT"
        "TTGTAGGCTTGCATGTTCTGTCTGGGCTTGAATGGTTGTGCGTCTACAAG"
        "CCTCATTTACCATAGCACCATGCTTGGGTGGTATCTATCATCATTATCAA"
        "TAGTCAAGTCATTATAATGTTTTGGTGATCAGGCCAGATCCCTTGCACCA"
        "GTGACTTTCTAAATAGCACCTCCTCCATCATTTAAGGATCTCTAGCAACT"
        "TTAATCTGACTCACCTTGCCATGCAGAGTGCATGTTCCTTTTTAACACCC"
        "TGTGATTATGGGTTGGGTCTATTTGTATTTGTTTGATTACATCAGACGAC"
        "CAGGCCAGAGACAGATAAACACAACAGCCACTGGAACCTAAAGCTGTGTT"
        "CAGAATGTCACGGAATGTCTCATTGCACCCAGAGCTAGGGTGGGTATGAG"
        "TATGATCTTCTACATAAGGTACCCCAGGAAAATTAACTTAACAACCAATC"
        "AATTACAGAAGATGAATTCTGCTGTTGTCTCTTATTAGTTGGACTATTCA"
        "GCCTAATGGTTGGCCACTTAGCTTGTCATGAGCATTACTGTACTACTATG"
        "TCTAGTGTTTCCAGTTATTAGTTAGCCCACTGGATAGACAGTTTTGGCTT"
        "GTTTTCTTTCATTTGTATTGCCCACTCACCTAGCAAATCAGACAAAGGGG"
        "CATGTGAAAACTACCTTAGACTCTGCAGTTAGACAAACCATACTTTCCAC"
        "ATAGACCTCAGACATTTGGACATGAATAATTTCCTTCCTCCGGAGTGGTG"
        "GTTCCTCAACACTTATCACTTTCTTCTTCTTTTACCCGTATCACTGTCAA";
    FILE *ofp = de_fopen("testFasta.fa", "w");
    fprintf(ofp, "%s\n", inputName);
    for (size_t i = 0; i < strlen(inputSequence); ++i) {
        fprintf(ofp, "%c", inputSequence[i]);
        if (((i + 1) % 50) == 0) {
            fprintf(ofp, "\n");
        }
    }
    fprintf(ofp, "\n");
    fclose(ofp);
    stHash *sequenceHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyMtfseq);
    addSequencesToHash(sequenceHash, "testFasta.fa");
    mtfseq_t *value = NULL;
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "not in there")) == NULL);
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "simChimp.chrA")) != NULL);
    if (value != NULL) {
        CuAssertTrue(testCase, strlen(value->seq) == strlen(inputSequence));
        CuAssertTrue(testCase, strcmp(value->seq, inputSequence) == 0);
    }
    if (remove("testFasta.fa")) {
        fprintf(stderr, "Error, unable to remove temporary file testFasta.fa\n");
        exit(EXIT_FAILURE);
    }
    stHash_destruct(sequenceHash);
}
コード例 #8
0
/*
 * Fill in a hashtable which to every node associates
 * alist of lifted edges
 */
static stHash *buildFaces_computeLiftedEdges(Flower * flower) {
    stHash *liftedEdgesTable = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Flower_CapIterator *iter = flower_getCapIterator(flower);
    Cap *cap, *attachedAncestor;
    Cap *adjacency, *adjacencyAncestor;
    stList *liftedEdges;
    LiftedEdge *liftedEdge;

    // Iterate through potential bottom nodes
    while ((cap = flower_getNextCap(iter))) {
        // ... check if connected
        if ((adjacency = cap_getAdjacency(cap))) {
            // ... lift
            attachedAncestor = cap_getTopCap(cap);
            adjacencyAncestor = cap_getTopCap(cap_getPositiveOrientation(
                    adjacency));

#ifndef NDEBUG
            assert((attachedAncestor && adjacencyAncestor) || (!attachedAncestor && !adjacencyAncestor));
#endif

            // If root node
            if (attachedAncestor == NULL)
                continue;

            // ... create lifted edge
            liftedEdge = st_malloc(sizeof(LiftedEdge));
            liftedEdge->destination = adjacencyAncestor;
            liftedEdge->bottomNode = cap;

#ifndef NDEBUG
            // Self loop
            if (adjacencyAncestor == attachedAncestor)
                abort();
#endif

            // ... add it to the hashtable
            if ((liftedEdges
                    = stHash_search(liftedEdgesTable, attachedAncestor))) {
                stList_append(liftedEdges, liftedEdge);
            } else {
                liftedEdges = stList_construct3(2,
                        buildFaces_stList_destructElem);
                stList_append(liftedEdges, liftedEdge);
                stHash_insert(liftedEdgesTable, attachedAncestor, liftedEdges);
            }
        }
    }

    flower_destructCapIterator(iter);
    return liftedEdgesTable;
}
コード例 #9
0
/*
 * This builds an adjacency list structure for the the sequences. Every sequence-position
 * has a column in the hash with which it can be aligned with.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *hash = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
                                     (int (*)(const void *, const void *))stIntTuple_equalsFn,
                                     (void (*)(void *))stIntTuple_destruct, NULL);
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        for(int64_t position=0; position<MAX_SEQUENCE_SIZE; position++) {
            stIntTuple *seqPos = stIntTuple_construct2( seq, position);
            stSortedSet *column = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(column, seqPos);
            stHash_insert(hash, seqPos, column);
        }
    }
    stListIterator *it = stList_getIterator(pairs);
    stIntTuple *pair;
    while((pair = stList_getNext(it)) != NULL) {
        stIntTuple *seqPos1 = stIntTuple_construct2( stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *seqPos2 = stIntTuple_construct2( stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *column1 = stHash_search(hash, seqPos1);
        assert(column1 != NULL);
        stSortedSet *column2 = stHash_search(hash, seqPos2);
        assert(column2 != NULL);
        if(column1 != column2) { //Merge the columns
            stSortedSetIterator *it2 = stSortedSet_getIterator(column2);
            stIntTuple *seqPos3;
            while((seqPos3 = stSortedSet_getNext(it2)) != NULL) {
                assert(stSortedSet_search(column1, seqPos3) == NULL);
                stSortedSet_insert(column1, seqPos3);
                assert(stHash_search(hash, seqPos3) == column2);
                stHash_insert(hash, seqPos3, column1);
                assert(stHash_search(hash, seqPos3) == column1);
            }
            stSortedSet_destructIterator(it2);
            stSortedSet_destruct(column2);
        }
        //Cleanup loop.
        stIntTuple_destruct(seqPos1);
        stIntTuple_destruct(seqPos2);
    }
    stList_destructIterator(it);
    return hash;
}
コード例 #10
0
ファイル: sonLibSet.c プロジェクト: adderan/sonLib
stSet *stSet_construct3(uint64_t(*hashKey)(const void *), int(*hashEqualsKey)(const void *, const void *), void(*destructKeys)(void *)) {
    stSet *set = st_malloc(sizeof(*set));
    set->hash = stHash_construct3(hashKey, hashEqualsKey, destructKeys, NULL);
    return set;
}
コード例 #11
0
/*
 * Constructs a face locally from a given Cap but without precomputed liftedEdges
 */
void buildFaces_reconstructFromCap(Cap * startingCap, Flower * flower) {
    Face *face = face_construct(flower);
    stList * liftedEdges;
    stList *topNodes = stList_construct3(16, NULL);
    stHash *liftedEdgesTable = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Cap *cap, *bottomNode, *ancestor;
    int64_t index, index2;

    printf("Constructing new face");

    // Establishlist of top nodes and fill liftedEdges table
    buildFaces_fillTopNodeList2(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // What, no top nodes!?
    assert(stList_length(topNodes));
#endif

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    // For every top node
    for (index = 0; index < stList_length(topNodes); index++) {
        cap = stList_get(topNodes, index);
        face_setTopNode(face, index, cap);
        liftedEdges = stHash_search(liftedEdgesTable, cap);

        if (!liftedEdges) {
            face_setBottomNodeNumber(face, index, 0);
            continue;
        }

        face_setBottomNodeNumber(face, index, stList_length(liftedEdges));
        // For every bottom node of that top node
        for (index2 = 0; index2 < stList_length(liftedEdges); index2++) {
            bottomNode
                    = ((LiftedEdge *) stList_get(liftedEdges, index2))->bottomNode;
            face_addBottomNode(face, index, bottomNode);

            assert(cap_getAdjacency(bottomNode));
            ancestor = cap_getTopCap(cap_getPositiveOrientation(
                    cap_getAdjacency(bottomNode)));
            if (cap_getAdjacency(cap) != ancestor)
                face_setDerivedDestination(face, index, index2, ancestor);
            else
                face_setDerivedDestination(face, index, index2, NULL);

#ifndef NDEBUG
            // If bottom nodes part of top nodes
            if (stList_contains(topNodes, cap_getPositiveOrientation(
                    ((LiftedEdge*) stList_get(liftedEdges, index2))->bottomNode)))
                abort();
#endif
        }
    }

    // Clean up
    stList_destruct(topNodes);
    stHash_destruct(liftedEdgesTable);
}