C++ (Cpp) stHash_destruct Exemples

Exemple #1

0

Afficher le fichier

Fichier : mafToFastaStitcher.c Projet : adamnovak/mafTools

/*
 * Program entry point. Parses the command line into an options_t, builds the
 * sequence hash from options->seqs and the alignment hash from the maf named by
 * options->maf, writes fasta and/or maf output when the corresponding option is
 * set, then releases every structure created here.
 */
int main(int argc, char **argv) {
  options_t *options = options_construct();
  // Keyed on species names, valued with row_t pointers.
  stHash *alignmentHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey,
                                            free, destroyRow);
  // Keys added to alignmentHash are also appended to this list.
  stList *rowOrder = stList_construct3(0, free);
  parseOptions(argc, argv, options);

  // Read the fastas; keyed on fasta headers, valued with mtfseq_t pointers.
  de_verbose("Creating sequence hash.\n");
  stHash *sequenceHash = createSequenceHash(options->seqs);

  mafFileApi_t *mfapi = maf_newMfa(options->maf, "r");
  de_verbose("Creating alignment hash.\n");
  buildAlignmentHash(mfapi, alignmentHash, sequenceHash, rowOrder, options);

  // Fasta output, only when requested.
  if (options->outMfa != NULL) {
    de_verbose("Writing fasta output.\n");
    writeFastaOut(alignmentHash, rowOrder, options);
  }
  // Maf output, only when requested.
  if (options->outMaf != NULL) {
    de_verbose("Writing maf output.\n");
    writeMafOut(alignmentHash, rowOrder, options);
  }

  // Cleanup.
  maf_destroyMfa(mfapi);
  stHash_destruct(alignmentHash);
  stHash_destruct(sequenceHash);
  stList_destruct(rowOrder);
  destroyOptions(options);
  return EXIT_SUCCESS;
}

Exemple #2

0

Afficher le fichier

Fichier : test.mafToFastaStitcherAPI.c Projet : adamnovak/mafTools

/*
 * Checks addMafBlockToRowHash() against a hand-built expectation. Per the
 * original author's note the scenario is: concatenation with 2 bases of
 * interstitial and a sequence length breakpoint.
 */
static void test_addBlockToHash_3(CuTest *testCase) {
    options_t *opts = options_construct();
    opts->breakpointPenalty = 10;
    opts->interstitialSequence = 5;
    stList *observedOrder = stList_construct3(0, free);
    stList *expectedOrder = stList_construct3(0, free);

    // Row hash before the new block is added.
    stHash *observedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
        "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
        "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n"
        "s name3.chr1     0 13 +       100 GCAGCTGAAAACA\n",
        observedOrder);

    // The block handed to addMafBlockToRowHash().
    mafBlock_t *block = maf_newMafBlockListFromString(
        "a score=0 test\n"
        "s reference.chr0 13  5 + 158545518 ACGTA\n"
        "s name.chr1      12  5 +       100 gtcGG\n"
        "s name2.chr1     10  5 +       100 ATGTg\n"
        "s name3.chr1     50  5 +       100 CCCCC\n",
        3);

    // Row hash expected afterwards.
    stHash *expectedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
        "s name.chr1      0 17 +       100 ATGT---ATGCCGac----------gtcGG\n"
        "s name2.chr1     0 15 +       100 ATGT---ATGCCG------------ATGTg\n"
        "s name3          0 28 +        28 GCAGCTGAAAACA--NNNNNNNNNNCCCCC\n",
        expectedOrder);
    // The expected "name3" row needs its bookkeeping fields set by hand.
    row_t *name3Row = stHash_search(expectedHash, "name3");
    name3Row->prevRightPos = 54;
    free(name3Row->prevName);
    name3Row->prevName = stString_copy("name3.chr1");
    name3Row->multipleNames = true;

    // Source sequences for every species that appears in the block.
    stHash *seqHash = createSeqHashFromString("name.chr1",
        "ATGTATGCCGacgtc"
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *referenceSeq = newMtfseqFromString(
        "gcagctgaaaacaACGTA"
        "tttttttttttttttttttttttttttttttt"
        "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), referenceSeq);
    mtfseq_t *name2Seq = newMtfseqFromString(
        "ATGTATGCCGATGTg"
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), name2Seq);
    mtfseq_t *name3Seq = newMtfseqFromString(
        "GCAGCTGAAAACAggggggggggggggggggggggggggggggggggggg"
        "CCCCCaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    stHash_insert(seqHash, stString_copy("name3.chr1"), name3Seq);

    addMafBlockToRowHash(observedHash, seqHash, observedOrder, block, opts);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

    // Clean up.
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedOrder);
    stList_destruct(expectedOrder);
    maf_destroyMafBlockList(block);
    destroyOptions(opts);
}

Exemple #3

0

Afficher le fichier

Fichier : cycleConstraintMatchingAlgorithms.c Projet : benedictpaten/matchingAndOrdering

/*
 * Returns the minimum cost adjacency switch chosen between the best 2 edge
 * switch for the cycles and the switches reported by
 * getBest4EdgeAdjacencySwitch2() for each edge of the cycles.
 *
 * The cycles list must be non-empty, and the result is asserted to be non-NULL.
 */
static AdjacencySwitch *getBestAdjacencySwitch(stList *cycles,
        stList *nonZeroWeightAdjacencyEdges, stSortedSet *allAdjacencyEdges) {
    assert(stList_length(cycles) > 0);

    //Nodes-to-edges hash for the non-zero weight adjacency edges.
    stHash *nonZeroEdgesByNode = getNodesToEdgesHash(nonZeroWeightAdjacencyEdges);

    //Every edge of every cycle, as one list and as a nodes-to-edges hash.
    stList *cycleEdges = stList_join(cycles);
    assert(stList_length(cycleEdges) > 0);
    stHash *cycleEdgesByNode = getNodesToEdgesHash(cycleEdges);

    //Adjacency edges that bridge between (have a node in two) components.
    stList *bridgingEdges = getEdgesThatBridgeComponents(cycles, nonZeroEdgesByNode);
    stHash *bridgingEdgesByNode = getNodesToEdgesHash(bridgingEdges);

    //Start from the best 2 edge switch..
    AdjacencySwitch *best = getBest2EdgeAdjacencySwitch(cycles, allAdjacencyEdges);

    //..then try to beat it with a 3 or 4 edge switch built around each cycle edge.
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(cycleEdges); edgeIndex++) {
        AdjacencySwitch *candidate = getBest4EdgeAdjacencySwitch2(
                stList_get(cycleEdges, edgeIndex), allAdjacencyEdges,
                cycleEdgesByNode, bridgingEdgesByNode);
        best = getMinimumCostAdjacencySwitch(best, candidate);
    }
    assert(best != NULL);

    //Cleanup
    stList_destruct(cycleEdges);
    stList_destruct(bridgingEdges);
    stHash_destruct(cycleEdgesByNode);
    stHash_destruct(bridgingEdgesByNode);
    stHash_destruct(nonZeroEdgesByNode);

    return best;
}

Exemple #4

0

Afficher le fichier

Fichier : test.mafToFastaStitcherAPI.c Projet : adamnovak/mafTools

/*
 * Checks addMafBlockToRowHash() against a hand-built expectation. Per the
 * original author's note the scenario is: concatenation with 2 bases of
 * interstitial AND a previously unobserved sequence.
 */
static void test_addBlockToHash_2(CuTest *testCase) {
    options_t *opts = options_construct();
    opts->breakpointPenalty = 10;
    opts->interstitialSequence = 5;
    stList *observedOrder = stList_construct3(0, free);
    stList *expectedOrder = stList_construct3(0, free);

    // Row hash before the new block is added; it has no name3 row yet.
    stHash *observedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
        "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
        "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
        observedOrder);

    // The block handed to addMafBlockToRowHash(); name3.chr@ is new.
    mafBlock_t *block = maf_newMafBlockListFromString(
        "a score=0 test\n"
        "s reference.chr0 13  5 + 158545518 ACGTA\n"
        "s name.chr1      12  5 +       100 gTcGG\n"
        "s name2.chr1     10  5 +       100 ATGTg\n"
        "s name3.chr@      0  5 +        20 aaccg\n",
        3);

    // Row hash expected afterwards.
    stHash *expectedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 18 + 158545518 gcagctgaaaaca--ACGTA\n"
        "s name.chr1      0 17 +       100 ATGT---ATGCCGacgTcGG\n"
        "s name2.chr1     0 15 +       100 ATGT---ATGCCG--ATGTg\n"
        "s name3.chr@     0  5 +        20 ---------------aaccg\n",
        expectedOrder);

    // Source sequences for every species that appears in the block.
    stHash *seqHash = createSeqHashFromString("name.chr1",
        "ATGTATGCCGacgTc"
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");
    mtfseq_t *referenceSeq = newMtfseqFromString(
        "gcagctgaaaacaACGTA"
        "tttttttttttttttttttttttttttttttt"
        "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), referenceSeq);
    mtfseq_t *name2Seq = newMtfseqFromString(
        "ATGTATGCCGATGTg"
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), name2Seq);
    mtfseq_t *name3Seq = newMtfseqFromString("aaccgTTTTTTTTTTTTTTT");
    stHash_insert(seqHash, stString_copy("name3.chr@"), name3Seq);

    addMafBlockToRowHash(observedHash, seqHash, observedOrder, block, opts);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

    // Clean up.
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedOrder);
    stList_destruct(expectedOrder);
    maf_destroyMafBlockList(block);
    destroyOptions(opts);
}

Exemple #5

0

Afficher le fichier

Fichier : scaffoldPaths.c Projet : benedictpaten/assemblaLib

/*
 * Debug pass over the haplotype paths. For each path it takes the first and
 * last segments, re-orients them so both have a true strand, asserts that
 * neither terminal cap has a true adjacency when it has an adjacent cap
 * segment, and then runs debugScaffoldPathsP() from the 5' cap (flag 1) and
 * from the 3' cap (flag 0).
 */
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t pathIndex = 0; pathIndex < stList_length(haplotypePaths); pathIndex++) {
        stList *haplotypePath = stList_get(haplotypePaths, pathIndex);
        assert(stList_length(haplotypePath) > 0);

        //The two ends of the path, as stored.
        Segment *fivePrimeSegment = stList_get(haplotypePath, 0);
        Segment *threePrimeSegment = stList_get(haplotypePath, stList_length(haplotypePath) - 1);
        assert(segment_getStrand(fivePrimeSegment) == segment_getStrand(threePrimeSegment));

        //If the strand is false, swap the ends and take the reverse of each.
        if (!segment_getStrand(fivePrimeSegment)) {
            Segment *originalFivePrime = fivePrimeSegment;
            fivePrimeSegment = segment_getReverse(threePrimeSegment);
            threePrimeSegment = segment_getReverse(originalFivePrime);
        }
        assert(segment_getStrand(fivePrimeSegment));
        assert(segment_getStrand(threePrimeSegment));

        Cap *fivePrimeCap = segment_get5Cap(fivePrimeSegment);
        Cap *threePrimeCap = segment_get3Cap(threePrimeSegment);
        if (getAdjacentCapsSegment(fivePrimeCap) != NULL) {
            assert(!trueAdjacency(fivePrimeCap, haplotypeEventStrings));
        }
        if (getAdjacentCapsSegment(threePrimeCap) != NULL) {
            assert(!trueAdjacency(threePrimeCap, haplotypeEventStrings));
        }

        debugScaffoldPathsP(fivePrimeCap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings,
                contaminationEventStrings, capCodeParameters, 1);
        debugScaffoldPathsP(threePrimeCap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings,
                contaminationEventStrings, capCodeParameters, 0);
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
}

Exemple #6

0

Afficher le fichier

Fichier : perfectMatching.c Projet : benedictpaten/matchingAndOrdering

/*
 * Extends chosenEdges until it covers every node: the nodes of the set that
 * are not in the node set of chosenEdges are paired up in iteration order, and
 * for each pair the edge returned by getEdgeForNodes() is appended.
 *
 * Asserts that no node is left unpaired and that the final number of edges is
 * exactly half the number of nodes.
 */
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges,
        stSortedSet *nodes) {
    stSortedSet *coveredNodes = getNodeSetOfEdges(chosenEdges);
    stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges);

    //First half of the pair currently being built, or NULL if none is pending.
    stIntTuple *pendingNode = NULL;
    stSortedSetIterator *nodeIt = stSortedSet_getIterator(nodes);
    for (stIntTuple *node = stSortedSet_getNext(nodeIt); node != NULL;
            node = stSortedSet_getNext(nodeIt)) {
        if (stSortedSet_search(coveredNodes, node) != NULL) {
            continue; //Already has an edge.
        }
        if (pendingNode == NULL) {
            pendingNode = node;
            continue;
        }
        stList_append(chosenEdges,
                getEdgeForNodes(stIntTuple_get(pendingNode, 0),
                        stIntTuple_get(node, 0), nodesToAdjacencyEdges));
        pendingNode = NULL;
    }
    stSortedSet_destructIterator(nodeIt);
    assert(pendingNode == NULL);
    stSortedSet_destruct(coveredNodes);
    assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes));
    stHash_destruct(nodesToAdjacencyEdges);
}

Exemple #7

0

Afficher le fichier

Fichier : giantComponentTest.c Projet : benedictpaten/cactus

/*
 * Verifies the components computed from filteredEdges:
 *  - every component has between 1 and maxComponentSize members;
 *  - every edge of the file-level "edges" list that was filtered out joins two
 *    distinct components whose combined size exceeds maxComponentSize.
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);
    stList *components = stHash_getValues(nodesToComponents);

    //Size bounds on each component.
    for (int64_t componentIndex = 0; componentIndex < stList_length(components); componentIndex++) {
        stSortedSet *component = stList_get(components, componentIndex);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }

    //None of the dropped edges could have been added.
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges,
            (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t edgeIndex = 0; edgeIndex < stList_length(edges); edgeIndex++) {
        stIntTuple *edge = stList_get(edges, edgeIndex);
        if (stSortedSet_search(keptEdges, edge) != NULL) {
            continue; //Edge survived the filter, nothing to check.
        }
        stIntTuple *leftNode = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *rightNode = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *leftComponent = stHash_search(nodesToComponents, leftNode);
        stSortedSet *rightComponent = stHash_search(nodesToComponents, rightNode);
        CuAssertTrue(testCase, leftComponent != NULL && rightComponent != NULL);
        CuAssertTrue(testCase, leftComponent != rightComponent);
        CuAssertTrue(testCase,
                stSortedSet_size(leftComponent) + stSortedSet_size(rightComponent) > maxComponentSize);
        stIntTuple_destruct(leftNode);
        stIntTuple_destruct(rightNode);
    }
    stSortedSet_destruct(keptEdges);

    //Cleanup: put the components in a set, then destruct them through the
    //set's destructor.
    stSortedSet *uniqueComponents = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(uniqueComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(uniqueComponents);
    stHash_destruct(nodesToComponents);
}

Exemple #8

0

Afficher le fichier

Fichier : matchingAlgorithms.c Projet : benedictpaten/matchingAndOrdering

/*
 * Reads the matching created by Blossum.
 *
 * The first line of fileHandle holds "<nodeNumber> <edgeNumber>"; each of the
 * following edgeNumber lines holds "<node1> <node2>". For every edge read, the
 * corresponding entry of originalEdges (looked up through the hash built by
 * putEdgesInHash) is appended to the result; edges with no entry in that hash
 * are skipped.
 *
 * Returns a newly constructed list of chosen edges; its elements are the
 * pointers found in the hash built from originalEdges.
 */
static stList *readMatching(FILE *fileHandle, stList *originalEdges) {
    stHash *originalEdgesHash = putEdgesInHash(originalEdges);

    //Header line. The counts start at 0 so that a short parse cannot leave
    //them indeterminate when asserts are compiled out.
    char *line = stFile_getLineFromFile(fileHandle);
    assert(line != NULL);
    int64_t nodeNumber = 0, edgeNumber = 0;
    int fieldsRead = sscanf(line, "%" PRIi64 " %" PRIi64 "\n", &nodeNumber, &edgeNumber);
    assert(fieldsRead == 2);
    (void) fieldsRead; //Only used by the asserts.
    free(line);

    stList *chosenEdges = stList_construct();
    for(int64_t j=0; j<edgeNumber; j++) {
        line = stFile_getLineFromFile(fileHandle);
        //A file with fewer edge lines than announced must not reach sscanf
        //with a NULL line; this matches the check on the header line.
        assert(line != NULL);
        int64_t node1 = -1, node2 = -1;
        fieldsRead = sscanf(line, "%" PRIi64 " %" PRIi64 "", &node1, &node2);
        assert(fieldsRead == 2);
        free(line);
        assert(node1 >= 0);
        assert(node1 < nodeNumber);
        assert(node2 >= 0);
        assert(node2 < nodeNumber);

        //Build a temporary edge only to use as the hash key.
        stIntTuple *edge = constructEdge(node1, node2);
        stIntTuple *originalEdge = stHash_search(originalEdgesHash, edge);
        if(originalEdge != NULL) {
            stList_append(chosenEdges, originalEdge);
        }
        stIntTuple_destruct(edge);
    }
    stHash_destruct(originalEdgesHash);
    return chosenEdges;
}

Exemple #9

0

Afficher le fichier

Fichier : sonLibKVDatabaseConf.c Projet : adderan/sonLib

/*
 * Builds a database configuration from an XML string.
 *
 * The string is parsed into a hash by hackParseXmlString(); the "conf_type"
 * and "db_tag" values must be equal and select which constructor is used:
 * "tokyo_cabinet", "kyoto_tycoon" or "mysql".
 *
 * Throws ST_KV_DATABASE_EXCEPTION_ID (through stThrowNew) if the tag and the
 * type differ, or if the type is none of the three above.
 *
 * NOTE(review): on the throwing paths the parsed hash is not destructed
 * before stThrowNew is called — confirm whether that leak is acceptable.
 */
static stKVDatabaseConf *constructFromString(const char *xmlString) {
    stHash *hash = hackParseXmlString(xmlString);
    stKVDatabaseConf *databaseConf = NULL;
    const char *type = getXmlValueRequired(hash, "conf_type");
    const char *dbTag = getXmlValueRequired(hash, "db_tag");
    if (!stString_eq(type, dbTag)) {
        // Report both values: the format needs one conversion per argument.
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "Database XML tag \"%s\" did not match st_kv_database_conf type attribute \"%s\"", dbTag, type);
    }
    if (stString_eq(type, "tokyo_cabinet")) {
        databaseConf = stKVDatabaseConf_constructTokyoCabinet(getXmlValueRequired(hash, "database_dir"));
    } else if (stString_eq(type, "kyoto_tycoon")) {
        // "database_name" is fetched with a plain stHash_search rather than
        // getXmlValueRequired.
        databaseConf = stKVDatabaseConf_constructKyotoTycoon(getXmlValueRequired(hash, "host"),
                                                        getXmlPort(hash),
                                                        getXmlTimeout(hash),
                                                        getXMLMaxKTRecordSize(hash),
                                                        getXMLMaxKTBulkSetSize(hash),
                                                        getXMLMaxKTBulkSetNumRecords(hash),
                                                        getXmlValueRequired(hash, "database_dir"),
                                                        stHash_search(hash, "database_name"));
    } else if (stString_eq(type, "mysql")) {
        databaseConf = stKVDatabaseConf_constructMySql(getXmlValueRequired(hash, "host"), getXmlPort(hash),
                                                       getXmlValueRequired(hash, "user"), getXmlValueRequired(hash, "password"),
                                                       getXmlValueRequired(hash, "database_name"), getXmlValueRequired(hash, "table_name"));
    } else {
        stThrowNew(ST_KV_DATABASE_EXCEPTION_ID, "invalid database type \"%s\"", type);
    }
    stHash_destruct(hash);
    return databaseConf;
}

Exemple #10

0

Afficher le fichier

Fichier : scaffoldPaths.c Projet : benedictpaten/assemblaLib

/*
 * Builds and returns the hash filled in by getScaffoldPathsP() for the given
 * haplotype paths. The second hash that getScaffoldPathsP() returns is only
 * passed on to debugScaffoldPaths() and is destructed before returning.
 */
stHash *getScaffoldPaths(stList *haplotypePaths, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypePathToScaffoldPathHash = stHash_construct();
    stHash *haplotypeToMaximalHaplotypeLengthHash = getScaffoldPathsP(haplotypePaths,
            haplotypePathToScaffoldPathHash, haplotypeEventStrings,
            contaminationEventStrings, capCodeParameters);
    debugScaffoldPaths(haplotypePaths, haplotypePathToScaffoldPathHash,
            haplotypeToMaximalHaplotypeLengthHash, haplotypeEventStrings,
            contaminationEventStrings, capCodeParameters);
    stHash_destruct(haplotypeToMaximalHaplotypeLengthHash);
    return haplotypePathToScaffoldPathHash;
}

Exemple #11

0

Afficher le fichier

Fichier : sonLibPosetAlignmentTest.c Projet : ptdtan/sonLib

/*
 * Builds an adjacency list from the pairs, then runs dfs() from position 0 of
 * each sequence in turn. Returns non-zero if any of those searches reports a
 * cycle, and zero otherwise. Once a cycle has been found, no further searches
 * are started.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *adjacencyList = buildAdjacencyList(pairs, sequenceNumber);

    //Bookkeeping sets handed to dfs(); they are built without a destructor.
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    for(int64_t seq=0; seq<sequenceNumber; seq++) {
        //Temporary key, used only to find the tuple stored in the column.
        stIntTuple *key = stIntTuple_construct2( seq, 0);
        stSortedSet *column = stHash_search(adjacencyList, key);
        assert(column != NULL);
        stIntTuple *storedPos = stSortedSet_search(column, key);
        assert(storedPos != NULL);
        if(!cyclic && dfs(adjacencyList, storedPos, started, done)) {
            cyclic = 1;
        }
        stIntTuple_destruct(key);
    }

    //Cleanup: gather the columns in a set built with stSortedSet_destruct as
    //its destructor, then destruct them through that set.
    stSortedSet *columns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    stHashIterator *it = stHash_getIterator(adjacencyList);
    for(stIntTuple *pos = stHash_getNext(it); pos != NULL; pos = stHash_getNext(it)) {
        stSortedSet *column = stHash_search(adjacencyList, pos);
        assert(column != NULL);
        stSortedSet_insert(columns, column);
    }
    stHash_destructIterator(it);
    stHash_destruct(adjacencyList);
    stSortedSet_destruct(columns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);

    return cyclic;
}

Exemple #12

0

Afficher le fichier

Fichier : test.mafToFastaStitcherAPI.c Projet : adamnovak/mafTools

/*
 * Round-trip test for addSequencesToHash(): writes a single-record fasta file
 * (50 bases per line) to "testFasta.fa", loads it into a sequence hash, and
 * checks that the record can be found under its name (without the leading
 * '>') with exactly the sequence that was written, while an unknown name is
 * not found. The temporary file is removed afterwards.
 */
static void test_readingFasta_0(CuTest *testCase) {
    char inputName[] = ">simChimp.chrA";
    char inputSequence[] = "ATAATACTTGCACACTTCTGCTATTACTTGATGTGTTTTCTATGGGGTGT"
        "CTTTCAGTGCTATGGGCAAGGCCATGGATTAATGGTGCCATAATTGCTCT"
        "AGGCAGTGACTAGAAACAGTTCACAAGTTTTTACTGTATCAAACTATGTT"
        "TTATAGTACGATTCACCCTCCAGGGGACCATCCCAAACTACTGGCCTAAA"
        "AGGACCTGCCATGTTGTAACTCCCCAGCTTAGAAATATAGACGGGAGGAA"
        "TGACaaaaagaagaaaaaaaaaaaaagaaaaaataaaaaaaaaacaaaaa"
        "agatagagaaaaaaaaaagtaaaaacaaaaaaaaataaaaaagggaaaaa"
        "aaataacaaaggaacaaaaaaaaaaaaaaaaaaataaaaagaaaaaCAAG"
        "ATAACCTTCATGCCATTGGAGCTATCTATTATTGTCTTGACCTATGCTTT"
        "ATCAATTTCTTCCTTCCTAGGAAGACATTTTTCTAGAAAGCTAAACGTTT"
        "TTGTAGGCTTGCATGTTCTGTCTGGGCTTGAATGGTTGTGCGTCTACAAG"
        "CCTCATTTACCATAGCACCATGCTTGGGTGGTATCTATCATCATTATCAA"
        "TAGTCAAGTCATTATAATGTTTTGGTGATCAGGCCAGATCCCTTGCACCA"
        "GTGACTTTCTAAATAGCACCTCCTCCATCATTTAAGGATCTCTAGCAACT"
        "TTAATCTGACTCACCTTGCCATGCAGAGTGCATGTTCCTTTTTAACACCC"
        "TGTGATTATGGGTTGGGTCTATTTGTATTTGTTTGATTACATCAGACGAC"
        "CAGGCCAGAGACAGATAAACACAACAGCCACTGGAACCTAAAGCTGTGTT"
        "CAGAATGTCACGGAATGTCTCATTGCACCCAGAGCTAGGGTGGGTATGAG"
        "TATGATCTTCTACATAAGGTACCCCAGGAAAATTAACTTAACAACCAATC"
        "AATTACAGAAGATGAATTCTGCTGTTGTCTCTTATTAGTTGGACTATTCA"
        "GCCTAATGGTTGGCCACTTAGCTTGTCATGAGCATTACTGTACTACTATG"
        "TCTAGTGTTTCCAGTTATTAGTTAGCCCACTGGATAGACAGTTTTGGCTT"
        "GTTTTCTTTCATTTGTATTGCCCACTCACCTAGCAAATCAGACAAAGGGG"
        "CATGTGAAAACTACCTTAGACTCTGCAGTTAGACAAACCATACTTTCCAC"
        "ATAGACCTCAGACATTTGGACATGAATAATTTCCTTCCTCCGGAGTGGTG"
        "GTTCCTCAACACTTATCACTTTCTTCTTCTTTTACCCGTATCACTGTCAA";
    // Measured once; the sequence does not change while it is being written.
    const size_t sequenceLength = strlen(inputSequence);

    // Write the fasta: header line, then the sequence wrapped at 50 columns.
    FILE *ofp = de_fopen("testFasta.fa", "w");
    fprintf(ofp, "%s\n", inputName);
    for (size_t i = 0; i < sequenceLength; ++i) {
        fputc(inputSequence[i], ofp);
        if (((i + 1) % 50) == 0) {
            fputc('\n', ofp);
        }
    }
    fputc('\n', ofp);
    fclose(ofp);

    // Read it back and compare.
    stHash *sequenceHash = stHash_construct3(stHash_stringKey, stHash_stringEqualKey, free, destroyMtfseq);
    addSequencesToHash(sequenceHash, "testFasta.fa");
    mtfseq_t *value = NULL;
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "not in there")) == NULL);
    CuAssertTrue(testCase, (value = stHash_search(sequenceHash, "simChimp.chrA")) != NULL);
    if (value != NULL) {
        CuAssertTrue(testCase, strlen(value->seq) == sequenceLength);
        CuAssertTrue(testCase, strcmp(value->seq, inputSequence) == 0);
    }
    if (remove("testFasta.fa")) {
        fprintf(stderr, "Error, unable to remove temporary file testFasta.fa\n");
        exit(EXIT_FAILURE);
    }
    stHash_destruct(sequenceHash);
}

Exemple #13

0

Afficher le fichier

Fichier : addReferenceCoordinates.c Projet : benedictpaten/cactus

/*
 * Bottom-up pass over a list of flowers for the reference event.
 *
 * Steps, in order:
 *  1. collect the caps for referenceEventName, then set adjacency lengths and
 *     recover new caps / broken adjacencies (which may append to the cap list);
 *  2. fill the file-level hash segmentWriteFn_flowerToPhylogeneticTreeHash with
 *     one phylogenetic tree per flower, rooted at the reference event;
 *  3. if isTop, build the thread strings in memory and turn each one into a
 *     meta sequence with coordinates; otherwise just build the threads;
 *  4. destruct the hash and the cap list.
 *
 * Parameters:
 *  flowers                    - the flowers to process.
 *  sequenceDatabase           - passed through to the thread-building functions.
 *  referenceEventName         - name of the reference event in each flower's event tree.
 *  isTop                      - selects between the two thread-building branches (step 3).
 *  generateSubstitutionMatrix - passed through when building each phylogenetic tree.
 */
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName,
              bool isTop, stMatrix *(*generateSubstitutionMatrix)(double)) {
    /*
     * A reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    stList *caps = getCaps(flowers, referenceEventName);
    for (int64_t i = stList_length(caps) - 1; i >= 0; i--) { //Start from end, as we add to this list.
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, i), caps);
    }
    for(int64_t i=0; i<stList_length(flowers); i++) {
        recoverBrokenAdjacencies(stList_get(flowers, i), caps, referenceEventName);
    }

    //Build the phylogenetic event trees for base calling.
    //The hash is a file-level variable and is created with cleanupPhylogeneticTree
    //as the value destructor.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL, (void (*)(void *))cleanupPhylogeneticTree);
    for(int64_t i=0; i<stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower, getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (isTop) {
        //One thread string per cap, at matching indices.
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        int64_t nonTrivialSeqIndex = 0, trivialSeqIndex = stList_length(threadStrings); //These are used as indices for the names of trivial and non-trivial sequences.
        for (int64_t i = 0; i < stList_length(threadStrings); i++) {
            Cap *cap = stList_get(caps, i);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *flower = end_getFlower(cap_getEnd(cap));
            char *threadString = stList_get(threadStrings, i);
            bool trivialString = isTrivialString(&threadString); //This alters the original string
            //Trivial sequences are numbered upwards from the number of threads,
            //non-trivial ones upwards from zero.
            MetaSequence *metaSequence = addMetaSequence(flower, cap, trivialString ? trivialSeqIndex++ : nonTrivialSeqIndex++,
                    threadString, trivialString);
            free(threadString);
            int64_t endCoordinate = setCoordinates(flower, metaSequence, cap, metaSequence_getStart(metaSequence) - 1);
            (void) endCoordinate; //Only read by the assert below.
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }
        stList_setDestructor(threadStrings, NULL); //The strings are already cleaned up by the above loop
        stList_destruct(threadStrings);
    } else {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    }
    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}

Exemple #14

0

Afficher le fichier

Fichier : test.mafToFastaStitcherAPI.c Projet : adamnovak/mafTools

/*
 * Checks penalize(): after penalizing "name.chr1" by 5, the observed rows and
 * row order must match the expected block, in which name.chr1 has five extra
 * 'N' characters and the other rows have five extra '-' characters.
 */
static void test_penalize_0(CuTest *testCase) {
    stList *observedOrder = stList_construct3(0, free);
    stList *expectedOrder = stList_construct3(0, free);

    stHash *observedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
        "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
        "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
        observedOrder);
    stHash *expectedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
        "s name.chr1      0 15 +       100 ATGT---ATGCCGNNNNN\n"
        "s name2.chr1     0 10 +       100 ATGT---ATGCCG-----\n",
        expectedOrder);

    penalize(observedHash, "name.chr1", 5);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedOrder, expectedOrder));

    // Clean up.
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stList_destruct(observedOrder);
    stList_destruct(expectedOrder);
}

Exemple #15

0

Afficher le fichier

Fichier : cactusFacesBuilding.c Projet : benedictpaten/cactus

/*
 * Destructs the flower's current faces and rebuilds them: the lifted edges
 * table is computed for the flower, and buildFaces_constructFromCap() is
 * called for every cap that has a non-empty list of lifted edges in it.
 */
void flower_reconstructFaces(Flower * flower) {
    flower_destructFaces(flower);
    stHash *liftedEdgesTable = buildFaces_computeLiftedEdges(flower);
    Flower_CapIterator *capIt = flower_getCapIterator(flower);

    for (Cap *cap = flower_getNextCap(capIt); cap != NULL; cap = flower_getNextCap(capIt)) {
        stList *liftedEdges = stHash_search(liftedEdgesTable, cap);
        if (liftedEdges == NULL || stList_length(liftedEdges) < 1) {
            continue; // Nothing lifted onto this cap.
        }
        buildFaces_constructFromCap(cap, liftedEdgesTable, flower);
    }

    stHash_destruct(liftedEdgesTable);
    flower_destructCapIterator(capIt);
}

Exemple #16

0

Afficher le fichier

Fichier : cycleConstraintMatchingAlgorithms.c Projet : benedictpaten/matchingAndOrdering

/*
 * Walks round a simple cycle, starting at the first node of its first edge,
 * and returns a newly constructed sorted set holding every other node visited
 * (the start node, the node two steps on, and so on).
 *
 * The cycle must have an even number of edges and every node must have exactly
 * two edges; both are asserted. The returned set is constructed with
 * stIntTuple_destruct as its element destructor.
 */
static stSortedSet *getOddNodes(stList *cycle) {
    stSortedSet *alternateNodes = stSortedSet_construct3(
            (int(*)(const void *, const void *)) stIntTuple_cmpFn,
            (void(*)(void *)) stIntTuple_destruct);

    stHash *nodesToEdges = getNodesToEdgesHash(cycle);
    int64_t current = stIntTuple_get(stList_get(cycle, 0), 0);
    int64_t previous = -1; //No predecessor before the first step.
    assert(stList_length(cycle) % 2 == 0);

    for (int64_t step = 0; step < stList_length(cycle); step++) {
        //Keep the nodes reached on even numbered steps only.
        if (step % 2 == 0) {
            addNodeToSet(alternateNodes, current);
        }

        stList *incidentEdges = getItemForNode(current, nodesToEdges);
        assert(stList_length(incidentEdges) == 2);

        //Move along whichever of the two edges does not lead back to where we
        //came from; the first edge is tried first.
        int64_t next = getOtherPosition(stList_get(incidentEdges, 0), current);
        if (next == previous) {
            next = getOtherPosition(stList_get(incidentEdges, 1), current);
            assert(next != previous);
        }
        previous = current;
        current = next;
    }
    stHash_destruct(nodesToEdges);

    assert(stList_length(cycle) / 2 == stSortedSet_size(alternateNodes));

    return alternateNodes;
}

Exemple #17

0

Afficher le fichier

Fichier : test.mafToFastaStitcherAPI.c Projet : adamnovak/mafTools

static void test_interstitial_0(CuTest *testCase) {
    // Calls interstitialInsert on the "name.chr1" row (arguments 10 and 5) for a
    // '+' strand block and then for a '-' strand block, and checks that both the
    // resulting block hash and the row-order list equal the expected block.
    {
        // case 1: name.chr1 on the + strand
        stList *obsOrder = stList_construct3(0, free);
        stHash *obsBlock = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
                                                     obsOrder);
        stList *expOrder = stList_construct3(0, free);
        stHash *expBlock = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
                                                     "s name.chr1      0 15 +       100 ATGT---ATGCCGaaaTa\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG-----\n",
                                                     expOrder);
        stHash *sequences = createSeqHashFromString("name.chr1", "ATGTATGCCGaaaTaTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"
                                                    "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT");
        interstitialInsert(obsBlock, sequences, "name.chr1", 10, '+', 5);
        CuAssertTrue(testCase, hashesAreEqual(obsBlock, expBlock));
        CuAssertTrue(testCase, listsAreEqual(obsOrder, expOrder));
        // release everything owned by this case
        stHash_destruct(obsBlock);
        stHash_destruct(expBlock);
        stHash_destruct(sequences);
        stList_destruct(obsOrder);
        stList_destruct(expOrder);
    }
    {
        // case 2: name.chr1 on the - strand
        stList *obsOrder = stList_construct3(0, free);
        stHash *obsBlock = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                     "s name.chr1      0 10 -       100 ATGT---ATGCCG\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n",
                                                     obsOrder);
        stList *expOrder = stList_construct3(0, free);
        stHash *expBlock = createBlockHashFromString("a score=0\n"
                                                     "s reference.chr0 0 13 + 158545518 gcagctgaaaaca-----\n"
                                                     "s name.chr1      0 15 -       100 ATGT---ATGCCGaaaTa\n"
                                                     "s name2.chr1     0 10 +       100 ATGT---ATGCCG-----\n",
                                                     expOrder);
        stHash *sequences = createSeqHashFromString("name.chr1", "ggggggggggggTTgggggggggggggggggggggggggggggggggggg" // 50
                                                    "gggaagggGgggCgggTgggAgggcgggtgggagg" // 35
                                                    "tAtttCGGCATACAT");
        interstitialInsert(obsBlock, sequences, "name.chr1", 10, '-', 5);
        CuAssertTrue(testCase, hashesAreEqual(obsBlock, expBlock));
        CuAssertTrue(testCase, listsAreEqual(obsOrder, expOrder));
        // release everything owned by this case
        stHash_destruct(obsBlock);
        stHash_destruct(expBlock);
        stHash_destruct(sequences);
        stList_destruct(obsOrder);
        stList_destruct(expOrder);
    }
}

Exemple #18

0

Afficher le fichier

Fichier : cycleConstraintMatchingAlgorithms.c Projet : benedictpaten/matchingAndOrdering

stList *getComponents(stList *edges) {
    /*
     * Partitions the given edges into connected components. The result is a list
     * of lists of edges in which every edge belongs to exactly one component;
     * parallel edges between the same pair of nodes (multi-graphs) are allowed.
     * The returned list destructs its component lists when it is destructed.
     */
    stHash *nodesToEdges = getNodesToEdgesHash(edges);
    stList *components =
            stList_construct3(0, (void(*)(void *)) stList_destruct);

    //Greedily grow one component from every node that still has an entry in the hash
    stList *pendingNodes = stHash_getKeys(nodesToEdges);
    while (stList_length(pendingNodes) > 0) {
        stIntTuple *seedNode = stList_pop(pendingNodes);
        stList *seedEdges = stHash_search(nodesToEdges, seedNode);
        if (seedEdges == NULL) {
            //No entry left for this node (presumably consumed by an earlier getComponentsP call)
            stIntTuple_destruct(seedNode);
            continue;
        }

        //We have a component to build, seeded from this node's edges
        stSortedSet *componentEdges = stSortedSet_construct();
        stHash_remove(nodesToEdges, seedNode);
        for (int64_t edgeIndex = 0; edgeIndex < stList_length(seedEdges); edgeIndex++) {
            stIntTuple *edge = stList_get(seedEdges, edgeIndex);
            getComponentsP(nodesToEdges, stIntTuple_get(edge, 0),
                    componentEdges);
            getComponentsP(nodesToEdges, stIntTuple_get(edge, 1),
                    componentEdges);
        }
        stList_append(components, stSortedSet_getList(componentEdges));

        //Cleanup of the per-component temporaries
        stSortedSet_destruct(componentEdges);
        stList_destruct(seedEdges);
        stIntTuple_destruct(seedNode);
    }
    assert(stHash_size(nodesToEdges) == 0);
    stHash_destruct(nodesToEdges);
    stList_destruct(pendingNodes);

    return components;
}

Exemple #19

0

Afficher le fichier

Fichier : scaffoldPaths.c Projet : benedictpaten/assemblaLib

static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    /*
     * Groups haplotype paths into buckets (scaffold paths).
     *
     * Every path in haplotypePaths first gets its own single-element bucket in
     * haplotypePathToScaffoldPathHash. Then, for each path whose 5' end cap code
     * (as reported by getCapCode) is SCAFFOLD_GAP or AMBIGUITY_GAP, its bucket is
     * merged with the bucket of the haplotype path reached by following adjacent
     * segments from that end, and the length entry of every member of the merged
     * bucket is replaced by the sum of the two buckets' lengths.
     *
     * Returns the hash built by buildContigPathToContigPathLengthHash, updated as
     * described (values are stIntTuples holding one length). It is returned without
     * being destructed here, so the caller is responsible for it.
     */
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);

    //Start with every haplotype path alone in its own bucket
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        //Pick the end segment of the path that is on the positive strand
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        //insertLength, deleteLength and otherCap are only out-parameters of getCapCode; they are not used further here
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            //The segment may be stored in the hash in either orientation
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
            //Every member of bucket1 and bucket2 now maps to bucket3, so nothing in the
            //hash refers to the two source buckets any more; release them instead of leaking them.
            //NOTE(review): assumes neither bucket owns its elements (the initial buckets come from
            //stSortedSet_construct(); confirm stSortedSet_getUnion also returns a non-owning set).
            stSortedSet_destruct(bucket1);
            stSortedSet_destruct(bucket2);
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}

Exemple #20

0

Afficher le fichier

Fichier : test.mafToFastaStitcherAPI.c Projet : adamnovak/mafTools

static void test_newBlockHashFromBlock_0(CuTest *testCase) {
    // Builds a block hash from a four-row block and verifies, for each of the four
    // keys ("reference", "name", "name2", "name3"), every field of the stored row.
    stList *rowOrder = stList_construct3(0, free);
    stHash *blockHash = createBlockHashFromString("a score=0 test=0\n"
                                                  "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
                                                  "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
                                                  "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n"
                                                  "s name3.chr1     5 13 -       100 ATGTgggATGCCG\n",
                                                  rowOrder);
    CuAssertTrue(testCase, stHash_size(blockHash) == 4);
    {
        // row 1: the reference row
        row_t *row = stHash_search(blockHash, "reference");
        CuAssertTrue(testCase, row != NULL);
        CuAssertTrue(testCase, !strcmp(row->name, "reference.chr0"));
        CuAssertTrue(testCase, !strcmp(row->prevName, "reference.chr0"));
        CuAssertTrue(testCase, !row->multipleNames);
        CuAssertTrue(testCase, row->start == 0);
        CuAssertTrue(testCase, row->length == 13);
        CuAssertTrue(testCase, row->sourceLength == 158545518);
        CuAssertTrue(testCase, row->prevRightPos == 12);
        CuAssertTrue(testCase, row->strand == '+');
        CuAssertTrue(testCase, row->prevStrand == '+');
        CuAssertTrue(testCase, !strcmp(row->sequence, "gcagctgaaaaca"));
    }
    {
        // row 2
        row_t *row = stHash_search(blockHash, "name");
        CuAssertTrue(testCase, row != NULL);
        CuAssertTrue(testCase, !strcmp(row->name, "name.chr1"));
        CuAssertTrue(testCase, !strcmp(row->prevName, "name.chr1"));
        CuAssertTrue(testCase, !row->multipleNames);
        CuAssertTrue(testCase, row->start == 0);
        CuAssertTrue(testCase, row->length == 10);
        CuAssertTrue(testCase, row->sourceLength == 100);
        CuAssertTrue(testCase, row->prevRightPos == 9);
        CuAssertTrue(testCase, row->strand == '+');
        CuAssertTrue(testCase, row->prevStrand == '+');
        CuAssertTrue(testCase, !strcmp(row->sequence, "ATGT---ATGCCG"));
    }
    {
        // row 3
        row_t *row = stHash_search(blockHash, "name2");
        CuAssertTrue(testCase, row != NULL);
        CuAssertTrue(testCase, !strcmp(row->name, "name2.chr1"));
        CuAssertTrue(testCase, !strcmp(row->prevName, "name2.chr1"));
        CuAssertTrue(testCase, !row->multipleNames);
        CuAssertTrue(testCase, row->start == 0);
        CuAssertTrue(testCase, row->length == 10);
        CuAssertTrue(testCase, row->sourceLength == 100);
        CuAssertTrue(testCase, row->prevRightPos == 9);
        CuAssertTrue(testCase, row->strand == '+');
        CuAssertTrue(testCase, row->prevStrand == '+');
        CuAssertTrue(testCase, !strcmp(row->sequence, "ATGT---ATGCCG"));
    }
    {
        // row 4: the only row on the - strand
        row_t *row = stHash_search(blockHash, "name3");
        CuAssertTrue(testCase, row != NULL);
        CuAssertTrue(testCase, !strcmp(row->name, "name3.chr1"));
        CuAssertTrue(testCase, !strcmp(row->prevName, "name3.chr1"));
        CuAssertTrue(testCase, !row->multipleNames);
        CuAssertTrue(testCase, row->start == 5);
        CuAssertTrue(testCase, row->length == 13);
        CuAssertTrue(testCase, row->sourceLength == 100);
        CuAssertTrue(testCase, row->prevRightPos == 17);
        CuAssertTrue(testCase, row->strand == '-');
        CuAssertTrue(testCase, row->prevStrand == '-');
        CuAssertTrue(testCase, !strcmp(row->sequence, "ATGTgggATGCCG"));
    }
    // clean up
    stList_destruct(rowOrder);
    stHash_destruct(blockHash);
}

Exemple #21

0

Afficher le fichier

Fichier : cPecanAlign.c Projet : adderan/cPecan

int main(int argc, char *argv[]) {
    // Parse arguments
    if (argc != 3) {
        usage(argv);
        return 1;
    }

    // You would load a custom HMM here if you wanted using
    // hmm_getStateMachine (see the realign code)
    StateMachine *stateMachine  = stateMachine5_construct(fiveState);

    PairwiseAlignmentParameters *parameters = pairwiseAlignmentBandingParameters_construct();

    stHash *targetSequences = readFastaFile(argv[1]);
    stHash *querySequences = readFastaFile(argv[2]);

    // For each query sequence, align it against all target sequences.
    stHashIterator *queryIt = stHash_getIterator(querySequences);
    char *queryHeader;
    while ((queryHeader = stHash_getNext(queryIt)) != NULL) {
        char *querySeq = stHash_search(querySequences, queryHeader);
        stHashIterator *targetIt = stHash_getIterator(targetSequences);
        char *targetHeader;
        while ((targetHeader = stHash_getNext(targetIt)) != NULL) {
            char *targetSeq = stHash_search(targetSequences, targetHeader);
            // Here we should try both the target sequence and its
            // reverse-complemented version


            // Aligns the sequences.
            // If you have alignment constraints (anchors) you should
            // replace this with getAlignedPairsUsingAnchors.
            stList *alignedPairs = getAlignedPairs(stateMachine, targetSeq,
                                                   querySeq, parameters,
                                                   true, true);
            // Takes into account the probability of aligning to a
            // gap, by transforming the posterior probability into the
            // AMAP objective function (see Schwartz & Pachter, 2007).
            alignedPairs = reweightAlignedPairs2(alignedPairs, strlen(targetSeq),
                                                 strlen(querySeq),
                                                 parameters->gapGamma);
            // I think this calculates the optimal ordered set of
            // alignments from the unordered set of aligned pairs, not
            // completely sure.
            alignedPairs = filterPairwiseAlignmentToMakePairsOrdered(alignedPairs,
                                                                     targetSeq,
                                                                     querySeq,
                                                                     // This parameter says that the minimum posterior probability we will accept has to be at least 0.9.
                                                                     0.9);

            // After this the "aligned pairs" data structure changes,
            // which is a little sketchy. It's just so that the
            // alignment can be printed properly.
            stList_mapReplace(alignedPairs, convertToAnchorPair, NULL);
            stList_sort(alignedPairs, (int (*)(const void *, const void *)) stIntTuple_cmpFn);
            struct PairwiseAlignment *alignment = convertAlignedPairsToPairwiseAlignment(targetHeader, queryHeader,
                                                                                  0, strlen(targetSeq), strlen(querySeq), alignedPairs);
            // Output the cigar string
            cigarWrite(stdout, alignment, 0);

            stList_destruct(alignedPairs);
            destructPairwiseAlignment(alignment);
        }
        stHash_destructIterator(targetIt);
    }
    stHash_destructIterator(queryIt);

    // Clean up
    stHash_destruct(targetSequences);
    stHash_destruct(querySequences);

    pairwiseAlignmentBandingParameters_destruct(parameters);
    stateMachine_destruct(stateMachine);
}

Exemple #22

0

Afficher le fichier

int main(int argc, char *argv[]) {
    /*
     * Script for adding alignments to cactus tree.
     */
    int64_t startTime;
    stKVDatabaseConf *kvDatabaseConf;
    CactusDisk *cactusDisk;
    int key, k;

    bool (*filterFn)(stPinchSegment *, stPinchSegment *) = NULL;
    stSet *outgroupThreads = NULL;

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * alignmentsFile = NULL;
    char * constraintsFile = NULL;
    char * cactusDiskDatabaseString = NULL;
    char * lastzArguments = "";
    int64_t minimumSequenceLengthForBlast = 1;

    //Parameters for annealing/melting rounds
    int64_t *annealingRounds = NULL;
    int64_t annealingRoundsLength = 0;
    int64_t *meltingRounds = NULL;
    int64_t meltingRoundsLength = 0;

    //Parameters for melting
    float maximumAdjacencyComponentSizeRatio = 10;
    int64_t blockTrim = 0;
    int64_t alignmentTrimLength = 0;
    int64_t *alignmentTrims = NULL;
    int64_t chainLengthForBigFlower = 1000000;
    int64_t longChain = 2;
    int64_t minLengthForChromosome = 1000000;
    float proportionOfUnalignedBasesForNewChromosome = 0.8;
    bool breakChainsAtReverseTandems = 1;
    int64_t maximumMedianSequenceLengthBetweenLinkedEnds = INT64_MAX;
    bool realign = 0;
    char *realignArguments = "";
    bool removeRecoverableChains = false;
    bool (*recoverableChainsFilter)(stCactusEdgeEnd *, Flower *) = NULL;
    int64_t maxRecoverableChainsIterations = 1;
    int64_t maxRecoverableChainLength = INT64_MAX;

    //Parameters for removing ancient homologies
    bool doPhylogeny = false;
    int64_t phylogenyNumTrees = 1;
    enum stCaf_RootingMethod phylogenyRootingMethod = BEST_RECON;
    enum stCaf_ScoringMethod phylogenyScoringMethod = COMBINED_LIKELIHOOD;
    double breakpointScalingFactor = 1.0;
    bool phylogenySkipSingleCopyBlocks = 0;
    int64_t phylogenyMaxBaseDistance = 1000;
    int64_t phylogenyMaxBlockDistance = 100;
    bool phylogenyKeepSingleDegreeBlocks = 0;
    stList *phylogenyTreeBuildingMethods = stList_construct();
    enum stCaf_TreeBuildingMethod defaultMethod = GUIDED_NEIGHBOR_JOINING;
    stList_append(phylogenyTreeBuildingMethods, &defaultMethod);
    double phylogenyCostPerDupPerBase = 0.2;
    double phylogenyCostPerLossPerBase = 0.2;
    const char *debugFileName = NULL;
    const char *referenceEventHeader = NULL;
    double phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce = 1.0;
    int64_t numTreeBuildingThreads = 2;
    int64_t minimumBlockDegreeToCheckSupport = 10;
    double minimumBlockHomologySupport = 0.7;
    double nucleotideScalingFactor = 1.0;
    HomologyUnitType phylogenyHomologyUnitType = BLOCK;
    enum stCaf_DistanceCorrectionMethod phylogenyDistanceCorrectionMethod = JUKES_CANTOR;
    bool sortAlignments = false;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "alignments", required_argument, 0, 'b' }, {
                "cactusDisk", required_argument, 0, 'c' }, { "lastzArguments", required_argument, 0, 'd' },
                { "help", no_argument, 0, 'h' }, { "annealingRounds", required_argument, 0, 'i' }, { "trim", required_argument, 0, 'k' }, {
                        "trimChange", required_argument, 0, 'l', }, { "minimumTreeCoverage", required_argument, 0, 'm' }, { "blockTrim",
                        required_argument, 0, 'n' }, { "deannealingRounds", required_argument, 0, 'o' }, { "minimumDegree",
                        required_argument, 0, 'p' }, { "minimumIngroupDegree", required_argument, 0, 'q' }, {
                        "minimumOutgroupDegree", required_argument, 0, 'r' }, { "alignmentFilter", required_argument, 0, 't' }, {
                        "minimumSequenceLengthForBlast", required_argument, 0, 'v' }, { "maxAdjacencyComponentSizeRatio",
                        required_argument, 0, 'w' }, { "constraints", required_argument, 0, 'x' }, { "minLengthForChromosome",
                        required_argument, 0, 'y' }, { "proportionOfUnalignedBasesForNewChromosome", required_argument, 0, 'z' },
                        { "maximumMedianSequenceLengthBetweenLinkedEnds", required_argument, 0, 'A' },
                        { "realign", no_argument, 0, 'B' }, { "realignArguments", required_argument, 0, 'C' },
                        { "phylogenyNumTrees", required_argument, 0, 'D' },
                        { "phylogenyRootingMethod", required_argument, 0, 'E' },
                        { "phylogenyScoringMethod", required_argument, 0, 'F' },
                        { "phylogenyBreakpointScalingFactor", required_argument, 0, 'G' },
                        { "phylogenySkipSingleCopyBlocks", no_argument, 0, 'H' },
                        { "phylogenyMaxBaseDistance", required_argument, 0, 'I' },
                        { "phylogenyMaxBlockDistance", required_argument, 0, 'J' },
                        { "phylogenyDebugFile", required_argument, 0, 'K' },
                        { "phylogenyKeepSingleDegreeBlocks", no_argument, 0, 'L' },
                        { "phylogenyTreeBuildingMethod", required_argument, 0, 'M' },
                        { "phylogenyCostPerDupPerBase", required_argument, 0, 'N' },
                        { "phylogenyCostPerLossPerBase", required_argument, 0, 'O' },
                        { "referenceEventHeader", required_argument, 0, 'P' },
                        { "phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce", required_argument, 0, 'Q' },
                        { "numTreeBuildingThreads", required_argument, 0, 'R' },
                        { "phylogeny", no_argument, 0, 'S' },
                        { "minimumBlockHomologySupport", required_argument, 0, 'T' },
                        { "phylogenyNucleotideScalingFactor", required_argument, 0, 'U' },
                        { "minimumBlockDegreeToCheckSupport", required_argument, 0, 'V' },
                        { "removeRecoverableChains", required_argument, 0, 'W' },
                        { "minimumNumberOfSpecies", required_argument, 0, 'X' },
                        { "phylogenyHomologyUnitType", required_argument, 0, 'Y' },
                        { "phylogenyDistanceCorrectionMethod", required_argument, 0, 'Z' },
                        { "maxRecoverableChainsIterations", required_argument, 0, '1' },
                        { "maxRecoverableChainLength", required_argument, 0, '2' },
                        { 0, 0, 0, 0 } };

        int option_index = 0;

        key = getopt_long(argc, argv, "a:b:c:hi:k:m:n:o:p:q:r:stv:w:x:y:z:A:BC:D:E:", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                st_setLogLevelFromString(logLevelString);
                break;
            case 'b':
                alignmentsFile = stString_copy(optarg);
                break;
            case 'c':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'd':
                lastzArguments = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                annealingRounds = getInts(optarg, &annealingRoundsLength);
                break;
            case 'o':
                meltingRounds = getInts(optarg, &meltingRoundsLength);
                break;
            case 'k':
                alignmentTrims = getInts(optarg, &alignmentTrimLength);
                break;
            case 'm':
                k = sscanf(optarg, "%f", &minimumTreeCoverage);
                assert(k == 1);
                break;
            case 'n':
                k = sscanf(optarg, "%" PRIi64 "", &blockTrim);
                assert(k == 1);
                break;
            case 'p':
                k = sscanf(optarg, "%" PRIi64 "", &minimumDegree);
                assert(k == 1);
                break;
            case 'q':
                k = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree);
                assert(k == 1);
                break;
            case 'r':
                k = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree);
                assert(k == 1);
                break;
            case 't':
                if (strcmp(optarg, "singleCopyOutgroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_filterByOutgroup;
                } else if (strcmp(optarg, "relaxedSingleCopyOutgroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_relaxedFilterByOutgroup;
                } else if (strcmp(optarg, "singleCopy") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_filterByRepeatSpecies;
                } else if (strcmp(optarg, "relaxedSingleCopy") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_relaxedFilterByRepeatSpecies;
                } else if (strcmp(optarg, "singleCopyChr") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_singleCopyChr;
                } else if (strcmp(optarg, "singleCopyIngroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_singleCopyIngroup;
                } else if (strcmp(optarg, "relaxedSingleCopyIngroup") == 0) {
                    sortAlignments = true;
                    filterFn = stCaf_relaxedSingleCopyIngroup;
                } else if (strcmp(optarg, "none") == 0) {
                    sortAlignments = false;
                    filterFn = NULL;
                } else {
                    st_errAbort("Could not recognize alignmentFilter option %s", optarg);
                }
                break;
            case 'v':
                k = sscanf(optarg, "%" PRIi64 "", &minimumSequenceLengthForBlast);
                assert(k == 1);
                break;
            case 'w':
                k = sscanf(optarg, "%f", &maximumAdjacencyComponentSizeRatio);
                assert(k == 1);
                break;
            case 'x':
                constraintsFile = stString_copy(optarg);
                break;
            case 'y':
                k = sscanf(optarg, "%" PRIi64 "", &minLengthForChromosome);
                assert(k == 1);
                break;
            case 'z':
                k = sscanf(optarg, "%f", &proportionOfUnalignedBasesForNewChromosome);
                assert(k == 1);
                break;
            case 'A':
                k = sscanf(optarg, "%" PRIi64 "", &maximumMedianSequenceLengthBetweenLinkedEnds);
                assert(k == 1);
                break;
            case 'B':
                realign = 1;
                break;
            case 'C':
                realignArguments = stString_copy(optarg);
                break;
            case 'D':
                k = sscanf(optarg, "%" PRIi64, &phylogenyNumTrees);
                assert(k == 1);
                break;
            case 'E':
                if (!strcmp(optarg, "outgroupBranch")) {
                    phylogenyRootingMethod = OUTGROUP_BRANCH;
                } else if (!strcmp(optarg, "longestBranch")) {
                    phylogenyRootingMethod = LONGEST_BRANCH;
                } else if (!strcmp(optarg, "bestRecon")) {
                    phylogenyRootingMethod = BEST_RECON;
                } else {
                    st_errAbort("Invalid tree rooting method: %s", optarg);
                }
                break;
            case 'F':
                if (!strcmp(optarg, "reconCost")) {
                    phylogenyScoringMethod = RECON_COST;
                } else if (!strcmp(optarg, "nucLikelihood")) {
                    phylogenyScoringMethod = NUCLEOTIDE_LIKELIHOOD;
                } else if (!strcmp(optarg, "reconLikelihood")) {
                    phylogenyScoringMethod = RECON_LIKELIHOOD;
                } else if (!strcmp(optarg, "combinedLikelihood")) {
                    phylogenyScoringMethod = COMBINED_LIKELIHOOD;
                } else {
                    st_errAbort("Invalid tree scoring method: %s", optarg);
                }
                break;
            case 'G':
                k = sscanf(optarg, "%lf", &breakpointScalingFactor);
                assert(k == 1);
                break;
            case 'H':
                phylogenySkipSingleCopyBlocks = true;
                break;
            case 'I':
                k = sscanf(optarg, "%" PRIi64, &phylogenyMaxBaseDistance);
                assert(k == 1);
                break;
            case 'J':
                k = sscanf(optarg, "%" PRIi64, &phylogenyMaxBlockDistance);
                assert(k == 1);
                break;
            case 'K':
                debugFileName = stString_copy(optarg);
                break;
            case 'L':
                phylogenyKeepSingleDegreeBlocks = true;
                break;
            case 'M':
                // clear the default setting of the list
                stList_destruct(phylogenyTreeBuildingMethods);
                phylogenyTreeBuildingMethods = stList_construct();
                stList *methodStrings = stString_splitByString(optarg, ",");

                for (int64_t i = 0; i < stList_length(methodStrings); i++) {
                    char *methodString = stList_get(methodStrings, i);
                    enum stCaf_TreeBuildingMethod *method = st_malloc(sizeof(enum stCaf_TreeBuildingMethod));
                    if (strcmp(methodString, "neighborJoining") == 0) {
                        *method = NEIGHBOR_JOINING;
                    } else if (strcmp(methodString, "guidedNeighborJoining") == 0) {
                        *method = GUIDED_NEIGHBOR_JOINING;
                    } else if (strcmp(methodString, "splitDecomposition") == 0) {
                        *method = SPLIT_DECOMPOSITION;
                    } else if (strcmp(methodString, "strictSplitDecomposition") == 0) {
                        *method = STRICT_SPLIT_DECOMPOSITION;
                    } else if (strcmp(methodString, "removeBadChains") == 0) {
                        *method = REMOVE_BAD_CHAINS;
                    } else {
                        st_errAbort("Unknown tree building method: %s", methodString);
                    }
                    stList_append(phylogenyTreeBuildingMethods, method);
                }
                stList_destruct(methodStrings);
                break;
            case 'N':
                k = sscanf(optarg, "%lf", &phylogenyCostPerDupPerBase);
                assert(k == 1);
                break;
            case 'O':
                k = sscanf(optarg, "%lf", &phylogenyCostPerLossPerBase);
                assert(k == 1);
                break;
            case 'P':
                referenceEventHeader = stString_copy(optarg);
                break;
            case 'Q':
                k = sscanf(optarg, "%lf", &phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce);
                assert(k == 1);
                break;
            case 'R':
                k = sscanf(optarg, "%" PRIi64, &numTreeBuildingThreads);
                assert(k == 1);
                break;
            case 'S':
                doPhylogeny = true;
                break;
            case 'T':
                k = sscanf(optarg, "%lf", &minimumBlockHomologySupport);
                assert(k == 1);
                assert(minimumBlockHomologySupport <= 1.0);
                assert(minimumBlockHomologySupport >= 0.0);
                break;
            case 'U':
                k = sscanf(optarg, "%lf", &nucleotideScalingFactor);
                assert(k == 1);
                break;
            case 'V':
                k = sscanf(optarg, "%" PRIi64, &minimumBlockDegreeToCheckSupport);
                assert(k == 1);
                break;
            case 'W':
                if (strcmp(optarg, "1") == 0) {
                    removeRecoverableChains = true;
                    recoverableChainsFilter = NULL;
                } else if (strcmp(optarg, "unequalNumberOfIngroupCopies") == 0) {
                    removeRecoverableChains = true;
                    recoverableChainsFilter = stCaf_chainHasUnequalNumberOfIngroupCopies;
                } else if (strcmp(optarg, "unequalNumberOfIngroupCopiesOrNoOutgroup") == 0) {
                    removeRecoverableChains = true;
                    recoverableChainsFilter = stCaf_chainHasUnequalNumberOfIngroupCopiesOrNoOutgroup;
                } else if (strcmp(optarg, "0") == 0) {
                    removeRecoverableChains = false;
                } else {
                    st_errAbort("Could not parse removeRecoverableChains argument");
                }
                break;
            case 'X':
                k = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies);
                if (k != 1) {
                    st_errAbort("Error parsing the minimumNumberOfSpecies argument");
                }
                break;
            case 'Y':
                if (strcmp(optarg, "chain") == 0) {
                    phylogenyHomologyUnitType = CHAIN;
                } else if (strcmp(optarg, "block") == 0) {
                    phylogenyHomologyUnitType = BLOCK;
                } else {
                    st_errAbort("Could not parse the phylogenyHomologyUnitType argument");
                }
                break;
            case 'Z':
                if (strcmp(optarg, "jukesCantor") == 0) {
                    phylogenyDistanceCorrectionMethod = JUKES_CANTOR;
                } else if (strcmp(optarg, "none") == 0 ) {
                    phylogenyDistanceCorrectionMethod = NONE;
                } else {
                    st_errAbort("Could not parse the phylogenyDistanceCorrectionMethod argument");
                }
                break;
            case '1':
                k = sscanf(optarg, "%" PRIi64, &maxRecoverableChainsIterations);
                if (k != 1) {
                    st_errAbort("Error parsing the maxRecoverableChainsIterations argument");
                }
                break;
            case '2':
                k = sscanf(optarg, "%" PRIi64, &maxRecoverableChainLength);
                if (k != 1) {
                    st_errAbort("Error parsing the maxRecoverableChainLength argument");
                }
                break;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    assert(cactusDiskDatabaseString != NULL);
    assert(minimumTreeCoverage >= 0.0);
    assert(minimumTreeCoverage <= 1.0);
    assert(blockTrim >= 0);
    assert(annealingRoundsLength >= 0);
    for (int64_t i = 0; i < annealingRoundsLength; i++) {
        assert(annealingRounds[i] >= 0);
    }
    assert(meltingRoundsLength >= 0);
    for (int64_t i = 1; i < meltingRoundsLength; i++) {
        assert(meltingRounds[i - 1] < meltingRounds[i]);
        assert(meltingRounds[i - 1] >= 1);
    }
    assert(alignmentTrimLength >= 0);
    for (int64_t i = 0; i < alignmentTrimLength; i++) {
        assert(alignmentTrims[i] >= 0);
    }
    assert(minimumOutgroupDegree >= 0);
    assert(minimumIngroupDegree >= 0);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString);

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    st_logInfo("Set up the flower disk\n");

    ///////////////////////////////////////////////////////////////////////////
    // Sort the constraints
    ///////////////////////////////////////////////////////////////////////////

    stPinchIterator *pinchIteratorForConstraints = NULL;
    if (constraintsFile != NULL) {
        pinchIteratorForConstraints = stPinchIterator_constructFromFile(constraintsFile);
        st_logInfo("Created an iterator for the alignment constaints from file: %s\n", constraintsFile);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Do the alignment
    ///////////////////////////////////////////////////////////////////////////

    startTime = time(NULL);

    stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
    if (alignmentsFile == NULL) {
        cactusDisk_preCacheStrings(cactusDisk, flowers);
    }
    char *tempFile1 = NULL;
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        flower = stList_get(flowers, i);
        if (!flower_builtBlocks(flower)) { // Do nothing if the flower already has defined blocks
            st_logDebug("Processing flower: %lli\n", flower_getName(flower));

            stCaf_setFlowerForAlignmentFiltering(flower);

            //Set up the graph and add the initial alignments
            stPinchThreadSet *threadSet = stCaf_setup(flower);

            //Build the set of outgroup threads
            outgroupThreads = stCaf_getOutgroupThreads(flower, threadSet);

            //Setup the alignments
            stPinchIterator *pinchIterator;
            stList *alignmentsList = NULL;
            if (alignmentsFile != NULL) {
                assert(i == 0);
                assert(stList_length(flowers) == 1);
                if (sortAlignments) {
                    tempFile1 = getTempFile();
                    stCaf_sortCigarsFileByScoreInDescendingOrder(alignmentsFile, tempFile1);
                    pinchIterator = stPinchIterator_constructFromFile(tempFile1);
                } else {
                    pinchIterator = stPinchIterator_constructFromFile(alignmentsFile);
                }
            } else {
                if (tempFile1 == NULL) {
                    tempFile1 = getTempFile();
                }
                alignmentsList = stCaf_selfAlignFlower(flower, minimumSequenceLengthForBlast, lastzArguments, realign, realignArguments, tempFile1);
                if (sortAlignments) {
                    stCaf_sortCigarsByScoreInDescendingOrder(alignmentsList);
                }
                st_logDebug("Ran lastz and have %" PRIi64 " alignments\n", stList_length(alignmentsList));
                pinchIterator = stPinchIterator_constructFromList(alignmentsList);
            }

            for (int64_t annealingRound = 0; annealingRound < annealingRoundsLength; annealingRound++) {
                int64_t minimumChainLength = annealingRounds[annealingRound];
                int64_t alignmentTrim = annealingRound < alignmentTrimLength ? alignmentTrims[annealingRound] : 0;
                st_logDebug("Starting annealing round with a minimum chain length of %" PRIi64 " and an alignment trim of %" PRIi64 "\n", minimumChainLength, alignmentTrim);
                stPinchIterator_setTrim(pinchIterator, alignmentTrim);

                //Add back in the constraints
                if (pinchIteratorForConstraints != NULL) {
                    stCaf_anneal(threadSet, pinchIteratorForConstraints, filterFn);
                }

                //Do the annealing
                if (annealingRound == 0) {
                    stCaf_anneal(threadSet, pinchIterator, filterFn);
                } else {
                    stCaf_annealBetweenAdjacencyComponents(threadSet, pinchIterator, filterFn);
                }

                // Dump the block degree and length distribution to a file
                if (debugFileName != NULL) {
                    dumpBlockInfo(threadSet, stString_print("%s-blockStats-preMelting", debugFileName));
                }

                printf("Sequence graph statistics after annealing:\n");
                printThreadSetStatistics(threadSet, flower, stdout);

                // Check for poorly-supported blocks--those that have
                // been transitively aligned together but with very
                // few homologies supporting the transitive
                // alignment. These "megablocks" can snarl up the
                // graph so that a lot of extra gets thrown away in
                // the first melting step.
                stPinchThreadSetBlockIt blockIt = stPinchThreadSet_getBlockIt(threadSet);
                stPinchBlock *block;
                while ((block = stPinchThreadSetBlockIt_getNext(&blockIt)) != NULL) {
                    if (stPinchBlock_getDegree(block) > minimumBlockDegreeToCheckSupport) {
                        uint64_t supportingHomologies = stPinchBlock_getNumSupportingHomologies(block);
                        uint64_t possibleSupportingHomologies = numPossibleSupportingHomologies(block, flower);
                        double support = ((double) supportingHomologies) / possibleSupportingHomologies;
                        if (support < minimumBlockHomologySupport) {
                            fprintf(stdout, "Destroyed a megablock with degree %" PRIi64
                                    " and %" PRIi64 " supporting homologies out of a maximum "
                                    "of %" PRIi64 " (%lf%%).\n", stPinchBlock_getDegree(block),
                                    supportingHomologies, possibleSupportingHomologies, support);
                            stPinchBlock_destruct(block);
                        }
                    }
                }

                //Do the melting rounds
                for (int64_t meltingRound = 0; meltingRound < meltingRoundsLength; meltingRound++) {
                    int64_t minimumChainLengthForMeltingRound = meltingRounds[meltingRound];
                    st_logDebug("Starting melting round with a minimum chain length of %" PRIi64 " \n", minimumChainLengthForMeltingRound);
                    if (minimumChainLengthForMeltingRound >= minimumChainLength) {
                        break;
                    }
                    stCaf_melt(flower, threadSet, NULL, 0, minimumChainLengthForMeltingRound, 0, INT64_MAX);
                } st_logDebug("Last melting round of cycle with a minimum chain length of %" PRIi64 " \n", minimumChainLength);
                stCaf_melt(flower, threadSet, NULL, 0, minimumChainLength, breakChainsAtReverseTandems, maximumMedianSequenceLengthBetweenLinkedEnds);
                //This does the filtering of blocks that do not have the required species/tree-coverage/degree.
                stCaf_melt(flower, threadSet, blockFilterFn, blockTrim, 0, 0, INT64_MAX);
            }

            if (removeRecoverableChains) {
                stCaf_meltRecoverableChains(flower, threadSet, breakChainsAtReverseTandems, maximumMedianSequenceLengthBetweenLinkedEnds, recoverableChainsFilter, maxRecoverableChainsIterations, maxRecoverableChainLength);
            }
            if (debugFileName != NULL) {
                dumpBlockInfo(threadSet, stString_print("%s-blockStats-postMelting", debugFileName));
            }

            printf("Sequence graph statistics after melting:\n");
            printThreadSetStatistics(threadSet, flower, stdout);

            // Build a tree for each block, then use each tree to
            // partition the homologies between the ingroups sequences
            // into those that occur before the speciation with the
            // outgroup and those which occur late.

            if (stSet_size(outgroupThreads) > 0 && doPhylogeny) {
                st_logDebug("Starting to build trees and partition ingroup homologies\n");
                stHash *threadStrings = stCaf_getThreadStrings(flower, threadSet);
                st_logDebug("Got sets of thread strings and set of threads that are outgroups\n");
                stCaf_PhylogenyParameters params;
                params.distanceCorrectionMethod = phylogenyDistanceCorrectionMethod;
                params.treeBuildingMethods = phylogenyTreeBuildingMethods;
                params.rootingMethod = phylogenyRootingMethod;
                params.scoringMethod = phylogenyScoringMethod;
                params.breakpointScalingFactor = breakpointScalingFactor;
                params.nucleotideScalingFactor = nucleotideScalingFactor;
                params.skipSingleCopyBlocks = phylogenySkipSingleCopyBlocks;
                params.keepSingleDegreeBlocks = phylogenyKeepSingleDegreeBlocks;
                params.costPerDupPerBase = phylogenyCostPerDupPerBase;
                params.costPerLossPerBase = phylogenyCostPerLossPerBase;
                params.maxBaseDistance = phylogenyMaxBaseDistance;
                params.maxBlockDistance = phylogenyMaxBlockDistance;
                params.numTrees = phylogenyNumTrees;
                params.ignoreUnalignedBases = 1;
                params.onlyIncludeCompleteFeatureBlocks = 0;
                params.doSplitsWithSupportHigherThanThisAllAtOnce = phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce;
                params.numTreeBuildingThreads = numTreeBuildingThreads;

                assert(params.numTreeBuildingThreads >= 1);

                stCaf_buildTreesToRemoveAncientHomologies(
                    threadSet, phylogenyHomologyUnitType, threadStrings, outgroupThreads, flower, &params,
                    debugFileName == NULL ? NULL : stString_print("%s-phylogeny", debugFileName), referenceEventHeader);
                stHash_destruct(threadStrings);
                st_logDebug("Finished building trees\n");

                if (removeRecoverableChains) {
                    // We melt recoverable chains after splitting, as
                    // well as before, to alleviate coverage loss
                    // caused by bad splits.
                    stCaf_meltRecoverableChains(flower, threadSet, breakChainsAtReverseTandems, maximumMedianSequenceLengthBetweenLinkedEnds, recoverableChainsFilter, maxRecoverableChainsIterations, maxRecoverableChainLength);
                }

                // Enforce the block constraints on minimum degree,
                // etc. after splitting.
                stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX);
            }

            //Sort out case when we allow blocks of degree 1
            if (minimumDegree < 2) {
                st_logDebug("Creating degree 1 blocks\n");
                stCaf_makeDegreeOneBlocks(threadSet);
                stCaf_melt(flower, threadSet, blockFilterFn, blockTrim, 0, 0, INT64_MAX);
            } else if (maximumAdjacencyComponentSizeRatio < INT64_MAX) { //Deal with giant components
                st_logDebug("Breaking up components greedily\n");
                stCaf_breakupComponentsGreedily(threadSet, maximumAdjacencyComponentSizeRatio);
            }

            //Finish up
            stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, minLengthForChromosome,
                    proportionOfUnalignedBasesForNewChromosome); //Flower is then destroyed at this point.
            st_logInfo("Ran the cactus core script\n");

            //Cleanup
            stPinchThreadSet_destruct(threadSet);
            stPinchIterator_destruct(pinchIterator);
            stSet_destruct(outgroupThreads);

            if (alignmentsList != NULL) {
                stList_destruct(alignmentsList);
            }
            st_logInfo("Cleaned up from main loop\n");
        } else {
            st_logInfo("We've already built blocks / alignments for this flower\n");
        }
    }
    stList_destruct(flowers);
    if (tempFile1 != NULL) {
        st_system("rm %s", tempFile1);
    }

    if (constraintsFile != NULL) {
        stPinchIterator_destruct(pinchIteratorForConstraints);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Write the flower to disk.
    ///////////////////////////////////////////////////////////////////////////
    st_logDebug("Writing the flowers to disk\n");
    cactusDisk_write(cactusDisk);
    st_logInfo("Updated the flower on disk and %" PRIi64 " seconds have elapsed\n", time(NULL) - startTime);

    ///////////////////////////////////////////////////////////////////////////
    // Clean up.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_destruct(cactusDisk);
}

Exemple #23

0

Afficher le fichier

Fichier : sonLibNaiveConnectivity.c Projet : benedictpaten/sonLib

/*
 * Destroys a naive-connectivity object.
 *
 * Runs invalidateCache() on the object, destroys the hash stored in its
 * nodesToAdjList field, and finally frees the struct itself. The pointer
 * must not be used again after this call.
 */
void stNaiveConnectivity_destruct(stNaiveConnectivity *connectivity) {
    // Step 1: invalidate the cache while the object is still fully intact.
    invalidateCache(connectivity);

    // Step 2: tear down the hash held in the nodesToAdjList field.
    stHash_destruct(connectivity->nodesToAdjList);

    // Step 3: release the container struct.
    free(connectivity);
}

Exemple #24

0

Afficher le fichier

Fichier : sonLibSet.c Projet : adderan/sonLib

/*
 * Destroys a set: the hash stored in set->hash is destructed first, then the
 * stSet struct itself is freed. The pointer is invalid after this call.
 */
void stSet_destruct(stSet *set) {
    // The set is a thin wrapper; its contents live in the hash it holds.
    stHash_destruct(set->hash);

    // Release the wrapper struct.
    free(set);
}

Exemple #25

0

Afficher le fichier

Fichier : test.mafToFastaStitcherAPI.c Projet : adamnovak/mafTools

/*
 * Exercises addMafBlockToRowHash() when a second block is appended to an
 * existing row hash and one of its rows (name3) is on the opposite strand.
 * The observed row hash and row-order list are compared against hand-built
 * expected ones.
 */
static void test_addBlockToHash_4(CuTest *testCase) {
    // Concatenation with a sequence breakpoint due to *strand* alone.
    // Note that name3 is well within the interstitial boundary, the two blocks
    // essentially looking like >>>>>>>>>>>>> <<<<< (strand diffs).
    options_t *options = options_construct();
    options->breakpointPenalty = 10;
    options->interstitialSequence = 5;

    stList *observedList = stList_construct3(0, free);
    stList *expectedList = stList_construct3(0, free);

    // Row hash as it stands after the first block has been loaded.
    stHash *observedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 13 + 158545518 gcagctgaaaaca\n"
        "s name.chr1      0 10 +       100 ATGT---ATGCCG\n"
        "s name2.chr1     0 10 +       100 ATGT---ATGCCG\n"
        "s name3.chr1     0 13 +       100 GCAGCTGAAAACA\n",
        observedList);

    // The block to be stitched on; its name3 row is on the '-' strand.
    mafBlock_t *incomingBlock = maf_newMafBlockListFromString(
        "a score=0 test\n"
        "s reference.chr0 13  5 + 158545518 ACGTA\n"
        "s name.chr1      12  5 +       100 gtcGG\n"
        "s name2.chr1     10  5 +       100 ATGTg\n"
        "s name3.chr1     82  5 -       100 GGGGG\n",
        3);

    // Expected result of the stitch.
    stHash *expectedHash = createBlockHashFromString(
        "a score=0\n"
        "s reference.chr0 0 18 + 158545518 gcagctgaaaaca------------ACGTA\n"
        "s name.chr1      0 17 +       100 ATGT---ATGCCGac----------gtcGG\n"
        "s name2.chr1     0 15 +       100 ATGT---ATGCCG------------ATGTg\n"
        "s name3.chr1     0 28 +       100 GCAGCTGAAAACA--NNNNNNNNNNGGGGG\n",
        expectedList);

    // Patch the bookkeeping fields of the expected name3 row by hand.
    row_t *name3Row = stHash_search(expectedHash, "name3");
    name3Row->prevRightPos = 86;
    name3Row->strand = '*';
    name3Row->prevStrand = '-';

    // Full sequences backing each row, keyed by sequence name.
    stHash *seqHash = createSeqHashFromString(
        "name.chr1",
        "ATGTATGCCGacgtc"
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
        "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG");

    mtfseq_t *referenceSeq = newMtfseqFromString(
        "gcagctgaaaacaACGTA"
        "tttttttttttttttttttttttttttttttt"
        "tttttttttttttttttttttttttttttttttttttttttttttttttt");
    stHash_insert(seqHash, stString_copy("reference.chr0"), referenceSeq);

    mtfseq_t *name2Seq = newMtfseqFromString(
        "ATGTATGCCGATGTg"
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
        "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC");
    stHash_insert(seqHash, stString_copy("name2.chr1"), name2Seq);

    mtfseq_t *name3Seq = newMtfseqFromString(
        "GCAGCTGAAAACACCCCCgggggggggggggggggggggggggggggggg"
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    stHash_insert(seqHash, stString_copy("name3.chr1"), name3Seq);

    // Run the code under test and compare against the expectation.
    addMafBlockToRowHash(observedHash, seqHash, observedList, incomingBlock, options);
    CuAssertTrue(testCase, hashesAreEqual(observedHash, expectedHash));
    CuAssertTrue(testCase, listsAreEqual(observedList, expectedList));

    // Clean up.
    stHash_destruct(observedHash);
    stHash_destruct(expectedHash);
    stHash_destruct(seqHash);
    stList_destruct(observedList);
    stList_destruct(expectedList);
    maf_destroyMafBlockList(incomingBlock);
    destroyOptions(options);
}

Exemple #26

0

Afficher le fichier

Fichier : endAligner.c Projet : benedictpaten/cactus

/*
 * Aligns the adjacency sequences attached to every cap instance of `end` and
 * returns the result as a sorted set of AlignedPair objects.
 *
 * Steps:
 *   1. For each cap of the end (reversed when cap_getSide() is true) build an
 *      AdjacencySequence and a matching SeqFrag tagged with the name of the
 *      end on the far side of the adjacency; count how many sequences share
 *      each far end.
 *   2. Run makeAlignment() over the SeqFrags.
 *   3. For each sequence, count the chosen pairwise alignments it took part
 *      in, split by whether the partner shares the same far end or not.
 *   4. Derive per-sequence score multipliers: (number of possible partners in
 *      the class) / (number of chosen alignments in the class). Classes with
 *      no chosen alignment get the sentinel INT64_MIN.
 *   5. Convert every aligned pair into an AlignedPair with reweighted scores
 *      and insert it and its ->reverse into the output set.
 *
 * sM, spanningTrees, useProgressiveMerging, gapGamma and
 * pairwiseAlignmentBandingParameters are forwarded to makeAlignment();
 * maxSequenceLength is forwarded to adjacencySequence_construct().
 *
 * Returns a new stSortedSet built with alignedPair_cmpFn / alignedPair_destruct
 * (presumably owned by the caller -- confirm against call sites).
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    // --- 1. Collect the adjacency sequences to be aligned. ---
    stList *sequences = stList_construct3(0, (void (*)(void *)) adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *)) seqFrag_destruct);
    // Maps far End -> heap-allocated count of sequences ending there.
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);

    End_InstanceIterator *capIt = end_getInstanceIterator(end);
    Cap *cap;
    while ((cap = end_getNext(capIt)) != NULL) {
        Cap *orientedCap = cap_getSide(cap) ? cap_getReverse(cap) : cap;

        AdjacencySequence *adjacencySequence = adjacencySequence_construct(orientedCap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);

        assert(cap_getAdjacency(orientedCap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(orientedCap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));

        // Bump the number of seqFrags seen for this far end.
        int64_t *instanceCount = stHash_search(endInstanceNumbers, otherEnd);
        if (instanceCount == NULL) {
            instanceCount = st_calloc(1, sizeof(int64_t));
            assert(*instanceCount == 0);
            stHash_insert(endInstanceNumbers, otherEnd, instanceCount);
        }
        *instanceCount += 1;
    }
    end_destructInstanceIterator(capIt);

    // --- 2. Get the alignment. ---
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma,
            pairwiseAlignmentBandingParameters);

    // --- 3. Count chosen pairwise alignments per sequence, by class. ---
    const int64_t seqFragCount = stList_length(seqFrags);
    int64_t *alignmentCountsNonCommon = st_calloc(seqFragCount, sizeof(int64_t));
    int64_t *alignmentCountsCommon = st_calloc(seqFragCount, sizeof(int64_t));

    for (int64_t k = 0; k < stList_length(mA->chosenPairwiseAlignments); k++) {
        stIntTuple *chosenAlignment = stList_get(mA->chosenPairwiseAlignments, k);
        int64_t seq1 = stIntTuple_get(chosenAlignment, 1);
        int64_t seq2 = stIntTuple_get(chosenAlignment, 2);
        assert(seq1 != seq2);

        SeqFrag *frag1 = stList_get(seqFrags, seq1);
        SeqFrag *frag2 = stList_get(seqFrags, seq2);
        if (frag1->rightEndId == frag2->rightEndId) {
            alignmentCountsCommon[seq1]++;
            alignmentCountsCommon[seq2]++;
        } else {
            alignmentCountsNonCommon[seq1]++;
            alignmentCountsNonCommon[seq2]++;
        }
    }

    // --- 4. Calculate the score adjustments. ---
    double *adjustmentsNonCommon = st_malloc(seqFragCount * sizeof(double));
    double *adjustmentsCommon = st_malloc(seqFragCount * sizeof(double));

    for (int64_t k = 0; k < seqFragCount; k++) {
        SeqFrag *frag = stList_get(seqFrags, k);
        End *otherEnd = flower_getEnd(end_getFlower(end), frag->rightEndId);
        assert(otherEnd != NULL);

        int64_t *instanceCount = stHash_search(endInstanceNumbers, otherEnd);
        assert(instanceCount != NULL);
        int64_t commonInstanceNumber = *instanceCount;
        int64_t nonCommonInstanceNumber = seqFragCount - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(alignmentCountsNonCommon[k] <= nonCommonInstanceNumber);
        assert(alignmentCountsNonCommon[k] >= 0);
        assert(alignmentCountsCommon[k] < commonInstanceNumber);
        assert(alignmentCountsCommon[k] >= 0);

        // Partners with a different far end.
        if (alignmentCountsNonCommon[k] <= 0) {
            adjustmentsNonCommon[k] = INT64_MIN; // sentinel: no alignment in this class
        } else {
            adjustmentsNonCommon[k] = ((double) nonCommonInstanceNumber) / alignmentCountsNonCommon[k];
            assert(adjustmentsNonCommon[k] >= 1.0);
            assert(adjustmentsNonCommon[k] <= nonCommonInstanceNumber);
        }

        // Partners sharing the same far end (the sequence itself is excluded, hence the -1).
        if (alignmentCountsCommon[k] <= 0) {
            adjustmentsCommon[k] = INT64_MIN; // sentinel: no alignment in this class
        } else {
            adjustmentsCommon[k] = ((double) commonInstanceNumber - 1) / alignmentCountsCommon[k];
            assert(adjustmentsCommon[k] >= 1.0);
            assert(adjustmentsCommon[k] <= commonInstanceNumber - 1);
        }
    }

    // --- 5. Convert the alignment pairs to an alignment of the caps. ---
    stSortedSet *sortedAlignment = stSortedSet_construct3(
            (int (*)(const void *, const void *)) alignedPair_cmpFn,
            (void (*)(void *)) alignedPair_destruct);

    while (stList_length(mA->alignedPairs) > 0) {
        stIntTuple *rawPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(rawPair) == 5);

        // Tuple layout: (score, seqIndex1, offset1, seqIndex2, offset2).
        int64_t score = stIntTuple_get(rawPair, 0);
        int64_t seqIndex1 = stIntTuple_get(rawPair, 1);
        int64_t offset1 = stIntTuple_get(rawPair, 2);
        int64_t seqIndex2 = stIntTuple_get(rawPair, 3);
        int64_t offset2 = stIntTuple_get(rawPair, 4);

        AdjacencySequence *adjSeq1 = stList_get(sequences, seqIndex1);
        AdjacencySequence *adjSeq2 = stList_get(sequences, seqIndex2);
        assert(adjSeq1 != adjSeq2);

        if (score <= 0) { // Happens when indel probs are included
            score = 1;    // This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);

        SeqFrag *frag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *frag2 = stList_get(seqFrags, seqIndex2);
        assert(frag1 != frag2);

        double *scoreAdjustments;
        if (frag1->rightEndId == frag2->rightEndId) {
            scoreAdjustments = adjustmentsCommon;
        } else {
            scoreAdjustments = adjustmentsNonCommon;
        }
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);

        // Do the reweighting here.
        AlignedPair *reweightedPair = alignedPair_construct(
                adjSeq1->subsequenceIdentifier, adjSeq1->start + (adjSeq1->strand ? offset1 : -offset1), adjSeq1->strand,
                adjSeq2->subsequenceIdentifier, adjSeq2->start + (adjSeq2->strand ? offset2 : -offset2), adjSeq2->strand,
                score * scoreAdjustments[seqIndex1], score * scoreAdjustments[seqIndex2]);

        assert(stSortedSet_search(sortedAlignment, reweightedPair) == NULL);
        assert(stSortedSet_search(sortedAlignment, reweightedPair->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, reweightedPair);
        stSortedSet_insert(sortedAlignment, reweightedPair->reverse);

        stIntTuple_destruct(rawPair);
    }

    // --- Cleanup ---
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(alignmentCountsNonCommon);
    free(alignmentCountsCommon);
    free(adjustmentsNonCommon);
    free(adjustmentsCommon);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}

Exemple #27

0

Afficher le fichier

Fichier : cactusFacesBuilding.c Projet : benedictpaten/cactus

/*
 * Constructs a face locally from a given Cap, without relying on precomputed
 * liftedEdges.
 *
 * A new Face is created in `flower`. The top nodes reachable from
 * `startingCap` and their lifted edges are gathered by
 * buildFaces_fillTopNodeList2(); each top node is then registered on the face
 * together with its bottom nodes and, per bottom node, the derived
 * destination (NULL when that destination is simply the top node's own
 * adjacency).
 *
 * Nothing is returned; the Face pointer is not freed here (presumably it is
 * owned by the flower after face_construct() -- confirm).
 */
void buildFaces_reconstructFromCap(Cap * startingCap, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);
    stHash *liftedEdgesTable = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);

    printf("Constructing new face");

    // Establish the list of top nodes and fill the liftedEdges table
    buildFaces_fillTopNodeList2(startingCap, topNodes, liftedEdgesTable);

    // What, no top nodes!? (assert is already a no-op under NDEBUG)
    assert(stList_length(topNodes));

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    // For every top node
    for (int64_t topIndex = 0; topIndex < stList_length(topNodes); topIndex++) {
        Cap *topCap = stList_get(topNodes, topIndex);
        face_setTopNode(face, topIndex, topCap);

        stList *liftedEdges = stHash_search(liftedEdgesTable, topCap);
        if (liftedEdges == NULL) {
            // No lifted edges recorded for this top node.
            face_setBottomNodeNumber(face, topIndex, 0);
            continue;
        }
        face_setBottomNodeNumber(face, topIndex, stList_length(liftedEdges));

        // For every bottom node of that top node
        for (int64_t edgeIndex = 0; edgeIndex < stList_length(liftedEdges); edgeIndex++) {
            LiftedEdge *edge = stList_get(liftedEdges, edgeIndex);
            Cap *bottomNode = edge->bottomNode;
            face_addBottomNode(face, topIndex, bottomNode);

            assert(cap_getAdjacency(bottomNode));
            Cap *ancestor = cap_getTopCap(cap_getPositiveOrientation(
                    cap_getAdjacency(bottomNode)));

            // Only record a derived destination when it differs from the
            // top node's own adjacency.
            face_setDerivedDestination(face, topIndex, edgeIndex,
                    cap_getAdjacency(topCap) != ancestor ? ancestor : NULL);

#ifndef NDEBUG
            // A bottom node must never also be one of the top nodes
            if (stList_contains(topNodes, cap_getPositiveOrientation(edge->bottomNode))) {
                abort();
            }
#endif
        }
    }

    // Clean up
    stList_destruct(topNodes);
    stHash_destruct(liftedEdgesTable);
}