/*
 * Looks up, in the parent flower, the group whose name equals this flower's
 * name. Returns NULL when the flower has no parent (its parentFlowerName is
 * NULL_NAME).
 */
Group *flower_getParentGroup(Flower *flower) {
    if (flower->parentFlowerName != NULL_NAME) {
        // The parent is fetched through the cactus disk this flower belongs to.
        Flower *parent = cactusDisk_getFlower(flower_getCactusDisk(flower),
                                              flower->parentFlowerName);
        assert(parent != NULL);
        return flower_getGroup(parent, flower_getName(flower));
    }
    return NULL;
}
/*
 * Unloads the parent flower of the given flower, but only if the flower has a
 * parent and the cactus disk reports that parent as currently loaded.
 */
void flower_unloadParent(Flower *flower) {
    Name parentName = flower->parentFlowerName;
    if (parentName == NULL_NAME) {
        return; // No parent, nothing to unload.
    }
    CactusDisk *disk = flower_getCactusDisk(flower);
    if (!cactusDisk_flowerIsLoaded(disk, parentName)) {
        return; // Parent is not loaded, so there is nothing to unload.
    }
    flower_unload(cactusDisk_getFlower(disk, parentName));
}
/*
 * Checks that cactusDisk_getFlower returns the flowers that were constructed
 * on the disk, and that flowers with the same names can still be retrieved
 * after the disk has been written, destructed and constructed again.
 */
void testCactusDisk_getFlower(CuTest* testCase) {
    cactusDiskTestSetup();

    Flower *first = flower_construct(cactusDisk);
    Flower *second = flower_construct(cactusDisk);
    Name firstName = flower_getName(first);
    Name secondName = flower_getName(second);

    // Lookup by name must hand back the very same in-memory objects.
    CuAssertTrue(testCase, cactusDisk_getFlower(cactusDisk, firstName) == first);
    CuAssertTrue(testCase, cactusDisk_getFlower(cactusDisk, secondName) == second);

    // Now close the disk and reload it, to see if we get the same result.
    cactusDisk_write(cactusDisk);
    cactusDisk_destruct(cactusDisk);
    cactusDisk = cactusDisk_construct(conf, 0);

    first = cactusDisk_getFlower(cactusDisk, firstName);
    second = cactusDisk_getFlower(cactusDisk, secondName);
    CuAssertTrue(testCase, first != NULL);
    CuAssertTrue(testCase, second != NULL);
    CuAssertTrue(testCase, flower_getName(first) == firstName);
    CuAssertTrue(testCase, flower_getName(second) == secondName);

    cactusDiskTestTeardown();
}
int main(int argc, char *argv[]) { char *cactusDiskString = NULL; stKVDatabaseConf *kvDatabaseConf; CactusDisk *cactusDisk; Flower *flower; Flower_SequenceIterator *flowerIt; Sequence *sequence; struct option longopts[] = { {"cactusDisk", required_argument, NULL, 'c' }, {0, 0, 0, 0} }; int flag; while((flag = getopt_long(argc, argv, "", longopts, NULL)) != -1) { switch(flag) { case 'c': cactusDiskString = stString_copy(optarg); break; case '?': default: usage(); return 1; } } if (cactusDiskString == NULL) { st_errAbort("--cactusDisk option must be provided"); } kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskString); cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); // Get top-level flower. flower = cactusDisk_getFlower(cactusDisk, 0); flowerIt = flower_getSequenceIterator(flower); while((sequence = flower_getNextSequence(flowerIt)) != NULL) { MetaSequence *metaSequence = sequence_getMetaSequence(sequence); const char *header; char *firstToken, *newHeader; stList *tokens; // Strip the ID token from the header (should be the first // |-separated token) and complain if there isn't one. header = metaSequence_getHeader(metaSequence); tokens = fastaDecodeHeader(header); assert(stList_length(tokens) > 1); firstToken = stList_removeFirst(tokens); assert(!strncmp(firstToken, "id=", 3)); free(firstToken); newHeader = fastaEncodeHeader(tokens); metaSequence_setHeader(metaSequence, newHeader); } cactusDisk_write(cactusDisk); }
int main(int argc, char *argv[]) { st_setLogLevelFromString(argv[1]); st_logDebug("Set up logging\n"); stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); stKVDatabaseConf_destruct(kvDatabaseConf); st_logDebug("Set up the flower disk\n"); Name flowerName = cactusMisc_stringToName(argv[3]); Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName); int64_t totalBases = flower_getTotalBaseLength(flower); int64_t totalEnds = flower_getEndNumber(flower); int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower); int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower); int64_t totalCaps = flower_getCapNumber(flower); int64_t totalBlocks = flower_getBlockNumber(flower); int64_t totalGroups = flower_getGroupNumber(flower); int64_t totalChains = flower_getChainNumber(flower); int64_t totalLinkGroups = 0; int64_t maxEndDegree = 0; int64_t maxAdjacencyLength = 0; int64_t totalEdges = 0; Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while((end = flower_getNextEnd(endIt)) != NULL) { assert(end_getOrientation(end)); if(end_getInstanceNumber(end) > maxEndDegree) { maxEndDegree = end_getInstanceNumber(end); } stSortedSet *ends = stSortedSet_construct(); End_InstanceIterator *capIt = end_getInstanceIterator(end); Cap *cap; while((cap = end_getNext(capIt)) != NULL) { if(cap_getSequence(cap) != NULL) { Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap)); stSortedSet_insert(ends, adjacentEnd); int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap); if(adjacencyLength < 0) { adjacencyLength *= -1; } assert(adjacencyLength >= 1); if(adjacencyLength >= maxAdjacencyLength) { maxAdjacencyLength = adjacencyLength; } } } end_destructInstanceIterator(capIt); totalEdges += stSortedSet_size(ends); if(stSortedSet_search(ends, end) != NULL) { 
//This ensures we count self edges twice, so that the division works. totalEdges += 1; } stSortedSet_destruct(ends); } assert(totalEdges % 2 == 0); flower_destructEndIterator(endIt); Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while((group = flower_getNextGroup(groupIt)) != NULL) { if(group_getLink(group) != NULL) { totalLinkGroups++; } } flower_destructGroupIterator(groupIt); printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n", flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups); return 0; }
int main(int argc, char *argv[]) { /* * Script for adding a reference genome to a flower. */ /* * Arguments/options */ char * logLevelString = NULL; char * cactusDiskDatabaseString = NULL; char *referenceEventString = (char *) cactusMisc_getDefaultReferenceEventHeader(); char *outputFile = NULL; Name flowerName = NULL_NAME; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'c' }, { "flowerName", required_argument, 0, 'd' }, { "referenceEventString", required_argument, 0, 'g' }, { "help", no_argument, 0, 'h' }, { "outputFile", required_argument, 0, 'k' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "a:c:d:g:hk:", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = stString_copy(optarg); break; case 'c': cactusDiskDatabaseString = stString_copy(optarg); break; case 'd': flowerName = cactusMisc_stringToName(optarg); break; case 'g': referenceEventString = stString_copy(optarg); break; case 'h': usage(); return 0; case 'k': outputFile = stString_copy(optarg); break; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// assert(cactusDiskDatabaseString != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString( cactusDiskDatabaseString); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); stKVDatabaseConf_destruct(kvDatabaseConf); st_logInfo("Set up the flower disk\n"); /////////////////////////////////////////////////////////////////////////// // Get the set of flowers to manipulate /////////////////////////////////////////////////////////////////////////// Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName); /////////////////////////////////////////////////////////////////////////// // Get the reference event name /////////////////////////////////////////////////////////////////////////// Event *referenceEvent = eventTree_getEventByHeader( flower_getEventTree(flower), referenceEventString); assert(referenceEvent != NULL); Name referenceEventName = event_getName(referenceEvent); /////////////////////////////////////////////////////////////////////////// // Now process each flower in turn. /////////////////////////////////////////////////////////////////////////// if(outputFile == NULL) { st_errAbort("No output file specified\n"); } FILE *fileHandle = fopen(outputFile, "w"); printFastaSequences(flower, fileHandle, referenceEventName); if(fileHandle != NULL) { fclose(fileHandle); } /////////////////////////////////////////////////////////////////////////// //Clean up memory /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); //return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. 
free(cactusDiskDatabaseString); free(referenceEventString); free(logLevelString); st_logInfo("Cleaned stuff up and am finished\n"); //while(1); return 0; }
int main(int argc, char *argv[]) { /* * Open the database. * Construct a flower. * Construct an event tree representing the species tree. * For each sequence contruct two ends each containing an cap. * Make a file for the sequence. * Link the two caps. * Finish! */ int64_t key, j; Group *group; Flower_EndIterator *endIterator; End *end; bool makeEventHeadersAlphaNumeric = 0; /* * Arguments/options */ char * logLevelString = NULL; char * speciesTree = NULL; char * outgroupEvents = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "speciesTree", required_argument, 0, 'f' }, { "outgroupEvents", required_argument, 0, 'g' }, { "help", no_argument, 0, 'h' }, { "makeEventHeadersAlphaNumeric", no_argument, 0, 'i' }, { 0, 0, 0, 0 } }; int option_index = 0; key = getopt_long(argc, argv, "a:b:f:hg:i", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = optarg; break; case 'b': cactusDiskDatabaseString = optarg; break; case 'f': speciesTree = optarg; break; case 'g': outgroupEvents = optarg; break; case 'h': usage(); return 0; case 'i': makeEventHeadersAlphaNumeric = 1; break; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// //assert(logLevelString == NULL || strcmp(logLevelString, "CRITICAL") == 0 || strcmp(logLevelString, "INFO") == 0 || strcmp(logLevelString, "DEBUG") == 0); assert(cactusDiskDatabaseString != NULL); assert(speciesTree != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString); for (j = optind; j < argc; j++) { st_logInfo("Sequence file/directory %s\n", argv[j]); } ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); if (stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeTokyoCabinet || stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeKyotoTycoon) { assert(stKVDatabaseConf_getDir(kvDatabaseConf) != NULL); cactusDisk = cactusDisk_construct2(kvDatabaseConf, "cactusSequences"); } else { cactusDisk = cactusDisk_construct(kvDatabaseConf, 1); } st_logInfo("Set up the flower disk\n"); ////////////////////////////////////////////// //Construct the flower ////////////////////////////////////////////// if (cactusDisk_getFlower(cactusDisk, 0) != NULL) { cactusDisk_destruct(cactusDisk); st_logInfo("The first flower already exists\n"); return 0; } flower = flower_construct2(0, cactusDisk); assert(flower_getName(flower) == 0); st_logInfo("Constructed the flower\n"); ////////////////////////////////////////////// //Construct the event tree ////////////////////////////////////////////// st_logInfo("Going to build the event tree with newick string: %s\n", speciesTree); stTree *tree = stTree_parseNewickString(speciesTree); 
st_logInfo("Parsed the tree\n"); if (makeEventHeadersAlphaNumeric) { makeEventHeadersAlphaNumericFn(tree); } stTree_setBranchLength(tree, INT64_MAX); checkBranchLengthsAreDefined(tree); eventTree = eventTree_construct2(flower); //creates the event tree and the root even totalEventNumber = 1; st_logInfo("Constructed the basic event tree\n"); // Construct a set of outgroup names so that ancestral outgroups // get recognized. stSet *outgroupNameSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); if(outgroupEvents != NULL) { stList *outgroupNames = stString_split(outgroupEvents); for(int64_t i = 0; i < stList_length(outgroupNames); i++) { char *outgroupName = stList_get(outgroupNames, i); stSet_insert(outgroupNameSet, stString_copy(outgroupName)); } stList_destruct(outgroupNames); } //now traverse the tree j = optind; assignEventsAndSequences(eventTree_getRootEvent(eventTree), tree, outgroupNameSet, argv, &j); char *eventTreeString = eventTree_makeNewickString(eventTree); st_logInfo( "Constructed the initial flower with %" PRIi64 " sequences and %" PRIi64 " events with string: %s\n", totalSequenceNumber, totalEventNumber, eventTreeString); assert(event_getSubTreeBranchLength(eventTree_getRootEvent(eventTree)) >= 0.0); free(eventTreeString); //assert(0); ////////////////////////////////////////////// //Label any outgroup events. ////////////////////////////////////////////// if (outgroupEvents != NULL) { stList *outgroupEventsList = stString_split(outgroupEvents); for (int64_t i = 0; i < stList_length(outgroupEventsList); i++) { char *outgroupEvent = makeEventHeadersAlphaNumeric ? 
makeAlphaNumeric(stList_get(outgroupEventsList, i)) : stString_copy(stList_get(outgroupEventsList, i)); Event *event = eventTree_getEventByHeader(eventTree, outgroupEvent); if (event == NULL) { st_errAbort("Got an outgroup string that does not match an event, outgroup string %s", outgroupEvent); } assert(!event_isOutgroup(event)); event_setOutgroupStatus(event, 1); assert(event_isOutgroup(event)); free(outgroupEvent); } stList_destruct(outgroupEventsList); } ////////////////////////////////////////////// //Construct the terminal group. ////////////////////////////////////////////// if (flower_getEndNumber(flower) > 0) { group = group_construct2(flower); endIterator = flower_getEndIterator(flower); while ((end = flower_getNextEnd(endIterator)) != NULL) { end_setGroup(end, group); } flower_destructEndIterator(endIterator); assert(group_isLeaf(group)); // Create a one link chain if there is only one pair of attached ends.. group_constructChainForLink(group); assert(!flower_builtBlocks(flower)); } else { flower_setBuiltBlocks(flower, 1); } /////////////////////////////////////////////////////////////////////////// // Write the flower to disk. /////////////////////////////////////////////////////////////////////////// //flower_check(flower); cactusDisk_write(cactusDisk); st_logInfo("Updated the flower on disk\n"); /////////////////////////////////////////////////////////////////////////// // Cleanup. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. stSet_destruct(outgroupNameSet); stTree_destruct(tree); stKVDatabaseConf_destruct(kvDatabaseConf); return 0; }
int main(int argc, char *argv[]) { char * logLevelString = NULL; char * cactusDiskDatabaseString = NULL; char * flowerName = NULL; char * outputFile = NULL; char *referenceEventString = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "referenceEventString", required_argument, 0, 'b' }, { "cactusDisk", required_argument, 0, 'c' }, { "flowerName", required_argument, 0, 'e' }, { "outputFile", required_argument, 0, 'f' }, { "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "a:b:c:d:e:f:h", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = stString_copy(optarg); break; case 'b': referenceEventString = stString_copy(optarg); break; case 'c': cactusDiskDatabaseString = stString_copy(optarg); break; case 'e': flowerName = stString_copy(optarg); break; case 'f': outputFile = stString_copy(optarg); break; case 'h': usage(); return 0; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// assert(flowerName != NULL); assert(referenceEventString != NULL); assert(cactusDiskDatabaseString != NULL); assert(outputFile != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower name : %s\n", flowerName); st_logInfo("Sequence name : %s\n", referenceEventString); st_logInfo("Output file : %s\n", outputFile); ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString( cactusDiskDatabaseString); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, false, true); st_logInfo("Set up the flower disk\n"); /////////////////////////////////////////////////////////////////////////// // Parse the basic reconstruction problem /////////////////////////////////////////////////////////////////////////// Flower *flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName( flowerName)); st_logInfo("Parsed the top level flower of the cactus tree to check\n"); /////////////////////////////////////////////////////////////////////////// // Recursive check the flowers. 
/////////////////////////////////////////////////////////////////////////// //int64_t startTime = time(NULL); //flower = flower_addReferenceSequence(flower, cactusDisk, name); //st_logInfo("Added the reference sequence in %" PRIi64 " seconds/\n", time(NULL) - startTime); int64_t numSequences = flower_getSequenceNumber(flower); //Make sure that referenceSequence has already been added: if(getSequenceMatchesEvent(flower, referenceEventString) == NULL && numSequences > 0){ fprintf(stderr, "No reference sequence found in cactusDisk\n"); exit(EXIT_FAILURE); } FILE *fileHandle = fopen(outputFile, "w"); if (numSequences > 0) { getReferenceSequences(fileHandle, flower, referenceEventString); } else { st_logCritical("cactus_getReferenceSeq found no reference sequence in empty cactus disk %s", cactusDiskDatabaseString); } fclose(fileHandle); /////////////////////////////////////////////////////////////////////////// // Clean up. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. stKVDatabaseConf_destruct(kvDatabaseConf); return 0; }
int main(int argc, char *argv[]) { Flower *flower; FILE *fileHandle; /* * Arguments/options */ char * logLevelString = NULL; char * cactusDiskDatabaseString = NULL; char * flowerName = NULL; char * outputFile = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'c' }, { "flowerName", required_argument, 0, 'd' }, { "outputFile", required_argument, 0, 'e' }, { "scaleNodeSizes", no_argument, 0, 'f' }, { "nameLabels", no_argument, 0, 'g' }, { "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "a:c:d:e:fgh", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = stString_copy(optarg); break; case 'c': cactusDiskDatabaseString = stString_copy(optarg); break; case 'd': flowerName = stString_copy(optarg); break; case 'e': outputFile = stString_copy(optarg); break; case 'f': scaleNodeSizes = !scaleNodeSizes; break; case 'g': nameLabels = !nameLabels; break; case 'h': usage(); return 0; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// assert(cactusDiskDatabaseString != NULL); assert(flowerName != NULL); assert(outputFile != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower name : %s\n", flowerName); st_logInfo("Output graph file : %s\n", outputFile); ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); st_logInfo("Set up the flower disk\n"); /////////////////////////////////////////////////////////////////////////// // Parse the basic reconstruction problem /////////////////////////////////////////////////////////////////////////// flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName(flowerName)); st_logInfo("Parsed the top level flower of the cactus tree to build\n"); /////////////////////////////////////////////////////////////////////////// // Build the graph. /////////////////////////////////////////////////////////////////////////// totalProblemSize = flower_getTotalBaseLength(flower); fileHandle = fopen(outputFile, "w"); graphViz_setupGraphFile(fileHandle); makeCactusTree_flower(flower, fileHandle, NULL, NULL); graphViz_finishGraphFile(fileHandle); fclose(fileHandle); st_logInfo("Written the tree to file\n"); /////////////////////////////////////////////////////////////////////////// // Clean up. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); stKVDatabaseConf_destruct(kvDatabaseConf); return 0; }
//============================== MAIN ========================================= int main(int argc, char *argv[]) { Flower *flower; /* * Arguments/options */ char * st_logLevelString = NULL; char * cactusDiskDatabaseString = NULL; char * flowerName = "0"; char * outputFile = NULL; char * species = NULL; char * geneFile = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while(1) { static struct option long_options[] = { { "genePslFile", required_argument, 0, 'g' }, { "species", required_argument, 0, 's' }, { "st_logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'c' }, { "outputFile", required_argument, 0, 'o' }, { "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "s:g:o:a:c:h", long_options, &option_index); if(key == -1) { break; } switch(key) { case 'a': st_logLevelString = stString_copy(optarg); break; case 'c': cactusDiskDatabaseString = stString_copy(optarg); break; case 'o': outputFile = stString_copy(optarg); break; case 's': species = stString_copy(optarg); break; case 'g': geneFile = stString_copy(optarg); break; case 'h': usage(); return 0; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// assert(cactusDiskDatabaseString != NULL); assert(outputFile != NULL); assert(species != NULL); assert(geneFile != NULL); ////////////////////////////////////////////// //Set up st_logging ////////////////////////////////////////////// st_setLogLevelFromString(st_logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString); st_logInfo("Output file : %s\n", outputFile); st_logInfo("Species: %s\n", species); st_logInfo("GenePslFile: %s\n", geneFile); ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); st_logInfo("Set up the flower disk\n"); /////////////////////////////////////////////////////////////////////////// // Parse the basic reconstruction problem /////////////////////////////////////////////////////////////////////////// flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName(flowerName)); st_logInfo("Parsed the top level flower of the cactus tree to check\n"); /////////////////////////////////////////////////////////////////////////// // Recursive check the flowers. /////////////////////////////////////////////////////////////////////////// int64_t startTime = time(NULL); FILE *fileHandle = fopen(outputFile, "w"); struct bed *gene = bedLoadAll(geneFile); mapGenes(flower, fileHandle, gene, species); fclose(fileHandle); st_logInfo("Map genes in %" PRIi64 " seconds/\n", time(NULL) - startTime); /////////////////////////////////////////////////////////////////////////// // Clean up. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); return 0; }
/*
 * Checks that the test flower has a real name (not NULL_NAME) and that
 * looking that name up on the cactus disk returns the same flower.
 */
void testFlower_getName(CuTest* testCase) {
    cactusFlowerTestSetup();

    // The name must be set to something other than the null sentinel.
    CuAssertTrue(testCase, !(flower_getName(flower) == NULL_NAME));

    // Lookup by name must give back the same flower object.
    CuAssertTrue(testCase,
                 flower == cactusDisk_getFlower(cactusDisk, flower_getName(flower)));

    cactusFlowerTestTeardown();
}
int main(int argc, char *argv[]) { char * logLevelString = NULL; char * cactusDiskDatabaseString = NULL; int64_t i, j; int64_t spanningTrees = 10; int64_t maximumLength = 1500; bool useProgressiveMerging = 0; float matchGamma = 0.5; bool useBanding = 0; int64_t k; stList *listOfEndAlignmentFiles = NULL; char *endAlignmentsToPrecomputeOutputFile = NULL; bool calculateWhichEndsToComputeSeparately = 0; int64_t largeEndSize = 1000000; int64_t chainLengthForBigFlower = 1000000; int64_t longChain = 2; char *ingroupCoverageFilePath = NULL; int64_t minimumSizeToRescue = 1; double minimumCoverageToRescue = 0.0; PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters = pairwiseAlignmentBandingParameters_construct(); /* * Setup the input parameters for cactus core. */ bool pruneOutStubAlignments = 0; /* * Parse the options. */ while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "help", no_argument, 0, 'h' }, { "spanningTrees", required_argument, 0, 'i' }, { "maximumLength", required_argument, 0, 'j' }, { "useBanding", no_argument, 0, 'k' }, { "gapGamma", required_argument, 0, 'l' }, { "matchGamma", required_argument, 0, 'L' }, { "splitMatrixBiggerThanThis", required_argument, 0, 'o' }, { "anchorMatrixBiggerThanThis", required_argument, 0, 'p' }, { "repeatMaskMatrixBiggerThanThis", required_argument, 0, 'q' }, { "diagonalExpansion", required_argument, 0, 'r' }, { "constraintDiagonalTrim", required_argument, 0, 't' }, { "minimumDegree", required_argument, 0, 'u' }, { "alignAmbiguityCharacters", no_argument, 0, 'w' }, { "pruneOutStubAlignments", no_argument, 0, 'y' }, { "minimumIngroupDegree", required_argument, 0, 'A' }, { "minimumOutgroupDegree", required_argument, 0, 'B' }, { "precomputedAlignments", required_argument, 0, 'D' }, { "endAlignmentsToPrecomputeOutputFile", required_argument, 0, 'E' }, { "useProgressiveMerging", no_argument, 0, 'F' }, { 
"calculateWhichEndsToComputeSeparately", no_argument, 0, 'G' }, { "largeEndSize", required_argument, 0, 'I' }, {"ingroupCoverageFile", required_argument, 0, 'J'}, {"minimumSizeToRescue", required_argument, 0, 'K'}, {"minimumCoverageToRescue", required_argument, 0, 'M'}, { "minimumNumberOfSpecies", required_argument, 0, 'N' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "a:b:hi:j:kl:o:p:q:r:t:u:wy:A:B:D:E:FGI:J:K:L:M:N:", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = stString_copy(optarg); st_setLogLevelFromString(logLevelString); break; case 'b': cactusDiskDatabaseString = stString_copy(optarg); break; case 'h': usage(); return 0; case 'i': i = sscanf(optarg, "%" PRIi64 "", &spanningTrees); (void) i; assert(i == 1); assert(spanningTrees >= 0); break; case 'j': i = sscanf(optarg, "%" PRIi64 "", &maximumLength); assert(i == 1); assert(maximumLength >= 0); break; case 'k': useBanding = !useBanding; break; case 'l': i = sscanf(optarg, "%f", &pairwiseAlignmentBandingParameters->gapGamma); assert(i == 1); assert(pairwiseAlignmentBandingParameters->gapGamma >= 0.0); break; case 'L': i = sscanf(optarg, "%f", &matchGamma); assert(i == 1); assert(matchGamma >= 0.0); break; case 'o': i = sscanf(optarg, "%" PRIi64 "", &k); assert(i == 1); assert(k >= 0); pairwiseAlignmentBandingParameters->splitMatrixBiggerThanThis = (int64_t) k * k; break; case 'p': i = sscanf(optarg, "%" PRIi64 "", &k); assert(i == 1); assert(k >= 0); pairwiseAlignmentBandingParameters->anchorMatrixBiggerThanThis = (int64_t) k * k; break; case 'q': i = sscanf(optarg, "%" PRIi64 "", &k); assert(i == 1); assert(k >= 0); pairwiseAlignmentBandingParameters->repeatMaskMatrixBiggerThanThis = (int64_t) k * k; break; case 'r': i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->diagonalExpansion); assert(i == 1); assert(pairwiseAlignmentBandingParameters->diagonalExpansion >= 0); 
assert(pairwiseAlignmentBandingParameters->diagonalExpansion % 2 == 0); break; case 't': i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->constraintDiagonalTrim); assert(i == 1); assert(pairwiseAlignmentBandingParameters->constraintDiagonalTrim >= 0); break; case 'u': i = sscanf(optarg, "%" PRIi64 "", &minimumDegree); assert(i == 1); break; case 'w': pairwiseAlignmentBandingParameters->alignAmbiguityCharacters = 1; break; case 'y': pruneOutStubAlignments = 1; break; case 'A': i = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree); assert(i == 1); break; case 'B': i = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree); assert(i == 1); break; case 'D': listOfEndAlignmentFiles = stString_split(optarg); break; case 'E': endAlignmentsToPrecomputeOutputFile = stString_copy(optarg); break; case 'F': useProgressiveMerging = 1; break; case 'G': calculateWhichEndsToComputeSeparately = 1; break; case 'I': i = sscanf(optarg, "%" PRIi64 "", &largeEndSize); assert(i == 1); break; case 'J': ingroupCoverageFilePath = stString_copy(optarg); break; case 'K': i = sscanf(optarg, "%" PRIi64, &minimumSizeToRescue); assert(i == 1); break; case 'M': i = sscanf(optarg, "%lf", &minimumCoverageToRescue); assert(i == 1); break; case 'N': i = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies); if (i != 1) { st_errAbort("Error parsing minimumNumberOfSpecies parameter"); } break; default: usage(); return 1; } } st_setLogLevelFromString(logLevelString); /* * Load the flowerdisk */ stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); //We precache the sequences st_logInfo("Set up the flower disk\n"); /* * Load the hmm */ StateMachine *sM = stateMachine5_construct(fiveState); /* * For each flower. 
*/ if (calculateWhichEndsToComputeSeparately) { stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk); if (stList_length(flowers) != 1) { st_errAbort("We are breaking up a flower's end alignments for precomputation but we have %" PRIi64 " flowers.\n", stList_length(flowers)); } stSortedSet *endsToAlignSeparately = getEndsToAlignSeparately(stList_get(flowers, 0), maximumLength, largeEndSize); assert(stSortedSet_size(endsToAlignSeparately) != 1); stSortedSetIterator *it = stSortedSet_getIterator(endsToAlignSeparately); End *end; while ((end = stSortedSet_getNext(it)) != NULL) { fprintf(stdout, "%s\t%" PRIi64 "\t%" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), end_getInstanceNumber(end), getTotalAdjacencyLength(end)); } return 0; //avoid cleanup costs stSortedSet_destructIterator(it); stSortedSet_destruct(endsToAlignSeparately); } else if (endAlignmentsToPrecomputeOutputFile != NULL) { /* * In this case we will align a set of end and save the alignments in a file. */ stList *names = flowerWriter_parseNames(stdin); Flower *flower = cactusDisk_getFlower(cactusDisk, *((Name *)stList_get(names, 0))); FILE *fileHandle = fopen(endAlignmentsToPrecomputeOutputFile, "w"); for(int64_t i=1; i<stList_length(names); i++) { End *end = flower_getEnd(flower, *((Name *)stList_get(names, i))); if (end == NULL) { st_errAbort("The end %" PRIi64 " was not found in the flower\n", *((Name *)stList_get(names, i))); } stSortedSet *endAlignment = makeEndAlignment(sM, end, spanningTrees, maximumLength, useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters); writeEndAlignmentToDisk(end, endAlignment, fileHandle); stSortedSet_destruct(endAlignment); } fclose(fileHandle); return 0; //avoid cleanup costs stList_destruct(names); st_logInfo("Finished precomputing end alignments\n"); } else { /* * Compute complete flower alignments, possibly loading some precomputed alignments. 
*/ bedRegion *bedRegions = NULL; size_t numBeds = 0; if (ingroupCoverageFilePath != NULL) { // Pre-load the mmap for the coverage file. FILE *coverageFile = fopen(ingroupCoverageFilePath, "rb"); if (coverageFile == NULL) { st_errnoAbort("Opening coverage file %s failed", ingroupCoverageFilePath); } fseek(coverageFile, 0, SEEK_END); int64_t coverageFileLen = ftell(coverageFile); assert(coverageFileLen >= 0); assert(coverageFileLen % sizeof(bedRegion) == 0); if (coverageFileLen == 0) { // mmap doesn't like length-0 mappings, for obvious // reasons. Pretend that the coverage file doesn't // exist in this case, since it contains no data. ingroupCoverageFilePath = NULL; } else { // Establish a memory mapping for the file. bedRegions = mmap(NULL, coverageFileLen, PROT_READ, MAP_SHARED, fileno(coverageFile), 0); if (bedRegions == MAP_FAILED) { st_errnoAbort("Failure mapping coverage file"); } numBeds = coverageFileLen / sizeof(bedRegion); } fclose(coverageFile); } stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk); if (listOfEndAlignmentFiles != NULL && stList_length(flowers) != 1) { st_errAbort("We have precomputed alignments but %" PRIi64 " flowers to align.\n", stList_length(flowers)); } cactusDisk_preCacheStrings(cactusDisk, flowers); for (j = 0; j < stList_length(flowers); j++) { flower = stList_get(flowers, j); st_logInfo("Processing a flower\n"); stSortedSet *alignedPairs = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength, useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters, pruneOutStubAlignments); st_logInfo("Created the alignment: %" PRIi64 " pairs\n", stSortedSet_size(alignedPairs)); stPinchIterator *pinchIterator = stPinchIterator_constructFromAlignedPairs(alignedPairs, getNextAlignedPairAlignment); /* * Run the cactus caf functions to build cactus. 
*/ stPinchThreadSet *threadSet = stCaf_setup(flower); stCaf_anneal(threadSet, pinchIterator, NULL); if (minimumDegree < 2) { stCaf_makeDegreeOneBlocks(threadSet); } if (minimumIngroupDegree > 0 || minimumOutgroupDegree > 0 || minimumDegree > 1) { stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX); } if (ingroupCoverageFilePath != NULL) { // Rescue any sequence that is covered by outgroups // but currently unaligned into single-degree blocks. stPinchThreadSetIt pinchIt = stPinchThreadSet_getIt(threadSet); stPinchThread *thread; while ((thread = stPinchThreadSetIt_getNext(&pinchIt)) != NULL) { Cap *cap = flower_getCap(flower, stPinchThread_getName(thread)); assert(cap != NULL); Sequence *sequence = cap_getSequence(cap); assert(sequence != NULL); rescueCoveredRegions(thread, bedRegions, numBeds, sequence_getName(sequence), minimumSizeToRescue, minimumCoverageToRescue); } stCaf_joinTrivialBoundaries(threadSet); } stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, INT64_MAX, INT64_MAX); //Flower now destroyed. stPinchThreadSet_destruct(threadSet); st_logInfo("Ran the cactus core script.\n"); /* * Cleanup */ //Clean up the sorted set after cleaning up the iterator stPinchIterator_destruct(pinchIterator); stSortedSet_destruct(alignedPairs); st_logInfo("Finished filling in the alignments for the flower\n"); } stList_destruct(flowers); //st_errAbort("Done\n"); /* * Write and close the cactusdisk. */ cactusDisk_write(cactusDisk); return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. if (bedRegions != NULL) { // Clean up our mapping. 
munmap(bedRegions, numBeds * sizeof(bedRegion)); } } /////////////////////////////////////////////////////////////////////////// // Cleanup /////////////////////////////////////////////////////////////////////////// stateMachine_destruct(sM); cactusDisk_destruct(cactusDisk); stKVDatabaseConf_destruct(kvDatabaseConf); //destructCactusCoreInputParameters(cCIP); free(cactusDiskDatabaseString); if (listOfEndAlignmentFiles != NULL) { stList_destruct(listOfEndAlignmentFiles); } if (logLevelString != NULL) { free(logLevelString); } st_logInfo("Finished with the flower disk for this flower.\n"); //while(1); return 0; }