static void recoverBrokenAdjacencies(Flower *flower, stList *recoveredCaps, Name referenceEventName) { /* * Find reference intervals that are book-ended by stubs created in a child flower. */ Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while((group = flower_getNextGroup(groupIt)) != NULL) { Flower *nestedFlower; if((nestedFlower = group_getNestedFlower(group)) != NULL) { Flower_EndIterator *endIt = flower_getEndIterator(nestedFlower); End *childEnd; while((childEnd = flower_getNextEnd(endIt)) != NULL) { if(end_isStubEnd(childEnd) && flower_getEnd(flower, end_getName(childEnd)) == NULL) { //We have a thread we need to promote Cap *childCap = getCapForReferenceEvent(childEnd, referenceEventName); //The cap in the reference assert(childCap != NULL); assert(!end_isAttached(childEnd)); childCap = cap_getStrand(childCap) ? childCap : cap_getReverse(childCap); if (!cap_getSide(childCap)) { Cap *adjacentChildCap = NULL; int64_t adjacencyLength = traceThreadLength(childCap, &adjacentChildCap); Cap *cap = copyCapToParent(childCap, recoveredCaps); assert(adjacentChildCap != NULL); assert(!end_isAttached(cap_getEnd(adjacentChildCap))); assert(!cap_getSide(cap)); Cap *adjacentCap = copyCapToParent(adjacentChildCap, recoveredCaps); cap_makeAdjacent(cap, adjacentCap); setAdjacencyLength(cap, adjacentCap, adjacencyLength); } } } flower_destructEndIterator(endIt); } } flower_destructGroupIterator(groupIt); }
void stCaf_addAdjacencies(Flower *flower) { //Build a list of caps. stList *list = stList_construct(); Flower_EndIterator *endIterator = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIterator)) != NULL) { End_InstanceIterator *instanceIterator = end_getInstanceIterator(end); Cap *cap; while ((cap = end_getNext(instanceIterator)) != NULL) { if (!cap_getStrand(cap)) { cap = cap_getReverse(cap); } stList_append(list, cap); } end_destructInstanceIterator(instanceIterator); } flower_destructEndIterator(endIterator); assert(stList_length(list) % 2 == 0); //Sort the list of caps. stList_sort(list, (int(*)(const void *, const void *)) addAdjacenciesPP); //Now make the adjacencies. for (int64_t i = 1; i < stList_length(list); i += 2) { Cap *cap = stList_get(list, i - 1); Cap *cap2 = stList_get(list, i); cap_makeAdjacent(cap, cap2); } //Clean up. stList_destruct(list); }
void flower_check(Flower *flower) { eventTree_check(flower_getEventTree(flower)); Flower_GroupIterator *groupIterator = flower_getGroupIterator(flower); Group *group; while ((group = flower_getNextGroup(groupIterator)) != NULL) { group_check(group); } flower_destructGroupIterator(groupIterator); Flower_ChainIterator *chainIterator = flower_getChainIterator(flower); Chain *chain; while ((chain = flower_getNextChain(chainIterator)) != NULL) { chain_check(chain); } flower_destructCapIterator(chainIterator); //We check built trees in here. Flower_EndIterator *endIterator = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIterator)) != NULL) { end_check(end); end_check(end_getReverse(end)); //We will test everything backwards also. } flower_destructEndIterator(endIterator); if (flower_builtFaces(flower)) { Flower_FaceIterator *faceIterator = flower_getFaceIterator(flower); Face *face; while ((face = flower_getNextFace(faceIterator)) != NULL) { face_check(face); } flower_destructFaceIterator(faceIterator); face_checkFaces(flower); } else { cactusCheck(flower_getFaceNumber(flower) == 0); } if (flower_builtBlocks(flower)) { //Note that a flower for which the blocks are not yet built must be a leaf. Flower_BlockIterator *blockIterator = flower_getBlockIterator(flower); Block *block; while ((block = flower_getNextBlock(blockIterator)) != NULL) { block_check(block); block_check(block_getReverse(block)); //We will test everything backwards also. } flower_destructBlockIterator(blockIterator); } else { cactusCheck(flower_isLeaf(flower)); //Defensive cactusCheck(flower_isTerminal(flower)); //Checks that a flower without built blocks is a leaf and does not //contain any blocks. } Flower_SequenceIterator *sequenceIterator = flower_getSequenceIterator(flower); Sequence *sequence; while ((sequence = flower_getNextSequence(sequenceIterator)) != NULL) { sequence_check(sequence); } flower_destructSequenceIterator(sequenceIterator); }
void flower_writeBinaryRepresentation(Flower *flower, void(*writeFn)(const void * ptr, size_t size, size_t count)) { Flower_SequenceIterator *sequenceIterator; Flower_EndIterator *endIterator; Flower_BlockIterator *blockIterator; Flower_GroupIterator *groupIterator; Flower_ChainIterator *chainIterator; Sequence *sequence; End *end; Block *block; Group *group; Chain *chain; binaryRepresentation_writeElementType(CODE_FLOWER, writeFn); binaryRepresentation_writeName(flower_getName(flower), writeFn); binaryRepresentation_writeBool(flower_builtBlocks(flower), writeFn); binaryRepresentation_writeBool(flower_builtTrees(flower), writeFn); binaryRepresentation_writeBool(flower_builtFaces(flower), writeFn); binaryRepresentation_writeName(flower->parentFlowerName, writeFn); if (flower_getEventTree(flower) != NULL) { eventTree_writeBinaryRepresentation(flower_getEventTree(flower), writeFn); } sequenceIterator = flower_getSequenceIterator(flower); while ((sequence = flower_getNextSequence(sequenceIterator)) != NULL) { sequence_writeBinaryRepresentation(sequence, writeFn); } flower_destructSequenceIterator(sequenceIterator); endIterator = flower_getEndIterator(flower); while ((end = flower_getNextEnd(endIterator)) != NULL) { end_writeBinaryRepresentation(end, writeFn); } flower_destructEndIterator(endIterator); blockIterator = flower_getBlockIterator(flower); while ((block = flower_getNextBlock(blockIterator)) != NULL) { block_writeBinaryRepresentation(block, writeFn); } flower_destructBlockIterator(blockIterator); groupIterator = flower_getGroupIterator(flower); while ((group = flower_getNextGroup(groupIterator)) != NULL) { group_writeBinaryRepresentation(group, writeFn); } flower_destructGroupIterator(groupIterator); chainIterator = flower_getChainIterator(flower); while ((chain = flower_getNextChain(chainIterator)) != NULL) { chain_writeBinaryRepresentation(chain, writeFn); } flower_destructChainIterator(chainIterator); binaryRepresentation_writeElementType(CODE_FLOWER, writeFn); //this avoids interpretting things wrong. }
int64_t flower_getFreeStubEndNumber(Flower *flower) { End *end; Flower_EndIterator *iterator = flower_getEndIterator(flower); int64_t i = 0; while ((end = flower_getNextEnd(iterator)) != NULL) { if (end_isStubEnd(end) && end_isFree(end)) { i++; } } flower_destructEndIterator(iterator); return i; }
static stList *getCaps(stList *flowers, Name referenceEventName) { stList *caps = stList_construct(); for (int64_t i = 0; i < stList_length(flowers); i++) { Flower *flower = stList_get(flowers, i); //Get list of caps Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIt)) != NULL) { if (end_isStubEnd(end)) { Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference if(cap != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap)) { stList_append(caps, cap); } } } } flower_destructEndIterator(endIt); } return caps; }
int64_t flower_getTotalBaseLength(Flower *flower) { /* * The implementation of this function is very like that in group_getTotalBaseLength, with a few differences. Consider merging them. */ Flower_EndIterator *endIterator = flower_getEndIterator(flower); End *end; int64_t totalLength = 0; while ((end = flower_getNextEnd(endIterator)) != NULL) { if (!end_isBlockEnd(end)) { End_InstanceIterator *instanceIterator = end_getInstanceIterator(end); Cap *cap; while ((cap = end_getNext(instanceIterator)) != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap) && cap_getSequence(cap) != NULL) { Cap *cap2 = cap_getAdjacency(cap); assert(cap2 != NULL); while (end_isBlockEnd(cap_getEnd(cap2))) { Segment *segment = cap_getSegment(cap2); assert(segment != NULL); assert(segment_get5Cap(segment) == cap2); cap2 = cap_getAdjacency(segment_get3Cap(segment)); assert(cap2 != NULL); assert(cap_getStrand(cap2)); assert(cap_getSide(cap2)); } assert(cap_getStrand(cap2)); assert(cap_getSide(cap2)); int64_t length = cap_getCoordinate(cap2) - cap_getCoordinate(cap) - 1; assert(length >= 0); totalLength += length; } } end_destructInstanceIterator(instanceIterator); } } flower_destructEndIterator(endIterator); return totalLength; }
static stList *getSubstringsForFlowers(stList *flowers) { /* * Get the set of substrings for sequence intervals in the given set of flowers. */ stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct); for (int64_t i = 0; i < stList_length(flowers); i++) { Flower *flower = stList_get(flowers, i); Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIt)) != NULL) { if (end_isStubEnd(end)) { End_InstanceIterator *instanceIt = end_getInstanceIterator(end); Cap *cap; while ((cap = end_getNext(instanceIt)) != NULL) { Sequence *sequence; if ((sequence = cap_getSequence(cap)) != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap)) { //We have a sequence interval of interest Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1; assert(length >= 0); if (length > 0) { stList_append(substrings, substring_construct(sequence_getMetaSequence(sequence)->stringName, cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length)); } } } } end_destructInstanceIterator(instanceIt); } } flower_destructEndIterator(endIt); } return substrings; }
void topDown(Flower *flower, Name referenceEventName) { /* * Run on each flower, top down. Sets the coordinates of each reference cap to the correct * sequence, and sets the bases of the reference sequence to be consensus bases. */ Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while ((end = flower_getNextEnd(endIt)) != NULL) { Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference if (cap != NULL) { cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); if (!cap_getSide(cap)) { assert(cap_getCoordinate(cap) != INT64_MAX); Sequence *sequence = cap_getSequence(cap); assert(sequence != NULL); Group *group = end_getGroup(end); if (!group_isLeaf(group)) { Flower *nestedFlower = group_getNestedFlower(group); Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap)); assert(nestedCap != NULL); nestedCap = cap_getStrand(nestedCap) ? nestedCap : cap_getReverse(nestedCap); assert(cap_getStrand(nestedCap)); assert(!cap_getSide(nestedCap)); int64_t endCoordinate = setCoordinates(nestedFlower, sequence_getMetaSequence(sequence), nestedCap, cap_getCoordinate(cap)); (void) endCoordinate; assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(cap))); assert(endCoordinate == cap_getCoordinate( flower_getCap(nestedFlower, cap_getName(cap_getAdjacency(cap))))); } } } } flower_destructEndIterator(endIt); }
int main(int argc, char *argv[]) { st_setLogLevelFromString(argv[1]); st_logDebug("Set up logging\n"); stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); stKVDatabaseConf_destruct(kvDatabaseConf); st_logDebug("Set up the flower disk\n"); Name flowerName = cactusMisc_stringToName(argv[3]); Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName); int64_t totalBases = flower_getTotalBaseLength(flower); int64_t totalEnds = flower_getEndNumber(flower); int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower); int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower); int64_t totalCaps = flower_getCapNumber(flower); int64_t totalBlocks = flower_getBlockNumber(flower); int64_t totalGroups = flower_getGroupNumber(flower); int64_t totalChains = flower_getChainNumber(flower); int64_t totalLinkGroups = 0; int64_t maxEndDegree = 0; int64_t maxAdjacencyLength = 0; int64_t totalEdges = 0; Flower_EndIterator *endIt = flower_getEndIterator(flower); End *end; while((end = flower_getNextEnd(endIt)) != NULL) { assert(end_getOrientation(end)); if(end_getInstanceNumber(end) > maxEndDegree) { maxEndDegree = end_getInstanceNumber(end); } stSortedSet *ends = stSortedSet_construct(); End_InstanceIterator *capIt = end_getInstanceIterator(end); Cap *cap; while((cap = end_getNext(capIt)) != NULL) { if(cap_getSequence(cap) != NULL) { Cap *adjacentCap = cap_getAdjacency(cap); assert(adjacentCap != NULL); End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap)); stSortedSet_insert(ends, adjacentEnd); int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap); if(adjacencyLength < 0) { adjacencyLength *= -1; } assert(adjacencyLength >= 1); if(adjacencyLength >= maxAdjacencyLength) { maxAdjacencyLength = adjacencyLength; } } } end_destructInstanceIterator(capIt); totalEdges += stSortedSet_size(ends); if(stSortedSet_search(ends, end) != NULL) { //This ensures we count self edges twice, so that the division works. totalEdges += 1; } stSortedSet_destruct(ends); } assert(totalEdges % 2 == 0); flower_destructEndIterator(endIt); Flower_GroupIterator *groupIt = flower_getGroupIterator(flower); Group *group; while((group = flower_getNextGroup(groupIt)) != NULL) { if(group_getLink(group) != NULL) { totalLinkGroups++; } } flower_destructGroupIterator(groupIt); printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n", flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups); return 0; }
int main(int argc, char *argv[]) { /* * Open the database. * Construct a flower. * Construct an event tree representing the species tree. * For each sequence contruct two ends each containing an cap. * Make a file for the sequence. * Link the two caps. * Finish! */ int64_t key, j; Group *group; Flower_EndIterator *endIterator; End *end; bool makeEventHeadersAlphaNumeric = 0; /* * Arguments/options */ char * logLevelString = NULL; char * speciesTree = NULL; char * outgroupEvents = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "speciesTree", required_argument, 0, 'f' }, { "outgroupEvents", required_argument, 0, 'g' }, { "help", no_argument, 0, 'h' }, { "makeEventHeadersAlphaNumeric", no_argument, 0, 'i' }, { 0, 0, 0, 0 } }; int option_index = 0; key = getopt_long(argc, argv, "a:b:f:hg:i", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = optarg; break; case 'b': cactusDiskDatabaseString = optarg; break; case 'f': speciesTree = optarg; break; case 'g': outgroupEvents = optarg; break; case 'h': usage(); return 0; case 'i': makeEventHeadersAlphaNumeric = 1; break; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. /////////////////////////////////////////////////////////////////////////// //assert(logLevelString == NULL || strcmp(logLevelString, "CRITICAL") == 0 || strcmp(logLevelString, "INFO") == 0 || strcmp(logLevelString, "DEBUG") == 0); assert(cactusDiskDatabaseString != NULL); assert(speciesTree != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString); for (j = optind; j < argc; j++) { st_logInfo("Sequence file/directory %s\n", argv[j]); } ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); if (stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeTokyoCabinet || stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeKyotoTycoon) { assert(stKVDatabaseConf_getDir(kvDatabaseConf) != NULL); cactusDisk = cactusDisk_construct2(kvDatabaseConf, "cactusSequences"); } else { cactusDisk = cactusDisk_construct(kvDatabaseConf, 1); } st_logInfo("Set up the flower disk\n"); ////////////////////////////////////////////// //Construct the flower ////////////////////////////////////////////// if (cactusDisk_getFlower(cactusDisk, 0) != NULL) { cactusDisk_destruct(cactusDisk); st_logInfo("The first flower already exists\n"); return 0; } flower = flower_construct2(0, cactusDisk); assert(flower_getName(flower) == 0); st_logInfo("Constructed the flower\n"); ////////////////////////////////////////////// //Construct the event tree ////////////////////////////////////////////// st_logInfo("Going to build the event tree with newick string: %s\n", speciesTree); stTree *tree = stTree_parseNewickString(speciesTree); st_logInfo("Parsed the tree\n"); if (makeEventHeadersAlphaNumeric) { makeEventHeadersAlphaNumericFn(tree); } stTree_setBranchLength(tree, INT64_MAX); checkBranchLengthsAreDefined(tree); eventTree = eventTree_construct2(flower); //creates the event tree and the root even totalEventNumber = 1; st_logInfo("Constructed the basic event tree\n"); // Construct a set of outgroup names so that ancestral outgroups // get recognized. stSet *outgroupNameSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); if(outgroupEvents != NULL) { stList *outgroupNames = stString_split(outgroupEvents); for(int64_t i = 0; i < stList_length(outgroupNames); i++) { char *outgroupName = stList_get(outgroupNames, i); stSet_insert(outgroupNameSet, stString_copy(outgroupName)); } stList_destruct(outgroupNames); } //now traverse the tree j = optind; assignEventsAndSequences(eventTree_getRootEvent(eventTree), tree, outgroupNameSet, argv, &j); char *eventTreeString = eventTree_makeNewickString(eventTree); st_logInfo( "Constructed the initial flower with %" PRIi64 " sequences and %" PRIi64 " events with string: %s\n", totalSequenceNumber, totalEventNumber, eventTreeString); assert(event_getSubTreeBranchLength(eventTree_getRootEvent(eventTree)) >= 0.0); free(eventTreeString); //assert(0); ////////////////////////////////////////////// //Label any outgroup events. ////////////////////////////////////////////// if (outgroupEvents != NULL) { stList *outgroupEventsList = stString_split(outgroupEvents); for (int64_t i = 0; i < stList_length(outgroupEventsList); i++) { char *outgroupEvent = makeEventHeadersAlphaNumeric ? makeAlphaNumeric(stList_get(outgroupEventsList, i)) : stString_copy(stList_get(outgroupEventsList, i)); Event *event = eventTree_getEventByHeader(eventTree, outgroupEvent); if (event == NULL) { st_errAbort("Got an outgroup string that does not match an event, outgroup string %s", outgroupEvent); } assert(!event_isOutgroup(event)); event_setOutgroupStatus(event, 1); assert(event_isOutgroup(event)); free(outgroupEvent); } stList_destruct(outgroupEventsList); } ////////////////////////////////////////////// //Construct the terminal group. ////////////////////////////////////////////// if (flower_getEndNumber(flower) > 0) { group = group_construct2(flower); endIterator = flower_getEndIterator(flower); while ((end = flower_getNextEnd(endIterator)) != NULL) { end_setGroup(end, group); } flower_destructEndIterator(endIterator); assert(group_isLeaf(group)); // Create a one link chain if there is only one pair of attached ends.. group_constructChainForLink(group); assert(!flower_builtBlocks(flower)); } else { flower_setBuiltBlocks(flower, 1); } /////////////////////////////////////////////////////////////////////////// // Write the flower to disk. /////////////////////////////////////////////////////////////////////////// //flower_check(flower); cactusDisk_write(cactusDisk); st_logInfo("Updated the flower on disk\n"); /////////////////////////////////////////////////////////////////////////// // Cleanup. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. stSet_destruct(outgroupNameSet); stTree_destruct(tree); stKVDatabaseConf_destruct(kvDatabaseConf); return 0; }