/*
 * Returns an updated list of adjacency edges, such that each stub edge is a
 * member of exactly one cycle.
 *
 * The chosen edges are grouped into components together with the stub and chain
 * edges; every component is handed to splitMultipleStubCycle, the resulting
 * cycles are stripped of their stub/chain edges and the remaining adjacency
 * edges are joined into one list, which is returned to the caller.
 */
stList *splitMultipleStubCycles(stList *chosenEdges, stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges, stList *stubEdges, stList *chainEdges) {
    /* Components induced by the chosen adjacency edges plus the stub and chain edges. */
    stList *components = getComponents2(chosenEdges, stubEdges, chainEdges);

    /* Accumulates the cycles produced for every component; it destructs the inner lists. */
    stList *oneStubCycles = stList_construct3(0, (void (*)(void *)) stList_destruct);

    int64_t componentIndex = 0;
    while (componentIndex < stList_length(components)) {
        stList *component = stList_get(components, componentIndex);

        /* Partition the edges of this component by kind. */
        stList *componentAdjacencyEdges;
        stList *componentStubEdges;
        stList *componentChainEdges;
        splitIntoAdjacenciesStubsAndChains(component, nonZeroWeightAdjacencyEdges, stubEdges, chainEdges,
                &componentAdjacencyEdges, &componentStubEdges, &componentChainEdges);

        stList *componentSplit = splitMultipleStubCycle(component, componentAdjacencyEdges,
                allAdjacencyEdges, componentStubEdges, componentChainEdges);
        stList_appendAll(oneStubCycles, componentSplit);

        /*
         * The inner lists were appended to oneStubCycles above, so drop the
         * destructor before releasing the container to avoid destroying them.
         */
        stList_setDestructor(componentSplit, NULL);
        stList_destruct(componentSplit);

        stList_destruct(componentAdjacencyEdges);
        stList_destruct(componentStubEdges);
        stList_destruct(componentChainEdges);

        componentIndex++;
    }
    stList_destruct(components);

    /* Strip the stub/chain edges out of every cycle. */
    stSortedSet *nonAdjacencyEdges = getSetOfMergedLists(stubEdges, chainEdges);
    stList *adjacencyOnlyCycles = filterListsToExclude(oneStubCycles, nonAdjacencyEdges);
    stList_destruct(oneStubCycles);
    stSortedSet_destruct(nonAdjacencyEdges);

    /* Flatten the per-cycle adjacency edges into the single list that is returned. */
    stList *result = stList_join(adjacencyOnlyCycles);
    stList_destruct(adjacencyOnlyCycles);
    return result;
}
/*
 * A reference thread between the two caps in each flower f may be broken into
 * two in the children of f. Therefore, for each flower f first identify attached
 * stub ends present in the children of f that are not present in f and copy them
 * into f, reattaching the reference caps as needed.
 *
 * Afterwards a phylogenetic tree rooted at the reference event is registered for
 * every flower (used for base calling) and the recursive threads are built. When
 * isTop is set the thread strings are additionally turned into meta sequences and
 * coordinates are assigned along each thread.
 */
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName, bool isTop,
        stMatrix *(*generateSubstitutionMatrix)(double)) {
    stList *caps = getCaps(flowers, referenceEventName);

    //Walk from the end towards the start, as the list is added to while we go.
    int64_t capIndex = stList_length(caps);
    while (--capIndex >= 0) {
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, capIndex), caps);
    }

    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        recoverBrokenAdjacencies(stList_get(flowers, flowerIndex), caps, referenceEventName);
    }

    //Build the phylogenetic event trees for base calling, one per flower.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL,
            (void (*)(void *)) cleanupPhylogeneticTree);
    for (int64_t flowerIndex = 0; flowerIndex < stList_length(flowers); flowerIndex++) {
        Flower *flower = stList_get(flowers, flowerIndex);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower,
                getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (!isTop) {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    } else {
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        //Running indices used for the names of non-trivial and trivial sequences respectively.
        int64_t nonTrivialSeqIndex = 0;
        int64_t trivialSeqIndex = stList_length(threadStrings);

        for (int64_t threadIndex = 0; threadIndex < stList_length(threadStrings); threadIndex++) {
            Cap *cap = stList_get(caps, threadIndex);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *flower = end_getFlower(cap_getEnd(cap));

            char *threadString = stList_get(threadStrings, threadIndex);
            bool trivialString = isTrivialString(&threadString); //This alters the original string

            //Pick (and advance) the counter matching the kind of sequence.
            int64_t sequenceIndex;
            if (trivialString) {
                sequenceIndex = trivialSeqIndex++;
            } else {
                sequenceIndex = nonTrivialSeqIndex++;
            }

            MetaSequence *metaSequence = addMetaSequence(flower, cap, sequenceIndex, threadString,
                    trivialString);
            free(threadString);

            int64_t endCoordinate = setCoordinates(flower, metaSequence, cap,
                    metaSequence_getStart(metaSequence) - 1);
            (void) endCoordinate; //Only read by the assert below.
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }

        //The strings were already freed in the loop above, so only release the container.
        stList_setDestructor(threadStrings, NULL);
        stList_destruct(threadStrings);
    }

    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}
int main(int argc, char *argv[]) { ////////////////////////////////////////////// //Parse the inputs ////////////////////////////////////////////// parseBasicArguments(argc, argv, "linkageStats"); /////////////////////////////////////////////////////////////////////////// // Get the intervals /////////////////////////////////////////////////////////////////////////// stList *haplotypeEventStrings = getEventStrings( treatHaplotype1AsContamination ? NULL : hap1EventString, treatHaplotype2AsContamination ? NULL : hap2EventString); stList *assemblyEventStringInList = stList_construct(); stList_append(assemblyEventStringInList, assemblyEventString); stList *intervals = stList_construct3(0, (void (*)(void *))sequenceInterval_destruct); for(int64_t i=0; i<stList_length(haplotypeEventStrings); i++) { const char *hapEventString = stList_get(haplotypeEventStrings, i); st_logInfo("Getting contig paths for haplotype: %s", hapEventString); stList *contigPaths = getContigPaths(flower, hapEventString, assemblyEventStringInList); stList *hapIntervals = getSplitContigPathIntervals(flower, contigPaths, hapEventString, assemblyEventStringInList); stList_destruct(contigPaths); st_logInfo("Getting contig paths\n"); stList_appendAll(intervals, hapIntervals); stList_setDestructor(hapIntervals, NULL); stList_destruct(hapIntervals); } st_logDebug("Got a total of %" PRIi64 " intervals\n", stList_length(intervals)); /////////////////////////////////////////////////////////////////////////// // Write it out. 
/////////////////////////////////////////////////////////////////////////// FILE *fileHandle = fopen(outputFile, "w"); for (int64_t i = 0; i < stList_length(intervals); i++) { SequenceInterval *sequenceInterval = stList_get(intervals, i); st_logDebug("We have a path interval %s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); fprintf(fileHandle, "%s %" PRIi64 " %" PRIi64 "\n", sequenceInterval->sequenceName, sequenceInterval->start, sequenceInterval->end); } st_logInfo("Finished writing out the stats.\n"); fclose(fileHandle); return 0; }
/*
 * Fetches the records for the given object names with one bulk database query
 * and returns them as a list, in the same order as objectNames.
 *
 * For each name the cache is consulted first: a cached record is taken from the
 * cache, otherwise the record from the bulk result is decompressed and stored in
 * the cache. The returned list has free as its destructor. An empty objectNames
 * list yields an empty list without touching the database.
 *
 * type is only used in the message of the exception raised when the bulk get fails.
 */
static stList *getRecords(CactusDisk *cactusDisk, stList *objectNames, char *type) {
    if (stList_length(objectNames) == 0) {
        return stList_construct3(0, NULL);
    }

    stList *records = NULL;
    stTry {
        records = stKVDatabase_bulkGetRecords(cactusDisk->database, objectNames);
    } stCatch(except) {
        stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                "An unknown database error occurred when getting a bulk set of %s", type);
    } stTryEnd;

    assert(records != NULL);
    assert(stList_length(objectNames) == stList_length(records));
    stList_setDestructor(records, free);

    /* Replace every bulk result in the list by the record it stands for. */
    for (int64_t idx = 0; idx < stList_length(objectNames); idx++) {
        stKVDatabaseBulkResult *bulkResult = stList_get(records, idx);
        assert(bulkResult != NULL);
        Name name = *((int64_t *) stList_get(objectNames, idx));

        int64_t size;
        void *payload;
        if (stCache_containsRecord(cactusDisk->cache, name, 0, INT64_MAX)) {
            /* Cache hit: use the cached record. */
            payload = stCache_getRecord(cactusDisk->cache, name, 0, INT64_MAX, &size);
            assert(size >= 0);
            assert(payload != NULL);
        } else {
            /* Cache miss: decompress the database record and remember it in the cache. */
            payload = stKVDatabaseBulkResult_getRecord(bulkResult, &size);
            assert(size >= 0);
            assert(payload != NULL);
            payload = decompress(payload, &size);
            stCache_setRecord(cactusDisk->cache, name, 0, size, payload);
        }

        stKVDatabaseBulkResult_destruct(bulkResult);
        stList_set(records, idx, payload);
    }
    return records;
}
/*
 * Takes a simple cycle containing k stub edges and splits into k cycles, each
 * containing 1 stub edge.
 *
 * The cycle is first reduced to its stub-free paths (components built from the
 * adjacency and chain edges only). With a single such path the cycle is returned
 * as a copy. Otherwise the best two components are merged, the result is
 * re-split into two cycles and the procedure recurses on each of them.
 *
 * Returns a newly constructed list of cycles (each an stList) whose destructor
 * releases the contained lists.
 */
static stList *splitMultipleStubCycle(stList *cycle, stList *nonZeroWeightAdjacencyEdges,
        stSortedSet *allAdjacencyEdges, stList *stubEdges, stList *chainEdges) {
    /*
     * Get sub-components containing only adjacency and chain edges.
     */
    stSortedSet *nonAdjacencyEdges = getSetOfMergedLists(stubEdges, chainEdges);
    //Filter out the non-adjacency edges
    stList *cycleAdjacencyEdges = stList_filterToExclude(cycle, nonAdjacencyEdges);
    //Make it only the chain edges present in the original component
    stList *stubFreePaths = getComponents2(cycleAdjacencyEdges, NULL, chainEdges);
    stList_destruct(cycleAdjacencyEdges);
    assert(stList_length(stubFreePaths) >= 1);

    //The list to return.
    stList *result = stList_construct3(0, (void (*)(void *)) stList_destruct);

    if (stList_length(stubFreePaths) <= 1) {
        /*
         * Nothing to split: hand back a copy of the cycle.
         */
        stList_append(result, stList_copy(cycle, NULL));
        stSortedSet_destruct(nonAdjacencyEdges);
        stList_destruct(stubFreePaths);
        return result;
    }

    /*
     * Build the list of adjacency edges acceptable in the merge
     */
    stSortedSet *oddNodes = getOddNodes(cycle);
    stList *oddToEvenNonZeroWeightEdges = getOddToEvenAdjacencyEdges(oddNodes, nonZeroWeightAdjacencyEdges);
    stSortedSet *oddToEvenAllEdges = getOddToEvenAdjacencyEdges2(oddNodes, allAdjacencyEdges);

    /*
     * Merge together the best two components.
     */
    stList *adjacencyOnlyPaths = filterListsToExclude(stubFreePaths, nonAdjacencyEdges);
    //This is inplace.
    doBestMergeOfTwoSimpleCycles(adjacencyOnlyPaths, oddToEvenNonZeroWeightEdges, oddToEvenAllEdges);
    stList *mergedAdjacencyEdges = stList_join(adjacencyOnlyPaths);
    stList_destruct(adjacencyOnlyPaths);
    stList *mergedCycles = getComponents2(mergedAdjacencyEdges, stubEdges, chainEdges);
    assert(stList_length(mergedCycles) == 2);
    stList_destruct(mergedAdjacencyEdges);

    /*
     * Cleanup
     */
    stSortedSet_destruct(oddNodes);
    stList_destruct(oddToEvenNonZeroWeightEdges);
    stSortedSet_destruct(oddToEvenAllEdges);

    /*
     * Call procedure recursively.
     */
    int64_t cycleIndex = 0;
    while (cycleIndex < stList_length(mergedCycles)) {
        stList *subCycle = stList_get(mergedCycles, cycleIndex);

        /*
         * Split into adjacency edges, stub edges and chain edges.
         */
        stList *subAdjacencyEdges;
        stList *subStubEdges;
        stList *subChainEdges;
        splitIntoAdjacenciesStubsAndChains(subCycle, nonZeroWeightAdjacencyEdges, stubEdges, chainEdges,
                &subAdjacencyEdges, &subStubEdges, &subChainEdges);

        /*
         * Call recursively.
         */
        stList *subResult = splitMultipleStubCycle(subCycle, subAdjacencyEdges, allAdjacencyEdges,
                subStubEdges, subChainEdges);
        stList_appendAll(result, subResult);

        /*
         * Clean up; the cycles were appended to result above, so only the container is released.
         */
        stList_setDestructor(subResult, NULL);
        stList_destruct(subResult);
        stList_destruct(subAdjacencyEdges);
        stList_destruct(subStubEdges);
        stList_destruct(subChainEdges);

        cycleIndex++;
    }
    stList_destruct(mergedCycles);

    stSortedSet_destruct(nonAdjacencyEdges);
    stList_destruct(stubFreePaths);
    return result;
}