static IDnum *computeReadToNodeCounts() { IDnum readIndex, nodeIndex; IDnum maxNodeIndex = 2 * nodeCount(graph) + 1; IDnum maxReadIndex = sequenceCount(graph) + 1; IDnum *readNodeCounts = callocOrExit(maxReadIndex, IDnum); boolean *readMarker = callocOrExit(maxReadIndex, boolean); ShortReadMarker *nodeArray, *shortMarker; PassageMarkerI marker; Node *node; IDnum nodeReadCount; //puts("Computing read to node mapping array sizes"); for (nodeIndex = 0; nodeIndex < maxNodeIndex; nodeIndex++) { node = getNodeInGraph(graph, nodeIndex - nodeCount(graph)); if (node == NULL) continue; // Short reads if (readStartsAreActivated(graph)) { nodeArray = getNodeReads(node, graph); nodeReadCount = getNodeReadCount(node, graph); for (readIndex = 0; readIndex < nodeReadCount; readIndex++) { shortMarker = getShortReadMarkerAtIndex(nodeArray, readIndex); readNodeCounts[getShortReadMarkerID (shortMarker)]++; } } // Long reads for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex < 0) continue; if (readMarker[readIndex]) continue; readNodeCounts[readIndex]++; readMarker[readIndex] = true; } // Clean up marker array for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex > 0) readMarker[readIndex] = false; } } free(readMarker); return readNodeCounts; }
void concatenateReadStarts(Node * target, Node * source, Graph * graph) { IDnum sourceLength, targetLength; ShortReadMarker *sourceArray, *targetArray, *marker; IDnum index; Coordinate position, nodeLength; if (!readStartsAreActivated(graph)) return; if (target == NULL || source == NULL) return; // Update Coordinates sourceArray = getNodeReads(source, graph); sourceLength = getNodeReadCount(source, graph); nodeLength = getNodeLength(target); for (index = 0; index < sourceLength; index++) { marker = getShortReadMarkerAtIndex(sourceArray, index); position = getShortReadMarkerPosition(marker); if (position != -1) { position += nodeLength; setShortReadMarkerPosition(marker, position); } } // Same but for symmetrical reads targetArray = getNodeReads(getTwinNode(target), graph); targetLength = getNodeReadCount(getTwinNode(target), graph); nodeLength = getNodeLength(source); for (index = 0; index < targetLength; index++) { marker = getShortReadMarkerAtIndex(targetArray, index); position = getShortReadMarkerPosition(marker); if (position != -1) { position += nodeLength; setShortReadMarkerPosition(marker, position); } } // Merging lists mergeNodeReads(target, source, graph); mergeNodeReads(getTwinNode(target), getTwinNode(source), graph); }
static void adjustShortReadsByLength(Node * target, Coordinate nodeLength) { ShortReadMarker *targetArray, *marker; IDnum targetLength, index; Coordinate position; if (!readStartsAreActivated(graph)) return; targetArray = getNodeReads(getTwinNode(target), graph); targetLength = getNodeReadCount(getTwinNode(target), graph); for (index = 0; index < targetLength; index++) { marker = getShortReadMarkerAtIndex(targetArray, index); position = getShortReadMarkerPosition(marker); if (position != -1) { position += nodeLength; setShortReadMarkerPosition(marker, position); } } }
static void projectFromNode(IDnum nodeID, ReadOccurence ** readNodes, IDnum * readNodeCounts, IDnum * readPairs, Category * cats, boolean * dubious, ShortLength * lengths, boolean weight) { IDnum index; ShortReadMarker *nodeArray, *shortMarker; PassageMarkerI marker; Node *node; IDnum nodeReadCount; node = getNodeInGraph(graph, nodeID); if (node == NULL || !getUniqueness(node)) return; if (readStartsAreActivated(graph)) { nodeArray = getNodeReads(node, graph); nodeReadCount = getNodeReadCount(node, graph); for (index = 0; index < nodeReadCount; index++) { shortMarker = getShortReadMarkerAtIndex(nodeArray, index); if (dubious[getShortReadMarkerID(shortMarker) - 1]) continue; projectFromShortRead(node, shortMarker, readPairs, cats, readNodes, readNodeCounts, lengths, weight); } } for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { if (getPassageMarkerSequenceID(marker) > 0) projectFromLongRead(node, marker, readPairs, cats, readNodes, readNodeCounts, lengths, weight); } }
void exploitShortReadPairs(Graph * argGraph, ReadSet * reads, boolean * dubious, boolean * shadows, boolean force_jumps) { boolean modified = true; graph = argGraph; if (!readStartsAreActivated(graph)) return; velvetLog("Starting pebble resolution...\n"); resetNodeStatus(graph); // Prepare scaffold buildScaffold(graph, reads, dubious, shadows); // Prepare graph prepareGraphForLocalCorrections(graph); // Prepare local scaffold localScaffold = callocOrExit(2 * nodeCount(graph) + 1, MiniConnection); // Loop until convergence while (modified) modified = expandLongNodes(force_jumps); // Clean up memory cleanMemory(); deactivateLocalCorrectionSettings(); sortGapMarkers(graph); velvetLog("Pebble done.\n"); }
static void computePartialReadToNodeMapping(IDnum nodeID, ReadOccurence ** readNodes, IDnum * readNodeCounts, boolean * readMarker) { ShortReadMarker *shortMarker; IDnum index, readIndex; ReadOccurence *readArray, *readOccurence; Node *node = getNodeInGraph(graph, nodeID); ShortReadMarker *nodeArray; IDnum nodeReadCount; PassageMarkerI marker; if (readStartsAreActivated(graph)) { nodeArray = getNodeReads(node, graph); nodeReadCount = getNodeReadCount(node, graph); for (index = 0; index < nodeReadCount; index++) { shortMarker = getShortReadMarkerAtIndex(nodeArray, index); readIndex = getShortReadMarkerID(shortMarker); readArray = readNodes[readIndex]; readOccurence = &readArray[readNodeCounts[readIndex]]; readOccurence->nodeID = nodeID; readOccurence->position = getShortReadMarkerPosition(shortMarker); readOccurence->offset = getShortReadMarkerOffset(shortMarker); readNodeCounts[readIndex]++; } } for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex < 0) continue; if (!readMarker[readIndex]) { readArray = readNodes[readIndex]; readOccurence = &readArray[readNodeCounts[readIndex]]; readOccurence->nodeID = nodeID; readOccurence->position = getStartOffset(marker); readOccurence->offset = getPassageMarkerStart(marker); readNodeCounts[readIndex]++; readMarker[readIndex] = true; } else { readArray = readNodes[readIndex]; readOccurence = &readArray[readNodeCounts[readIndex] - 1]; readOccurence->position = -1; readOccurence->offset = -1; } } for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex > 0) readMarker[readIndex] = false; } }
static void fillUpGraph(ReadSet * reads, KmerOccurenceTable * kmerTable, Graph * graph, boolean readTracking, boolean double_strand, ReferenceMapping * referenceMappings, Coordinate referenceMappingCount, IDnum refCount, char * roadmapFilename) { IDnum readIndex; RoadMapArray *roadmap = NULL; Coordinate *annotationOffset = NULL; struct timeval start, end, diff; if (referenceMappings) { roadmap = importRoadMapArray(roadmapFilename); annotationOffset = callocOrExit(reads->readCount, Coordinate); for (readIndex = 1; readIndex < reads->readCount; readIndex++) annotationOffset[readIndex] = annotationOffset[readIndex - 1] + getAnnotationCount(getRoadMapInArray(roadmap, readIndex - 1)); } resetNodeStatus(graph); // Allocate memory for the read pairs if (!readStartsAreActivated(graph)) activateReadStarts(graph); gettimeofday(&start, NULL); #ifdef OPENMP initSmallNodeListMemory(); createNodeLocks(graph); #pragma omp parallel for #endif for (readIndex = refCount; readIndex < reads->readCount; readIndex++) { Annotation * annotations = NULL; IDnum annotationCount = 0; Category category; boolean second_in_pair; if (readIndex % 1000000 == 0) velvetLog("Ghost Threading through reads %ld / %ld\n", (long) readIndex, (long) reads->readCount); category = reads->categories[readIndex]; second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex); if (referenceMappings) { annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex)); annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]); } ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex), kmerTable, graph, readIndex + 1, category, readTracking, double_strand, referenceMappings, referenceMappingCount, refCount, annotations, annotationCount, second_in_pair); } createNodeReadStartArrays(graph); gettimeofday(&end, NULL); timersub(&end, &start, &diff); velvetLog(" === Ghost-Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec); gettimeofday(&start, NULL); #ifdef OPENMP int threads = omp_get_max_threads(); if (threads > 32) threads = 32; #pragma omp parallel for num_threads(threads) #endif for (readIndex = 0; readIndex < reads->readCount; readIndex++) { Annotation * annotations = NULL; IDnum annotationCount = 0; Category category; boolean second_in_pair; if (readIndex % 1000000 == 0) velvetLog("Threading through reads %li / %li\n", (long) readIndex, (long) reads->readCount); category = reads->categories[readIndex]; second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex); if (referenceMappings) { annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex)); annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]); } threadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex), kmerTable, graph, readIndex + 1, category, readTracking, double_strand, referenceMappings, referenceMappingCount, refCount, annotations, annotationCount, second_in_pair); } gettimeofday(&end, NULL); timersub(&end, &start, &diff); velvetLog(" === Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec); #ifdef OPENMP free(nodeLocks); nodeLocks = NULL; #endif if (referenceMappings) { destroyRoadMapArray(roadmap); free (annotationOffset); } orderNodeReadStartArrays(graph); destroySmallNodeListMemmory(); destroyKmerOccurenceTable(kmerTable); }
static void ghostThreadSequenceThroughGraph(TightString * tString, KmerOccurenceTable * kmerOccurences, Graph * graph, IDnum seqID, Category category, boolean readTracking, boolean double_strand) { Kmer word; Kmer antiWord; Coordinate readNucleotideIndex; KmerOccurence *kmerOccurence; int wordLength = getWordLength(graph); Nucleotide nucleotide; Node *node; Node *previousNode = NULL; clearKmer(&word); clearKmer(&antiWord); // Neglect any read which will not be short paired if ((!readTracking && category % 2 == 0) || category / 2 >= CATEGORIES) return; // Neglect any string shorter than WORDLENGTH : if (getLength(tString) < wordLength) return; // Verify that all short reads are reasonnably short if (getLength(tString) > USHRT_MAX) { printf("Short read of length %lli, longer than limit %i\n", (long long) getLength(tString), SHRT_MAX); puts("You should better declare this sequence as long, because it genuinely is!"); exit(1); } // Allocate memory for the read pairs if (!readStartsAreActivated(graph)) activateReadStarts(graph); // Fill in the initial word : for (readNucleotideIndex = 0; readNucleotideIndex < wordLength - 1; readNucleotideIndex++) { nucleotide = getNucleotide(readNucleotideIndex, tString); pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } } // Go through sequence while (readNucleotideIndex < getLength(tString)) { // Shift word: nucleotide = getNucleotide(readNucleotideIndex++, tString); pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } // Search in table if ((!double_strand || compareKmers(&word, &antiWord) <= 0) && (kmerOccurence = findKmerOccurenceInSortedTable(&word, kmerOccurences))) { node = getNodeInGraph(graph, kmerOccurence->nodeID); } else if ((double_strand && compareKmers(&word, &antiWord) > 0) && (kmerOccurence = findKmerOccurenceInSortedTable(&antiWord, kmerOccurences))) { node = getNodeInGraph(graph, -kmerOccurence->nodeID); } else { node = NULL; if (previousNode) break; } previousNode = node; // Fill in graph if (node && !getNodeStatus(node)) { incrementReadStartCount(node, graph); setSingleNodeStatus(node, true); memorizeNode(node); } } unlockMemorizedNodes(); }