static void fillUpGraph(ReadSet * reads, KmerOccurenceTable * kmerTable, Graph * graph, boolean readTracking, boolean double_strand, ReferenceMapping * referenceMappings, Coordinate referenceMappingCount, IDnum refCount, char * roadmapFilename) { IDnum readIndex; RoadMapArray *roadmap = NULL; Coordinate *annotationOffset = NULL; struct timeval start, end, diff; if (referenceMappings) { roadmap = importRoadMapArray(roadmapFilename); annotationOffset = callocOrExit(reads->readCount, Coordinate); for (readIndex = 1; readIndex < reads->readCount; readIndex++) annotationOffset[readIndex] = annotationOffset[readIndex - 1] + getAnnotationCount(getRoadMapInArray(roadmap, readIndex - 1)); } resetNodeStatus(graph); // Allocate memory for the read pairs if (!readStartsAreActivated(graph)) activateReadStarts(graph); gettimeofday(&start, NULL); #ifdef OPENMP initSmallNodeListMemory(); createNodeLocks(graph); #pragma omp parallel for #endif for (readIndex = refCount; readIndex < reads->readCount; readIndex++) { Annotation * annotations = NULL; IDnum annotationCount = 0; Category category; boolean second_in_pair; if (readIndex % 1000000 == 0) velvetLog("Ghost Threading through reads %ld / %ld\n", (long) readIndex, (long) reads->readCount); category = reads->categories[readIndex]; second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex); if (referenceMappings) { annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex)); annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]); } ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex), kmerTable, graph, readIndex + 1, category, readTracking, double_strand, referenceMappings, referenceMappingCount, refCount, annotations, annotationCount, second_in_pair); } createNodeReadStartArrays(graph); gettimeofday(&end, NULL); timersub(&end, &start, &diff); velvetLog(" === Ghost-Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec); gettimeofday(&start, NULL); #ifdef OPENMP int threads = omp_get_max_threads(); if (threads > 32) threads = 32; #pragma omp parallel for num_threads(threads) #endif for (readIndex = 0; readIndex < reads->readCount; readIndex++) { Annotation * annotations = NULL; IDnum annotationCount = 0; Category category; boolean second_in_pair; if (readIndex % 1000000 == 0) velvetLog("Threading through reads %li / %li\n", (long) readIndex, (long) reads->readCount); category = reads->categories[readIndex]; second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex); if (referenceMappings) { annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex)); annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]); } threadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex), kmerTable, graph, readIndex + 1, category, readTracking, double_strand, referenceMappings, referenceMappingCount, refCount, annotations, annotationCount, second_in_pair); } gettimeofday(&end, NULL); timersub(&end, &start, &diff); velvetLog(" === Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec); #ifdef OPENMP free(nodeLocks); nodeLocks = NULL; #endif if (referenceMappings) { destroyRoadMapArray(roadmap); free (annotationOffset); } orderNodeReadStartArrays(graph); destroySmallNodeListMemmory(); destroyKmerOccurenceTable(kmerTable); }
static void ghostThreadSequenceThroughGraph(TightString * tString, KmerOccurenceTable * kmerOccurences, Graph * graph, IDnum seqID, Category category, boolean readTracking, boolean double_strand) { Kmer word; Kmer antiWord; Coordinate readNucleotideIndex; KmerOccurence *kmerOccurence; int wordLength = getWordLength(graph); Nucleotide nucleotide; Node *node; Node *previousNode = NULL; clearKmer(&word); clearKmer(&antiWord); // Neglect any read which will not be short paired if ((!readTracking && category % 2 == 0) || category / 2 >= CATEGORIES) return; // Neglect any string shorter than WORDLENGTH : if (getLength(tString) < wordLength) return; // Verify that all short reads are reasonnably short if (getLength(tString) > USHRT_MAX) { printf("Short read of length %lli, longer than limit %i\n", (long long) getLength(tString), SHRT_MAX); puts("You should better declare this sequence as long, because it genuinely is!"); exit(1); } // Allocate memory for the read pairs if (!readStartsAreActivated(graph)) activateReadStarts(graph); // Fill in the initial word : for (readNucleotideIndex = 0; readNucleotideIndex < wordLength - 1; readNucleotideIndex++) { nucleotide = getNucleotide(readNucleotideIndex, tString); pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } } // Go through sequence while (readNucleotideIndex < getLength(tString)) { // Shift word: nucleotide = getNucleotide(readNucleotideIndex++, tString); pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } // Search in table if ((!double_strand || compareKmers(&word, &antiWord) <= 0) && (kmerOccurence = findKmerOccurenceInSortedTable(&word, kmerOccurences))) { node = getNodeInGraph(graph, kmerOccurence->nodeID); } else if ((double_strand && compareKmers(&word, &antiWord) > 0) && (kmerOccurence = findKmerOccurenceInSortedTable(&antiWord, kmerOccurences))) { node = getNodeInGraph(graph, -kmerOccurence->nodeID); } else { node = NULL; if (previousNode) break; } previousNode = node; // Fill in graph if (node && !getNodeStatus(node)) { incrementReadStartCount(node, graph); setSingleNodeStatus(node, true); memorizeNode(node); } } unlockMemorizedNodes(); }