/*
 * Builds the pre-marker chain of every reference sequence.
 *
 * For each of the first rdmaps->referenceCount sequences, the internal
 * preNodes of the sequence (as returned by chooseNextInternalPreNode) and
 * the annotations of its road map are visited in order of position:
 *   - an internal preNode gets a new PreMarker via addPreMarker_pg;
 *   - an annotation is handed to reConnectAnnotation.
 *
 * rdmaps   : road maps and their annotations
 * preGraph : pregraph receiving the markers
 * chains   : per-sequence value whose predecessor (chains[i] - 1) seeds
 *            chooseNextInternalPreNode
 *
 * With _OPENMP the loop is run in parallel on at most 8 threads; each
 * iteration then locates its first annotation through the file-level
 * annotationOffset table instead of walking one shared cursor.
 */
static void createPreMarkers(RoadMapArray * rdmaps, PreGraph * preGraph,
			     IDnum * chains)
{
	IDnum sequenceIndex;
	IDnum referenceCount = rdmaps->referenceCount;
#ifndef _OPENMP
	// Serial build: a single cursor walks the annotations of all sequences
	Annotation *annot = rdmaps->annotations;
#endif

#ifdef _OPENMP
	int threads = omp_get_max_threads();
	if (threads > 8)
		threads = 8;

	#pragma omp parallel for num_threads(threads)
#endif
	for (sequenceIndex = 1; sequenceIndex <= referenceCount;
	     sequenceIndex++) {
#ifdef _OPENMP
		// Parallel build: every iteration owns a private cursor
		Annotation *annot =
		    getAnnotationInArray(rdmaps->annotations,
					 annotationOffset[sequenceIndex - 1]);
#endif
		RoadMap *rdmap;
		Coordinate currentPosition, currentInternalPosition;
		IDnum currentPreNodeID, nextInternalPreNodeID;
		IDnum annotIndex, lastAnnotIndex;
		PreMarker *previous;

		if (sequenceIndex % 1000000 == 0)
			velvetLog("Connecting %li / %li\n",
				  (long) sequenceIndex,
				  (long) sequenceCount_pg(preGraph));

		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
		annotIndex = 0;
		lastAnnotIndex = getAnnotationCount(rdmap);
		nextInternalPreNodeID =
		    chooseNextInternalPreNode(chains[sequenceIndex] - 1,
					      sequenceIndex, preGraph, chains);

		previous = NULL;
		currentPosition = 0;
		currentInternalPosition = 0;
		currentPreNodeID = 0;

		// Loop until both annotations and internal preNodes are exhausted
		while (annotIndex < lastAnnotIndex
		       || nextInternalPreNodeID != 0) {
			if (annotIndex == lastAnnotIndex
			    || (nextInternalPreNodeID != 0
				&& currentInternalPosition <
				getPosition(annot))) {
				// The next internal preNode comes first
#ifdef _OPENMP
				lockNode(nextInternalPreNodeID);
#endif
				previous =
				    addPreMarker_pg(preGraph,
						    nextInternalPreNodeID,
						    sequenceIndex,
						    &currentPosition,
						    previous);
#ifdef _OPENMP
				unLockNode(nextInternalPreNodeID);
#endif
				currentPreNodeID = nextInternalPreNodeID;
				nextInternalPreNodeID =
				    chooseNextInternalPreNode(currentPreNodeID,
							      sequenceIndex,
							      preGraph,
							      chains);
				currentInternalPosition +=
				    getPreNodeLength_pg(currentPreNodeID,
							preGraph);
			} else {
				// The next annotation comes first
				reConnectAnnotation(&currentPreNodeID, annot,
						    &currentPosition,
						    sequenceIndex, preGraph,
						    &previous);
				annot = getNextAnnotation(annot);
				annotIndex++;
			}
		}
	}
}
// Threads each sequences and creates preArcs according to road map indications static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * chains) { IDnum sequenceIndex; IDnum referenceCount = rdmaps->referenceCount; #ifdef _OPENMP annotationOffset = mallocOrExit(rdmaps->length + 1, Coordinate); annotationOffset[0] = 0; for (sequenceIndex = 1; sequenceIndex <= rdmaps->length; sequenceIndex++) annotationOffset[sequenceIndex] = annotationOffset[sequenceIndex - 1] + getAnnotationCount(getRoadMapInArray(rdmaps, sequenceIndex - 1)); #else Annotation *annot = rdmaps->annotations; #endif if (rdmaps->referenceCount > 0) allocatePreMarkerCountSpace_pg(preGraph); #ifdef _OPENMP int threads = omp_get_max_threads(); if (threads > 8) threads = 8; #pragma omp parallel for num_threads(threads) #endif for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { #ifdef _OPENMP Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]); #endif RoadMap *rdmap; Coordinate currentPosition, currentInternalPosition; IDnum currentPreNodeID, nextInternalPreNodeID; IDnum annotIndex, lastAnnotIndex; boolean isReference; if (sequenceIndex % 1000000 == 0) velvetLog("Connecting %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); nextInternalPreNodeID = chooseNextInternalPreNode (chains[sequenceIndex] - 1, sequenceIndex, preGraph, chains); isReference = (sequenceIndex <= referenceCount); currentPosition = 0; currentInternalPosition = 0; currentPreNodeID = 0; // Recursion up to last annotation while (annotIndex < lastAnnotIndex || nextInternalPreNodeID != 0) { if (annotIndex == lastAnnotIndex || (nextInternalPreNodeID != 0 && currentInternalPosition < getPosition(annot))) { connectPreNodeToTheNext(¤tPreNodeID, nextInternalPreNodeID, ¤tPosition, sequenceIndex, isReference, 
preGraph); nextInternalPreNodeID = chooseNextInternalPreNode (currentPreNodeID, sequenceIndex, preGraph, chains); currentInternalPosition += getPreNodeLength_pg(currentPreNodeID, preGraph); } else { connectAnnotation(¤tPreNodeID, annot, ¤tPosition, sequenceIndex, isReference, preGraph); annot = getNextAnnotation(annot); annotIndex++; } } } if (rdmaps->referenceCount > 0) { allocatePreMarkerSpace_pg(preGraph); createPreMarkers(rdmaps, preGraph, chains); } #ifdef _OPENMP free(annotationOffset); annotationOffset = NULL; #endif }
// Counts how many preNodes are to be created to allocate appropriate memory static void countPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * markerCounters, InsertionMarker * insertionMarkers, InsertionMarker * veryLastMarker) { Annotation *annot = rdmaps->annotations; InsertionMarker *currentMarker = insertionMarkers; IDnum markerIndex, lastMarkerIndex; IDnum sequenceIndex; Coordinate currentPosition, nextStop; IDnum preNodeCounter = 0; RoadMap *rdmap; IDnum annotIndex, lastAnnotIndex; // Now that we have read all of the annotations, we go on to create the preNodes and tie them up for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); markerIndex = 0; lastMarkerIndex = markerCounters[sequenceIndex]; currentPosition = 0; while (annotIndex < lastAnnotIndex) { if (markerIndex == lastMarkerIndex || getPosition(annot) <= getInsertionMarkerPosition(currentMarker)) nextStop = getPosition(annot); else nextStop = getInsertionMarkerPosition (currentMarker); if (currentPosition != nextStop) { preNodeCounter++; currentPosition = nextStop; } while (markerIndex < lastMarkerIndex && getInsertionMarkerPosition(currentMarker) == currentPosition) { currentMarker++; markerIndex++; } while (annotIndex < lastAnnotIndex && getPosition(annot) == currentPosition) { annot = getNextAnnotation(annot); annotIndex++; } } while (markerIndex < lastMarkerIndex) { if (currentPosition == getInsertionMarkerPosition(currentMarker)) { currentMarker++; markerIndex++; } else { preNodeCounter++; currentPosition = getInsertionMarkerPosition (currentMarker); } } } allocatePreNodeSpace_pg(preGraph, preNodeCounter); }
// Creates the preNode using insertion marker and annotation lists for each sequence static void // Creates the preNode using insertion marker and annotation lists for each sequence createPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * markerCounters, InsertionMarker * insertionMarkers, InsertionMarker * veryLastMarker, IDnum * chains, SequencesReader *seqReadInfo, int WORDLENGTH) { char *sequenceFilename = seqReadInfo->m_seqFilename; Annotation *annot = rdmaps->annotations; IDnum latestPreNodeID; InsertionMarker *currentMarker = insertionMarkers; IDnum sequenceIndex; Coordinate currentPosition, nextStop; IDnum preNodeCounter = 1; FILE *file = NULL; char line[50000]; int lineLength = 50000; Coordinate readIndex; boolean tooShort; Kmer initialKmer; char c; RoadMap *rdmap; IDnum annotIndex, lastAnnotIndex; IDnum markerIndex, lastMarkerIndex; if (!seqReadInfo->m_bIsBinary) { file = fopen(sequenceFilename, "r"); if (file == NULL) exitErrorf(EXIT_FAILURE, true, "Could not read %s", sequenceFilename); // Reading sequence descriptor in first line if (sequenceCount_pg(preGraph) > 0 && !fgets(line, lineLength, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); seqReadInfo->m_pFile = file; } // Now that we have read all of the annotations, we go on to create the preNodes and tie them up for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { if (sequenceIndex % 1000000 == 0) velvetLog("Sequence %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); if (!seqReadInfo->m_bIsBinary) { while (line[0] != '>') if (!fgets(line, lineLength, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); } rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); markerIndex = 0; lastMarkerIndex = markerCounters[sequenceIndex]; currentPosition = 0; // Reading first (k-1) nucleotides tooShort = false; clearKmer(&initialKmer); 
//velvetLog("Initial kmer: "); TightString *tString = NULL; char *strString = NULL; if (seqReadInfo->m_bIsBinary) { tString = getTightStringInArray(seqReadInfo->m_sequences->tSequences, sequenceIndex - 1); strString = readTightString(tString); } for (readIndex = 0; readIndex < WORDLENGTH - 1; readIndex++) { if (seqReadInfo->m_bIsBinary) { if (readIndex >= tString->length) { tooShort = true; break; } c = strString[readIndex]; } else { c = getc(file); while (c == '\n' || c == '\r') c = getc(file); if (c == '>' || c == 'M' || c == EOF) { ungetc(c, file); tooShort = true; break; } } switch (c) { case 'A': case 'N': pushNucleotide(&initialKmer, ADENINE); break; case 'C': pushNucleotide(&initialKmer, CYTOSINE); break; case 'G': pushNucleotide(&initialKmer, GUANINE); break; case 'T': pushNucleotide(&initialKmer, THYMINE); break; default: velvetLog ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n"); fflush(stdout); abort(); } } if (tooShort) { //velvetLog("Skipping short read.. 
%d\n", sequenceIndex); chains[sequenceIndex] = preNodeCounter; if (seqReadInfo->m_bIsBinary) { free(strString); } else { if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); } continue; } char *currString = NULL; if (seqReadInfo->m_bIsBinary) { currString = &strString[readIndex]; seqReadInfo->m_ppCurrString = &currString; } latestPreNodeID = 0; while (annotIndex < lastAnnotIndex) { if (markerIndex == lastMarkerIndex || getPosition(annot) <= getInsertionMarkerPosition(currentMarker)) nextStop = getPosition(annot); else { nextStop = getInsertionMarkerPosition (currentMarker); } if (currentPosition != nextStop) { if (seqReadInfo->m_bIsBinary) { if (readIndex >= tString->length) { velvetLog("readIndex %ld beyond string len %ld\n", (uint64_t) readIndex, (uint64_t) tString->length); exit(1); } } //if (sequenceIndex == 481) // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop); addPreNodeToPreGraph_pg(preGraph, currentPosition, nextStop, seqReadInfo, &initialKmer, preNodeCounter); if (latestPreNodeID == 0) { chains[sequenceIndex] = preNodeCounter; } latestPreNodeID = preNodeCounter++; currentPosition = nextStop; } while (markerIndex < lastMarkerIndex && getInsertionMarkerPosition(currentMarker) == nextStop) { convertMarker(currentMarker, latestPreNodeID); currentMarker++; markerIndex++; } while (annotIndex < lastAnnotIndex && getPosition(annot) == nextStop) { for (readIndex = 0; readIndex < getAnnotationLength(annot); readIndex++) { if (seqReadInfo->m_bIsBinary) { c = *currString; currString += 1; // increment the pointer } else { c = getc(file); while (!isalpha(c)) c = getc(file); } //if (sequenceIndex == 481) // velvetLog("(%c)", c); switch (c) { case 'A': case 'N': pushNucleotide(&initialKmer, ADENINE); break; case 'C': pushNucleotide(&initialKmer, CYTOSINE); break; case 'G': pushNucleotide(&initialKmer, GUANINE); break; 
case 'T': pushNucleotide(&initialKmer, THYMINE); break; default: velvetLog ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n"); fflush(stdout); #ifdef DEBUG abort(); #endif exit(1); } } annot = getNextAnnotation(annot); annotIndex++; } } while (markerIndex < lastMarkerIndex) { if (currentPosition == getInsertionMarkerPosition(currentMarker)) { convertMarker(currentMarker, latestPreNodeID); currentMarker++; markerIndex++; } else { nextStop = getInsertionMarkerPosition (currentMarker); //if (sequenceIndex == 481) // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop); addPreNodeToPreGraph_pg(preGraph, currentPosition, nextStop, seqReadInfo, &initialKmer, preNodeCounter); if (latestPreNodeID == 0) chains[sequenceIndex] = preNodeCounter; latestPreNodeID = preNodeCounter++; currentPosition = getInsertionMarkerPosition (currentMarker); } } if (seqReadInfo->m_bIsBinary) { free(strString); } else { // End of sequence if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); //velvetLog(" \n"); } if (latestPreNodeID == 0) chains[sequenceIndex] = preNodeCounter; } free(markerCounters); if (!seqReadInfo->m_bIsBinary) { fclose(file); } }
// Creates insertion marker lists static void setInsertionMarkers(RoadMapArray * rdmaps, IDnum * markerCounters, InsertionMarker ** veryLastMarker, InsertionMarker ** insertionMarkers) { IDnum sequenceCounter = rdmaps->length; IDnum sequenceIndex, sequenceIndex2; Coordinate totalCount = 0; RoadMap *rdmap; Annotation *annot = rdmaps->annotations; InsertionMarker *nextMarker, *newMarker; IDnum annotIndex, lastAnnotIndex; InsertionMarker **insMarkers = callocOrExit(rdmaps->length + 1, InsertionMarker *); // Counting insertion markers for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1; sequenceIndex++) { //velvetLog("Going through sequence %d\n", sequenceIndex); rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); lastAnnotIndex = getAnnotationCount(rdmap); // Set insertion markers in previous sequences : for (annotIndex = 0; annotIndex < lastAnnotIndex; annotIndex++) { if (getAnnotSequenceID(annot) > 0) { markerCounters[getAnnotSequenceID(annot)] += 2; } else { markerCounters[-getAnnotSequenceID(annot)] += 2; } totalCount += 2; annot = getNextAnnotation(annot); } } // Allocating space *insertionMarkers = callocOrExit(totalCount, InsertionMarker); *veryLastMarker = *insertionMarkers + totalCount; // Pointing each node to its space nextMarker = *insertionMarkers; for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1; sequenceIndex++) { insMarkers[sequenceIndex] = nextMarker; nextMarker = nextMarker + markerCounters[sequenceIndex]; markerCounters[sequenceIndex] = 0; } // Filling up space with data annot = rdmaps->annotations; for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1; sequenceIndex++) { //velvetLog("Going through sequence %d\n", sequenceIndex); rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); lastAnnotIndex = getAnnotationCount(rdmap); // Set insertion markers in previous sequences : for (annotIndex = 0; annotIndex < lastAnnotIndex; annotIndex++) { sequenceIndex2 = getAnnotSequenceID(annot); if (sequenceIndex2 > 0) { newMarker = 
insMarkers[sequenceIndex2] + (markerCounters[sequenceIndex2])++; newMarker->annot = annot; newMarker->isStart = true; newMarker = insMarkers[sequenceIndex2] + (markerCounters[sequenceIndex2])++; newMarker->annot = annot; newMarker->isStart = false; } else { incrementAnnotationCoordinates(annot); newMarker = insMarkers[-sequenceIndex2] + (markerCounters[-sequenceIndex2])++; newMarker->annot = annot; newMarker->isStart = true; newMarker = insMarkers[-sequenceIndex2] + (markerCounters[-sequenceIndex2])++; newMarker->annot = annot; newMarker->isStart = false; } annot = getNextAnnotation(annot); } } orderInsertionMarkers(insMarkers, markerCounters, rdmaps); free(insMarkers); }
/*
 * Threads one sequence through the graph, k-mer by k-mer.
 *
 * For each k-mer of tString a (node, coord) pair is looked up by one of
 * three means, tried in this order:
 *   1. category == REFERENCE: findReferenceMapping on (seqID, index), when
 *      referenceMappings is not NULL;
 *   2. the current annotation covers the position and points to a sequence
 *      ID within [-refCount, refCount]: findReferenceMapping on the
 *      annotated reference coordinate;
 *   3. otherwise: lookup of the k-mer (or of its reverse word) in kmerTable.
 * A failed lookup after at least one successful one ends the threading.
 *
 * When a node is found:
 *   - if it directly continues the previous hit (same node, coord one
 *     further), the current passage marker is extended or the virtual
 *     coverage incremented;
 *   - otherwise a new passage marker is created (category / 2 >= CATEGORIES)
 *     or coverage is incremented (with a read start recorded when
 *     readTracking is set), and an arc is created from the previous node.
 *
 * Sequences shorter than the graph word length are ignored.
 */
static void threadSequenceThroughGraph(TightString * tString,
				       KmerOccurenceTable * kmerTable,
				       Graph * graph,
				       IDnum seqID, Category category,
				       boolean readTracking,
				       boolean double_strand,
				       ReferenceMapping * referenceMappings,
				       Coordinate referenceMappingCount,
				       IDnum refCount,
				       Annotation * annotations,
				       IDnum annotationCount,
				       boolean second_in_pair)
{
	Kmer word;
	Kmer antiWord;
	Coordinate readNucleotideIndex;
	Coordinate kmerIndex;
	KmerOccurence *kmerOccurence;
	int wordLength = getWordLength(graph);

	PassageMarkerI marker = NULL_IDX;
	PassageMarkerI previousMarker = NULL_IDX;
	Node *node = NULL;
	Node *previousNode = NULL;
	Coordinate coord = 0;
	Coordinate previousCoord = 0;
	Nucleotide nucleotide;
	boolean reversed;

	IDnum refID;
	Coordinate refCoord = 0;
	ReferenceMapping * refMap;
	Annotation * annotation = annotations;
	// index counts k-mers processed; uniqueIndex counts those outside
	// annotations; annotIndex counts those inside the current annotation
	Coordinate index = 0;
	Coordinate uniqueIndex = 0;
	Coordinate annotIndex = 0;
	IDnum annotCount = 0;
	SmallNodeList * nodePile = NULL;

	// Neglect any string shorter than WORDLENGTH :
	if (getLength(tString) < wordLength)
		return;

	clearKmer(&word);
	clearKmer(&antiWord);

	// Fill in the initial word :
	for (readNucleotideIndex = 0;
	     readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
		nucleotide = getNucleotide(readNucleotideIndex, tString);
		pushNucleotide(&word, nucleotide);
		// The reverse word is only maintained when it can be looked up
		if (double_strand || second_in_pair) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}
	}

	// Go through sequence
	while (readNucleotideIndex < getLength(tString)) {
		nucleotide = getNucleotide(readNucleotideIndex++, tString);
		pushNucleotide(&word, nucleotide);
		if (double_strand || second_in_pair) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}

		// Update annotation if necessary
		if (annotCount < annotationCount
		    && annotIndex == getAnnotationLength(annotation)) {
			annotation = getNextAnnotation(annotation);
			annotCount++;
			annotIndex = 0;
		}

		// Search for reference mapping
		if (category == REFERENCE) {
			if (referenceMappings)
				refMap =
				    findReferenceMapping(seqID, index,
							 referenceMappings,
							 referenceMappingCount);
			else
				refMap = NULL;

			if (refMap) {
				node = getNodeInGraph(graph, refMap->nodeID);
				// Coordinate computed from the node start for a
				// positive node ID, from the node end otherwise
				if (refMap->nodeID > 0) {
					coord =
					    refMap->nodeStart + (index -
								 refMap->
								 referenceStart);
				} else {
					coord =
					    getNodeLength(node) -
					    refMap->nodeStart -
					    refMap->length + (index -
							      refMap->
							      referenceStart);
				}
			} else {
				node = NULL;
				// A gap after a hit ends the threading
				if (previousNode)
					break;
			}
		}
		// Search for reference-based mapping
		else if (annotCount < annotationCount
			 && uniqueIndex >= getPosition(annotation)
			 && getAnnotSequenceID(annotation) <= refCount
			 && getAnnotSequenceID(annotation) >= -refCount) {
			refID = getAnnotSequenceID(annotation);
			// A negative ID walks the reference backwards
			if (refID > 0)
				refCoord = getStart(annotation) + annotIndex;
			else
				refCoord = getStart(annotation) - annotIndex;

			refMap =
			    findReferenceMapping(refID, refCoord,
						 referenceMappings,
						 referenceMappingCount);
			// If success
			if (refMap) {
				if (refID > 0) {
					node =
					    getNodeInGraph(graph,
							   refMap->nodeID);
					if (refMap->nodeID > 0) {
						coord =
						    refMap->nodeStart +
						    (refCoord -
						     refMap->referenceStart);
					} else {
						coord =
						    getNodeLength(node) -
						    refMap->nodeStart -
						    refMap->length +
						    (refCoord -
						     refMap->referenceStart);
					}
				} else {
					// Opposite node of the one mapped
					node =
					    getNodeInGraph(graph,
							   -refMap->nodeID);
					if (refMap->nodeID > 0) {
						coord =
						    getNodeLength(node) -
						    refMap->nodeStart -
						    (refCoord -
						     refMap->referenceStart) -
						    1;
					} else {
						coord =
						    refMap->nodeStart +
						    refMap->length -
						    (refCoord -
						     refMap->referenceStart) -
						    1;
					}
				}
			} else {
				node = NULL;
				if (previousNode)
					break;
			}
		}
		// Search in table
		else {
			reversed = false;
			if (double_strand) {
				// Look up whichever of the two words compares lower
				if (compareKmers(&word, &antiWord) <= 0) {
					kmerOccurence =
					    findKmerInKmerOccurenceTable(&word,
									 kmerTable);
				} else {
					kmerOccurence =
					    findKmerInKmerOccurenceTable
					    (&antiWord, kmerTable);
					reversed = true;
				}
			} else {
				// Single strand: the second read of a pair is
				// looked up through its reverse word
				if (!second_in_pair) {
					kmerOccurence =
					    findKmerInKmerOccurenceTable(&word,
									 kmerTable);
				} else {
					kmerOccurence =
					    findKmerInKmerOccurenceTable
					    (&antiWord, kmerTable);
					reversed = true;
				}
			}

			if (kmerOccurence) {
				if (!reversed) {
					node =
					    getNodeInGraph(graph,
							   getKmerOccurenceNodeID
							   (kmerOccurence));
					coord =
					    getKmerOccurencePosition
					    (kmerOccurence);
				} else {
					node =
					    getNodeInGraph(graph,
							   -getKmerOccurenceNodeID
							   (kmerOccurence));
					coord =
					    getNodeLength(node) -
					    getKmerOccurencePosition
					    (kmerOccurence) - 1;
				}
			} else {
				node = NULL;
				if (previousNode)
					break;
			}
		}

		// Increment positions
		if (annotCount < annotationCount
		    && uniqueIndex >= getPosition(annotation))
			annotIndex++;
		else
			uniqueIndex++;

		// Fill in graph
		if (node) {
			// NOTE(review): this guard tests OPENMP rather than the
			// _OPENMP macro; confirm the build defines OPENMP,
			// otherwise the locking calls below are compiled out.
#ifdef OPENMP
			lockNode(node);
#endif
			kmerIndex = readNucleotideIndex - wordLength;

			// Same node, next coordinate: extend the current hit
			if (previousNode == node
			    && previousCoord == coord - 1) {
				if (category / 2 >= CATEGORIES) {
					setPassageMarkerFinish(marker,
							       kmerIndex +
							       1);
					setFinishOffset(marker,
							getNodeLength(node)
							- coord - 1);
				} else {
#ifndef SINGLE_COV_CAT
					incrementVirtualCoverage(node, category / 2, 1);
					incrementOriginalVirtualCoverage(node, category / 2, 1);
#else
					incrementVirtualCoverage(node, 1);
#endif
				}
#ifdef OPENMP
				unLockNode(node);
#endif
			} else {
				// New node or jump within the node: start a new hit
				if (category / 2 >= CATEGORIES) {
					marker =
					    newPassageMarker(seqID,
							     kmerIndex,
							     kmerIndex + 1,
							     coord,
							     getNodeLength
							     (node) -
							     coord - 1);
					transposePassageMarker(marker,
							       node);
					connectPassageMarkers
					    (previousMarker, marker,
					     graph);
					previousMarker = marker;
				} else {
					if (readTracking) {
						// First visit of this node by the
						// read: record a read start;
						// later visits blur the marker
						if (!isNodeMemorized(node, nodePile)) {
							addReadStart(node,
								     seqID,
								     coord,
								     graph,
								     kmerIndex);
							memorizeNode(node, &nodePile);
						} else {
							blurLastShortReadMarker
							    (node, graph);
						}
					}
#ifndef SINGLE_COV_CAT
					incrementVirtualCoverage(node, category / 2, 1);
					incrementOriginalVirtualCoverage(node, category / 2, 1);
#else
					incrementVirtualCoverage(node, 1);
#endif
				}
				// NOTE(review): on this path the lockNode(node)
				// taken above has no matching unLockNode in this
				// function; confirm lockTwoNodes/unLockTwoNodes
				// account for it.
#ifdef OPENMP
				lockTwoNodes(node, previousNode);
#endif
				createArc(previousNode, node, graph);
#ifdef OPENMP
				unLockTwoNodes(node, previousNode);
#endif
			}

			previousNode = node;
			previousCoord = coord;
		}
		index++;
	}

	// The node pile is only filled on the read-tracking path above
	if (readTracking && category / 2 < CATEGORIES)
		unMemorizeNodes(&nodePile);
}
static void ghostThreadSequenceThroughGraph(TightString * tString, KmerOccurenceTable * kmerTable, Graph * graph, IDnum seqID, Category category, boolean readTracking, boolean double_strand, ReferenceMapping * referenceMappings, Coordinate referenceMappingCount, IDnum refCount, Annotation * annotations, IDnum annotationCount, boolean second_in_pair) { Kmer word; Kmer antiWord; Coordinate readNucleotideIndex; KmerOccurence *kmerOccurence; int wordLength = getWordLength(graph); Nucleotide nucleotide; IDnum refID; Coordinate refCoord; ReferenceMapping * refMap = NULL; Coordinate uniqueIndex = 0; Coordinate annotIndex = 0; IDnum annotCount = 0; boolean reversed; SmallNodeList * nodePile = NULL; Annotation * annotation = annotations; Node *node; Node *previousNode = NULL; // Neglect any read which will not be short paired if ((!readTracking && category % 2 == 0) || category / 2 >= CATEGORIES) return; // Neglect any string shorter than WORDLENGTH : if (getLength(tString) < wordLength) return; // Verify that all short reads are reasonnably short if (getLength(tString) > USHRT_MAX) { velvetLog("Short read of length %lli, longer than limit %i\n", (long long) getLength(tString), SHRT_MAX); velvetLog("You should better declare this sequence as long, because it genuinely is!\n"); exit(1); } clearKmer(&word); clearKmer(&antiWord); // Fill in the initial word : for (readNucleotideIndex = 0; readNucleotideIndex < wordLength - 1; readNucleotideIndex++) { nucleotide = getNucleotide(readNucleotideIndex, tString); pushNucleotide(&word, nucleotide); if (double_strand || second_in_pair) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } } // Go through sequence while (readNucleotideIndex < getLength(tString)) { // Shift word: nucleotide = getNucleotide(readNucleotideIndex++, tString); pushNucleotide(&word, nucleotide); if (double_strand || second_in_pair) { #ifdef COLOR reversePushNucleotide(&antiWord, 
nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } // Update annotation if necessary if (annotCount < annotationCount && annotIndex == getAnnotationLength(annotation)) { annotation = getNextAnnotation(annotation); annotCount++; annotIndex = 0; } // Search for reference mapping if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation) && getAnnotSequenceID(annotation) <= refCount && getAnnotSequenceID(annotation) >= -refCount) { refID = getAnnotSequenceID(annotation); if (refID > 0) refCoord = getStart(annotation) + annotIndex; else refCoord = getStart(annotation) - annotIndex; refMap = findReferenceMapping(refID, refCoord, referenceMappings, referenceMappingCount); // If success if (refMap) { if (refID > 0) node = getNodeInGraph(graph, refMap->nodeID); else node = getNodeInGraph(graph, -refMap->nodeID); } else { node = NULL; if (previousNode) break; } } // if not.. look in table else { reversed = false; if (double_strand) { if (compareKmers(&word, &antiWord) <= 0) { kmerOccurence = findKmerInKmerOccurenceTable(&word, kmerTable); } else { kmerOccurence = findKmerInKmerOccurenceTable(&antiWord, kmerTable); reversed = true; } } else { if (!second_in_pair) { kmerOccurence = findKmerInKmerOccurenceTable(&word, kmerTable); } else { kmerOccurence = findKmerInKmerOccurenceTable(&antiWord, kmerTable); reversed = true; } } if (kmerOccurence) { if (!reversed) node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence)); else node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence)); } else { node = NULL; if (previousNode) break; } } if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation)) annotIndex++; else uniqueIndex++; previousNode = node; // Fill in graph if (node && !isNodeMemorized(node, nodePile)) { #ifdef OPENMP lockNode(node); #endif incrementReadStartCount(node, graph); #ifdef OPENMP unLockNode(node); #endif memorizeNode(node, &nodePile); } } unMemorizeNodes(&nodePile); }
// Threads each sequences and creates preArcs according to road map indications static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * chains) { Coordinate currentPosition, currentInternalPosition; IDnum sequenceIndex; Annotation *annot = rdmaps->annotations; IDnum referenceCount = rdmaps->referenceCount; IDnum currentPreNodeID, nextInternalPreNodeID; RoadMap *rdmap; IDnum annotIndex, lastAnnotIndex; boolean isReference; if (rdmaps->referenceCount > 0) allocatePreMarkerCountSpace_pg(preGraph); for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { if (sequenceIndex % 100000 == 0) velvetLog("Connecting %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); nextInternalPreNodeID = chooseNextInternalPreNode (chains[sequenceIndex] - 1, sequenceIndex, preGraph, chains); isReference = (sequenceIndex <= referenceCount); currentPosition = 0; currentInternalPosition = 0; currentPreNodeID = 0; // Recursion up to last annotation while (annotIndex < lastAnnotIndex || nextInternalPreNodeID != 0) { if (annotIndex == lastAnnotIndex || (nextInternalPreNodeID != 0 && currentInternalPosition < getPosition(annot))) { connectPreNodeToTheNext(¤tPreNodeID, nextInternalPreNodeID, ¤tPosition, sequenceIndex, isReference, preGraph); nextInternalPreNodeID = chooseNextInternalPreNode (currentPreNodeID, sequenceIndex, preGraph, chains); currentInternalPosition += getPreNodeLength_pg(currentPreNodeID, preGraph); } else { connectAnnotation(¤tPreNodeID, annot, ¤tPosition, sequenceIndex, isReference, preGraph); annot = getNextAnnotation(annot); annotIndex++; } } } if (rdmaps->referenceCount > 0) { allocatePreMarkerSpace_pg(preGraph); createPreMarkers(rdmaps, preGraph, chains); } }
/*
 * Builds the pre-marker chain of every reference sequence.
 *
 * For each of the first rdmaps->referenceCount sequences, the internal
 * preNodes of the sequence (as returned by chooseNextInternalPreNode) and
 * the annotations of its road map are visited in order of position:
 *   - an internal preNode gets a new PreMarker via addPreMarker_pg;
 *   - an annotation is handed to reConnectAnnotation.
 *
 * rdmaps   : road maps and their annotations
 * preGraph : pregraph receiving the markers
 * chains   : per-sequence value whose predecessor (chains[i] - 1) seeds
 *            chooseNextInternalPreNode
 */
static void createPreMarkers(RoadMapArray * rdmaps, PreGraph * preGraph,
			     IDnum * chains)
{
	Coordinate currentPosition, currentInternalPosition;
	IDnum sequenceIndex;
	// A single cursor walks the annotations of all sequences in turn
	Annotation *annot = rdmaps->annotations;
	IDnum referenceCount = rdmaps->referenceCount;
	IDnum currentPreNodeID, nextInternalPreNodeID;
	RoadMap *rdmap;
	IDnum annotIndex, lastAnnotIndex;
	PreMarker *previous;

	for (sequenceIndex = 1; sequenceIndex <= referenceCount;
	     sequenceIndex++) {

		if (sequenceIndex % 100000 == 0)
			velvetLog("Connecting %li / %li\n",
				  (long) sequenceIndex,
				  (long) sequenceCount_pg(preGraph));

		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
		annotIndex = 0;
		lastAnnotIndex = getAnnotationCount(rdmap);
		nextInternalPreNodeID =
		    chooseNextInternalPreNode(chains[sequenceIndex] - 1,
					      sequenceIndex, preGraph, chains);

		previous = NULL;
		currentPosition = 0;
		currentInternalPosition = 0;
		currentPreNodeID = 0;

		// Loop until both annotations and internal preNodes are exhausted
		while (annotIndex < lastAnnotIndex
		       || nextInternalPreNodeID != 0) {
			if (annotIndex == lastAnnotIndex
			    || (nextInternalPreNodeID != 0
				&& currentInternalPosition <
				getPosition(annot))) {
				// The next internal preNode comes first
				previous =
				    addPreMarker_pg(preGraph,
						    nextInternalPreNodeID,
						    sequenceIndex,
						    &currentPosition,
						    previous);
				currentPreNodeID = nextInternalPreNodeID;
				nextInternalPreNodeID =
				    chooseNextInternalPreNode(currentPreNodeID,
							      sequenceIndex,
							      preGraph,
							      chains);
				currentInternalPosition +=
				    getPreNodeLength_pg(currentPreNodeID,
							preGraph);
			} else {
				// The next annotation comes first
				reConnectAnnotation(&currentPreNodeID, annot,
						    &currentPosition,
						    sequenceIndex, preGraph,
						    &previous);
				annot = getNextAnnotation(annot);
				annotIndex++;
			}
		}
	}
}