/*
 * Picks the preNode that follows currentPreNodeID when walking the chain of
 * sequence sequenceIndex.
 *
 * Returns currentPreNodeID + 1 when that ID still lies before the first
 * preNode recorded for the following sequence (chains[sequenceIndex + 1]),
 * or unconditionally when sequenceIndex is at or past the sequence count
 * of preGraph. Returns 0 when currentPreNodeID is at or past the preNode
 * count of preGraph, or when the chain of this sequence is exhausted.
 */
static IDnum chooseNextInternalPreNode(IDnum currentPreNodeID,
				       IDnum sequenceIndex,
				       PreGraph * preGraph, IDnum * chains)
{
	// Nothing can follow the last preNode of the graph
	if (currentPreNodeID >= preNodeCount_pg(preGraph)) {
		return 0;
	}

	// Short-circuit order matters: chains[sequenceIndex + 1] is only read
	// when a following sequence actually exists.
	if (sequenceIndex >= sequenceCount_pg(preGraph)
	    || currentPreNodeID + 1 < chains[sequenceIndex + 1]) {
		return currentPreNodeID + 1;
	} else {
		return 0;
	}
}
// Threads each sequences and creates preArcs according to road map indications static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * chains) { IDnum sequenceIndex; IDnum referenceCount = rdmaps->referenceCount; #ifdef _OPENMP annotationOffset = mallocOrExit(rdmaps->length + 1, Coordinate); annotationOffset[0] = 0; for (sequenceIndex = 1; sequenceIndex <= rdmaps->length; sequenceIndex++) annotationOffset[sequenceIndex] = annotationOffset[sequenceIndex - 1] + getAnnotationCount(getRoadMapInArray(rdmaps, sequenceIndex - 1)); #else Annotation *annot = rdmaps->annotations; #endif if (rdmaps->referenceCount > 0) allocatePreMarkerCountSpace_pg(preGraph); #ifdef _OPENMP int threads = omp_get_max_threads(); if (threads > 8) threads = 8; #pragma omp parallel for num_threads(threads) #endif for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { #ifdef _OPENMP Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]); #endif RoadMap *rdmap; Coordinate currentPosition, currentInternalPosition; IDnum currentPreNodeID, nextInternalPreNodeID; IDnum annotIndex, lastAnnotIndex; boolean isReference; if (sequenceIndex % 1000000 == 0) velvetLog("Connecting %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); nextInternalPreNodeID = chooseNextInternalPreNode (chains[sequenceIndex] - 1, sequenceIndex, preGraph, chains); isReference = (sequenceIndex <= referenceCount); currentPosition = 0; currentInternalPosition = 0; currentPreNodeID = 0; // Recursion up to last annotation while (annotIndex < lastAnnotIndex || nextInternalPreNodeID != 0) { if (annotIndex == lastAnnotIndex || (nextInternalPreNodeID != 0 && currentInternalPosition < getPosition(annot))) { connectPreNodeToTheNext(¤tPreNodeID, nextInternalPreNodeID, ¤tPosition, sequenceIndex, isReference, 
preGraph); nextInternalPreNodeID = chooseNextInternalPreNode (currentPreNodeID, sequenceIndex, preGraph, chains); currentInternalPosition += getPreNodeLength_pg(currentPreNodeID, preGraph); } else { connectAnnotation(¤tPreNodeID, annot, ¤tPosition, sequenceIndex, isReference, preGraph); annot = getNextAnnotation(annot); annotIndex++; } } } if (rdmaps->referenceCount > 0) { allocatePreMarkerSpace_pg(preGraph); createPreMarkers(rdmaps, preGraph, chains); } #ifdef _OPENMP free(annotationOffset); annotationOffset = NULL; #endif }
/*
 * Creates the preMarkers of the reference sequences (indices 1 to
 * rdmaps->referenceCount) by walking each sequence the same way
 * connectPreNodes() does: internal preNodes obtained from
 * chooseNextInternalPreNode() are interleaved with the annotations of the
 * sequence's road map.
 *
 * Each internal preNode receives a preMarker through addPreMarker_pg(),
 * chained to the previous marker of the same sequence; annotations are
 * handled by reConnectAnnotation().
 *
 * With _OPENMP defined, the loop runs on at most 8 threads, each iteration
 * finds its first annotation through annotationOffset (declared outside this
 * function and expected to be filled by the caller), and the call to
 * addPreMarker_pg() is wrapped in lockNode()/unLockNode() on the target
 * preNode.
 */
static void createPreMarkers(RoadMapArray * rdmaps, PreGraph * preGraph,
			     IDnum * chains)
{
	IDnum sequenceIndex;
	IDnum referenceCount = rdmaps->referenceCount;
#ifndef _OPENMP
	// Serial build: a single cursor runs through all annotations in order
	Annotation *annot = rdmaps->annotations;
#endif

#ifdef _OPENMP
	int threads = omp_get_max_threads();
	if (threads > 8)
		threads = 8;

	#pragma omp parallel for num_threads(threads)
#endif
	for (sequenceIndex = 1;
	     sequenceIndex <= referenceCount;
	     sequenceIndex++) {
#ifdef _OPENMP
		// Per-iteration cursor positioned on this sequence's first annotation
		Annotation *annot =
		    getAnnotationInArray(rdmaps->annotations,
					 annotationOffset[sequenceIndex - 1]);
#endif
		RoadMap *rdmap;
		Coordinate currentPosition, currentInternalPosition;
		IDnum currentPreNodeID, nextInternalPreNodeID;
		IDnum annotIndex, lastAnnotIndex;
		PreMarker * previous;

		if (sequenceIndex % 1000000 == 0)
			velvetLog("Connecting %li / %li\n",
				  (long) sequenceIndex,
				  (long) sequenceCount_pg(preGraph));

		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
		annotIndex = 0;
		lastAnnotIndex = getAnnotationCount(rdmap);
		nextInternalPreNodeID =
		    chooseNextInternalPreNode(chains[sequenceIndex] - 1,
					      sequenceIndex, preGraph, chains);

		previous = NULL;
		currentPosition = 0;
		currentInternalPosition = 0;
		currentPreNodeID = 0;

		// Iterate until both the annotations and the internal preNodes are used up
		while (annotIndex < lastAnnotIndex
		       || nextInternalPreNodeID != 0) {
			// Take the internal preNode when no annotation is left, or when
			// one is pending and the internal position is still short of
			// the next annotation's position.
			if (annotIndex == lastAnnotIndex
			    || (nextInternalPreNodeID != 0
				&& currentInternalPosition < getPosition(annot))) {
#ifdef _OPENMP
				lockNode(nextInternalPreNodeID);
#endif
				previous = addPreMarker_pg(preGraph,
							   nextInternalPreNodeID,
							   sequenceIndex,
							   &currentPosition,
							   previous);
#ifdef _OPENMP
				unLockNode(nextInternalPreNodeID);
#endif
				currentPreNodeID = nextInternalPreNodeID;
				nextInternalPreNodeID =
				    chooseNextInternalPreNode(currentPreNodeID,
							      sequenceIndex,
							      preGraph, chains);
				currentInternalPosition +=
				    getPreNodeLength_pg(currentPreNodeID, preGraph);
			} else {
				reConnectAnnotation(&currentPreNodeID, annot,
						    &currentPosition,
						    sequenceIndex, preGraph,
						    &previous);
				annot = getNextAnnotation(annot);
				annotIndex++;
			}
		}
	}
}
// Creates the preNode using insertion marker and annotation lists for each sequence static void // Creates the preNode using insertion marker and annotation lists for each sequence createPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * markerCounters, InsertionMarker * insertionMarkers, InsertionMarker * veryLastMarker, IDnum * chains, SequencesReader *seqReadInfo, int WORDLENGTH) { char *sequenceFilename = seqReadInfo->m_seqFilename; Annotation *annot = rdmaps->annotations; IDnum latestPreNodeID; InsertionMarker *currentMarker = insertionMarkers; IDnum sequenceIndex; Coordinate currentPosition, nextStop; IDnum preNodeCounter = 1; FILE *file = NULL; char line[50000]; int lineLength = 50000; Coordinate readIndex; boolean tooShort; Kmer initialKmer; char c; RoadMap *rdmap; IDnum annotIndex, lastAnnotIndex; IDnum markerIndex, lastMarkerIndex; if (!seqReadInfo->m_bIsBinary) { file = fopen(sequenceFilename, "r"); if (file == NULL) exitErrorf(EXIT_FAILURE, true, "Could not read %s", sequenceFilename); // Reading sequence descriptor in first line if (sequenceCount_pg(preGraph) > 0 && !fgets(line, lineLength, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); seqReadInfo->m_pFile = file; } // Now that we have read all of the annotations, we go on to create the preNodes and tie them up for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { if (sequenceIndex % 1000000 == 0) velvetLog("Sequence %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); if (!seqReadInfo->m_bIsBinary) { while (line[0] != '>') if (!fgets(line, lineLength, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); } rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); markerIndex = 0; lastMarkerIndex = markerCounters[sequenceIndex]; currentPosition = 0; // Reading first (k-1) nucleotides tooShort = false; clearKmer(&initialKmer); 
//velvetLog("Initial kmer: "); TightString *tString = NULL; char *strString = NULL; if (seqReadInfo->m_bIsBinary) { tString = getTightStringInArray(seqReadInfo->m_sequences->tSequences, sequenceIndex - 1); strString = readTightString(tString); } for (readIndex = 0; readIndex < WORDLENGTH - 1; readIndex++) { if (seqReadInfo->m_bIsBinary) { if (readIndex >= tString->length) { tooShort = true; break; } c = strString[readIndex]; } else { c = getc(file); while (c == '\n' || c == '\r') c = getc(file); if (c == '>' || c == 'M' || c == EOF) { ungetc(c, file); tooShort = true; break; } } switch (c) { case 'A': case 'N': pushNucleotide(&initialKmer, ADENINE); break; case 'C': pushNucleotide(&initialKmer, CYTOSINE); break; case 'G': pushNucleotide(&initialKmer, GUANINE); break; case 'T': pushNucleotide(&initialKmer, THYMINE); break; default: velvetLog ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n"); fflush(stdout); abort(); } } if (tooShort) { //velvetLog("Skipping short read.. 
%d\n", sequenceIndex); chains[sequenceIndex] = preNodeCounter; if (seqReadInfo->m_bIsBinary) { free(strString); } else { if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); } continue; } char *currString = NULL; if (seqReadInfo->m_bIsBinary) { currString = &strString[readIndex]; seqReadInfo->m_ppCurrString = &currString; } latestPreNodeID = 0; while (annotIndex < lastAnnotIndex) { if (markerIndex == lastMarkerIndex || getPosition(annot) <= getInsertionMarkerPosition(currentMarker)) nextStop = getPosition(annot); else { nextStop = getInsertionMarkerPosition (currentMarker); } if (currentPosition != nextStop) { if (seqReadInfo->m_bIsBinary) { if (readIndex >= tString->length) { velvetLog("readIndex %ld beyond string len %ld\n", (uint64_t) readIndex, (uint64_t) tString->length); exit(1); } } //if (sequenceIndex == 481) // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop); addPreNodeToPreGraph_pg(preGraph, currentPosition, nextStop, seqReadInfo, &initialKmer, preNodeCounter); if (latestPreNodeID == 0) { chains[sequenceIndex] = preNodeCounter; } latestPreNodeID = preNodeCounter++; currentPosition = nextStop; } while (markerIndex < lastMarkerIndex && getInsertionMarkerPosition(currentMarker) == nextStop) { convertMarker(currentMarker, latestPreNodeID); currentMarker++; markerIndex++; } while (annotIndex < lastAnnotIndex && getPosition(annot) == nextStop) { for (readIndex = 0; readIndex < getAnnotationLength(annot); readIndex++) { if (seqReadInfo->m_bIsBinary) { c = *currString; currString += 1; // increment the pointer } else { c = getc(file); while (!isalpha(c)) c = getc(file); } //if (sequenceIndex == 481) // velvetLog("(%c)", c); switch (c) { case 'A': case 'N': pushNucleotide(&initialKmer, ADENINE); break; case 'C': pushNucleotide(&initialKmer, CYTOSINE); break; case 'G': pushNucleotide(&initialKmer, GUANINE); break; 
case 'T': pushNucleotide(&initialKmer, THYMINE); break; default: velvetLog ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n"); fflush(stdout); #ifdef DEBUG abort(); #endif exit(1); } } annot = getNextAnnotation(annot); annotIndex++; } } while (markerIndex < lastMarkerIndex) { if (currentPosition == getInsertionMarkerPosition(currentMarker)) { convertMarker(currentMarker, latestPreNodeID); currentMarker++; markerIndex++; } else { nextStop = getInsertionMarkerPosition (currentMarker); //if (sequenceIndex == 481) // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop); addPreNodeToPreGraph_pg(preGraph, currentPosition, nextStop, seqReadInfo, &initialKmer, preNodeCounter); if (latestPreNodeID == 0) chains[sequenceIndex] = preNodeCounter; latestPreNodeID = preNodeCounter++; currentPosition = getInsertionMarkerPosition (currentMarker); } } if (seqReadInfo->m_bIsBinary) { free(strString); } else { // End of sequence if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); //velvetLog(" \n"); } if (latestPreNodeID == 0) chains[sequenceIndex] = preNodeCounter; } free(markerCounters); if (!seqReadInfo->m_bIsBinary) { fclose(file); } }
// Counts how many preNodes are to be created to allocate appropriate memory static void countPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * markerCounters, InsertionMarker * insertionMarkers, InsertionMarker * veryLastMarker) { Annotation *annot = rdmaps->annotations; InsertionMarker *currentMarker = insertionMarkers; IDnum markerIndex, lastMarkerIndex; IDnum sequenceIndex; Coordinate currentPosition, nextStop; IDnum preNodeCounter = 0; RoadMap *rdmap; IDnum annotIndex, lastAnnotIndex; // Now that we have read all of the annotations, we go on to create the preNodes and tie them up for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); markerIndex = 0; lastMarkerIndex = markerCounters[sequenceIndex]; currentPosition = 0; while (annotIndex < lastAnnotIndex) { if (markerIndex == lastMarkerIndex || getPosition(annot) <= getInsertionMarkerPosition(currentMarker)) nextStop = getPosition(annot); else nextStop = getInsertionMarkerPosition (currentMarker); if (currentPosition != nextStop) { preNodeCounter++; currentPosition = nextStop; } while (markerIndex < lastMarkerIndex && getInsertionMarkerPosition(currentMarker) == currentPosition) { currentMarker++; markerIndex++; } while (annotIndex < lastAnnotIndex && getPosition(annot) == currentPosition) { annot = getNextAnnotation(annot); annotIndex++; } } while (markerIndex < lastMarkerIndex) { if (currentPosition == getInsertionMarkerPosition(currentMarker)) { currentMarker++; markerIndex++; } else { preNodeCounter++; currentPosition = getInsertionMarkerPosition (currentMarker); } } } allocatePreNodeSpace_pg(preGraph, preNodeCounter); }
// Threads each sequences and creates preArcs according to road map indications static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * chains) { Coordinate currentPosition, currentInternalPosition; IDnum sequenceIndex; Annotation *annot = rdmaps->annotations; IDnum referenceCount = rdmaps->referenceCount; IDnum currentPreNodeID, nextInternalPreNodeID; RoadMap *rdmap; IDnum annotIndex, lastAnnotIndex; boolean isReference; if (rdmaps->referenceCount > 0) allocatePreMarkerCountSpace_pg(preGraph); for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { if (sequenceIndex % 100000 == 0) velvetLog("Connecting %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); nextInternalPreNodeID = chooseNextInternalPreNode (chains[sequenceIndex] - 1, sequenceIndex, preGraph, chains); isReference = (sequenceIndex <= referenceCount); currentPosition = 0; currentInternalPosition = 0; currentPreNodeID = 0; // Recursion up to last annotation while (annotIndex < lastAnnotIndex || nextInternalPreNodeID != 0) { if (annotIndex == lastAnnotIndex || (nextInternalPreNodeID != 0 && currentInternalPosition < getPosition(annot))) { connectPreNodeToTheNext(¤tPreNodeID, nextInternalPreNodeID, ¤tPosition, sequenceIndex, isReference, preGraph); nextInternalPreNodeID = chooseNextInternalPreNode (currentPreNodeID, sequenceIndex, preGraph, chains); currentInternalPosition += getPreNodeLength_pg(currentPreNodeID, preGraph); } else { connectAnnotation(¤tPreNodeID, annot, ¤tPosition, sequenceIndex, isReference, preGraph); annot = getNextAnnotation(annot); annotIndex++; } } } if (rdmaps->referenceCount > 0) { allocatePreMarkerSpace_pg(preGraph); createPreMarkers(rdmaps, preGraph, chains); } }
/*
 * Creates the preMarkers of the reference sequences (indices 1 to
 * rdmaps->referenceCount), serial version.
 *
 * Each sequence is walked the same way connectPreNodes() does: internal
 * preNodes obtained from chooseNextInternalPreNode() are interleaved with
 * the annotations of the sequence's road map, which are read through a
 * single cursor running across all sequences in order. Each internal preNode
 * receives a preMarker through addPreMarker_pg(), chained to the previous
 * marker of the same sequence; annotations are handled by
 * reConnectAnnotation().
 *
 * NOTE(review): an OpenMP-aware definition of createPreMarkers also appears
 * earlier in this file; only one of the two can be compiled — confirm which
 * one is intended to stay.
 */
static void createPreMarkers(RoadMapArray * rdmaps, PreGraph * preGraph,
			     IDnum * chains)
{
	Coordinate currentPosition, currentInternalPosition;
	IDnum sequenceIndex;
	Annotation *annot = rdmaps->annotations;
	IDnum referenceCount = rdmaps->referenceCount;
	IDnum currentPreNodeID, nextInternalPreNodeID;
	RoadMap *rdmap;
	IDnum annotIndex, lastAnnotIndex;
	PreMarker * previous;

	for (sequenceIndex = 1;
	     sequenceIndex <= referenceCount;
	     sequenceIndex++) {

		if (sequenceIndex % 100000 == 0)
			velvetLog("Connecting %li / %li\n",
				  (long) sequenceIndex,
				  (long) sequenceCount_pg(preGraph));

		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
		annotIndex = 0;
		lastAnnotIndex = getAnnotationCount(rdmap);
		nextInternalPreNodeID =
		    chooseNextInternalPreNode(chains[sequenceIndex] - 1,
					      sequenceIndex, preGraph, chains);

		previous = NULL;
		currentPosition = 0;
		currentInternalPosition = 0;
		currentPreNodeID = 0;

		// Iterate until both the annotations and the internal preNodes are used up
		while (annotIndex < lastAnnotIndex
		       || nextInternalPreNodeID != 0) {
			// Take the internal preNode when no annotation is left, or when
			// one is pending and the internal position is still short of
			// the next annotation's position.
			if (annotIndex == lastAnnotIndex
			    || (nextInternalPreNodeID != 0
				&& currentInternalPosition < getPosition(annot))) {
				previous = addPreMarker_pg(preGraph,
							   nextInternalPreNodeID,
							   sequenceIndex,
							   &currentPosition,
							   previous);
				currentPreNodeID = nextInternalPreNodeID;
				nextInternalPreNodeID =
				    chooseNextInternalPreNode(currentPreNodeID,
							      sequenceIndex,
							      preGraph, chains);
				currentInternalPosition +=
				    getPreNodeLength_pg(currentPreNodeID, preGraph);
			} else {
				reConnectAnnotation(&currentPreNodeID, annot,
						    &currentPosition,
						    sequenceIndex, preGraph,
						    &previous);
				annot = getNextAnnotation(annot);
				annotIndex++;
			}
		}
	}
}