/*
 * Prepares the file-level state used by the local correction code for
 * the given graph.
 *
 * Stores argGraph and its word length in the globals `graph` and
 * `WORDLENGTH`, logs the divergence cutoff, then allocates:
 *   - times, previous and dheapNodes, each with 2 * nodeCount + 1 entries,
 *     reset to -1, NULL and NULL respectively;
 *   - a fresh DFibHeap in dheap;
 *   - fastSequence and slowSequence, via newTightString(MAXREADLENGTH);
 *   - Fmatrix, a (MAXREADLENGTH + 1) x (MAXREADLENGTH + 1) table of
 *     doubles obtained row by row from callocOrExit.
 */
void prepareGraphForLocalCorrections(Graph * argGraph)
{
	const IDnum nodes = nodeCount(argGraph);
	const IDnum slotCount = 2 * nodes + 1;
	IDnum slot;
	IDnum row;

	// Global parameters
	graph = argGraph;
	WORDLENGTH = getWordLength(graph);

	velvetLog("Preparing to correct graph with cutoff %f\n", MAXDIVERGENCE);

	// Work arrays, all sized identically
	times = mallocOrExit(slotCount, Time);
	previous = mallocOrExit(slotCount, Node *);
	dheapNodes = mallocOrExit(slotCount, DFibHeapNode *);

	dheap = newDFibHeap();

	fastSequence = newTightString(MAXREADLENGTH);
	slowSequence = newTightString(MAXREADLENGTH);

	// mallocOrExit memory is reset explicitly before first use
	slot = 0;
	while (slot < slotCount) {
		previous[slot] = NULL;
		dheapNodes[slot] = NULL;
		times[slot] = -1;
		slot++;
	}

	// Score matrix: one array of row pointers, then each row
	Fmatrix = callocOrExit(MAXREADLENGTH + 1, double *);
	for (row = 0; row < MAXREADLENGTH + 1; row++) {
		Fmatrix[row] = callocOrExit(MAXREADLENGTH + 1, double);
	}
}
KmerOccurenceTable * newKmerOccurenceTable(short int accelerationBits, int wordLength) { KmerOccurenceTable * kmerTable = mallocOrExit(1, KmerOccurenceTable); if (accelerationBits > 2 * wordLength) accelerationBits = 2 * wordLength; if (accelerationBits > 32) accelerationBits = 32; if (accelerationBits > 0) { resetKeyFilter(accelerationBits); kmerTable->accelerationBits = accelerationBits; kmerTable->accelerationTable = callocOrExit((((size_t) 1) << accelerationBits) + 1, IDnum); kmerTable->accelerationShift = (short int) 2 *wordLength - accelerationBits; } else { kmerTable->accelerationBits = 0; kmerTable->accelerationTable = NULL; kmerTable->accelerationShift = 0; } return kmerTable; }
// // Creates a tightString from an array of normal strings // TightString *newTightStringArrayFromStringArray(char **sequences, IDnum sequenceCount, char **tSeqMem) { IDnum sequenceIndex; Codon *tmp; TightString *tStringArray = mallocOrExit(sequenceCount, TightString); Coordinate totalLength = 0; int arrayLength; for (sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex++) { tStringArray[sequenceIndex].length = strlen (sequences[sequenceIndex]); arrayLength = tStringArray[sequenceIndex].length / 4; if (tStringArray[sequenceIndex].length % 4 > 0) arrayLength++; totalLength += arrayLength; } *tSeqMem = callocOrExit (totalLength, char); tmp = (Codon*)*tSeqMem; for (sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex++) { fillTightStringWithString (&tStringArray[sequenceIndex], sequences[sequenceIndex], tmp); arrayLength = tStringArray[sequenceIndex].length / 4; if (tStringArray[sequenceIndex].length % 4 > 0) arrayLength++; tmp += arrayLength; } free(sequences); return tStringArray; }
// // Creates a tightString from a tradionnal string of A,T,G, and C of length size // TightString *newTightStringFromString(char *sequence) { TightString *newTString = mallocOrExit(1, TightString); int size = (int) strlen(sequence); int arrayLength = size / 4; int index; if (size % 4 > 0) arrayLength++; newTString->length = size; newTString->sequence = callocOrExit(arrayLength, Codon); for (index = 0; index < arrayLength; index++) newTString->sequence[index] = 0; for (index = 0; index < size; index++) writeNucleotide(sequence[index], &(newTString->sequence[index / 4]), index % 4); free(sequence); return newTString; }
/*
 * (Re)allocates the file-level nodeLocks array with
 * preNodeCount_pg(preGraph) + 1 OpenMP locks and initialises each of them.
 *
 * Any previously allocated array is released with free() first; the old
 * locks are not passed to omp_destroy_lock here.
 */
static void createNodeLocks(PreGraph *preGraph)
{
	const IDnum lockCount = preNodeCount_pg(preGraph) + 1;
	IDnum i;

	// free(NULL) is a no-op, so no guard is needed
	free(nodeLocks);
	nodeLocks = mallocOrExit(lockCount, omp_lock_t);

	#pragma omp parallel for
	for (i = 0; i < lockCount; i++)
		omp_init_lock(&nodeLocks[i]);
}
// Imports roadmap from the appropriate file format // Memory allocated within the function RoadMapArray *importRoadMapArray(char *filename) { FILE *file; const int maxline = 100; char *line = mallocOrExit(maxline, char); RoadMap *array; RoadMap *rdmap = NULL; IDnum rdmapIndex = 0; IDnum seqID; Coordinate position, start, finish; Annotation *nextAnnotation; RoadMapArray *result = mallocOrExit(1, RoadMapArray); IDnum sequenceCount; IDnum annotationCount = 0; short short_var; long long_var; long long longlong_var, longlong_var2, longlong_var3; printf("Reading roadmap file %s\n", filename); file = fopen(filename, "r"); if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename); sscanf(line, "%ld\t%i\t%hi\n", &long_var, &(result->WORDLENGTH), &short_var); sequenceCount = (IDnum) long_var; resetWordFilter(result->WORDLENGTH); result->length = sequenceCount; array = mallocOrExit(sequenceCount, RoadMap); result->array = array; result->double_strand = (boolean) short_var; while (fgets(line, maxline, file) != NULL) if (line[0] != 'R') annotationCount++; result->annotations = callocOrExit(annotationCount, Annotation); nextAnnotation = result->annotations; fclose(file); file = fopen(filename, "r"); if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename); while (fgets(line, maxline, file) != NULL) { if (line[0] == 'R') { rdmap = getRoadMapInArray(result, rdmapIndex++); rdmap->annotationCount = 0; } else { sscanf(line, "%ld\t%lld\t%lld\t%lld\n", &long_var, &longlong_var, &longlong_var2, &longlong_var3); seqID = (IDnum) long_var; position = (Coordinate) longlong_var; start = (Coordinate) longlong_var2; finish = (Coordinate) longlong_var3; nextAnnotation->sequenceID = seqID; nextAnnotation->position = position; nextAnnotation->start.coord = start; nextAnnotation->finish.coord = finish; if (seqID > 0) nextAnnotation->length = finish - start; else nextAnnotation->length = start - finish; rdmap->annotationCount++; 
nextAnnotation++; } } printf("%d roadmaps reads\n", rdmapIndex); fclose(file); free(line); return result; }
// Threads each sequences and creates preArcs according to road map indications static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * chains) { IDnum sequenceIndex; IDnum referenceCount = rdmaps->referenceCount; #ifdef _OPENMP annotationOffset = mallocOrExit(rdmaps->length + 1, Coordinate); annotationOffset[0] = 0; for (sequenceIndex = 1; sequenceIndex <= rdmaps->length; sequenceIndex++) annotationOffset[sequenceIndex] = annotationOffset[sequenceIndex - 1] + getAnnotationCount(getRoadMapInArray(rdmaps, sequenceIndex - 1)); #else Annotation *annot = rdmaps->annotations; #endif if (rdmaps->referenceCount > 0) allocatePreMarkerCountSpace_pg(preGraph); #ifdef _OPENMP int threads = omp_get_max_threads(); if (threads > 8) threads = 8; #pragma omp parallel for num_threads(threads) #endif for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { #ifdef _OPENMP Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]); #endif RoadMap *rdmap; Coordinate currentPosition, currentInternalPosition; IDnum currentPreNodeID, nextInternalPreNodeID; IDnum annotIndex, lastAnnotIndex; boolean isReference; if (sequenceIndex % 1000000 == 0) velvetLog("Connecting %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); nextInternalPreNodeID = chooseNextInternalPreNode (chains[sequenceIndex] - 1, sequenceIndex, preGraph, chains); isReference = (sequenceIndex <= referenceCount); currentPosition = 0; currentInternalPosition = 0; currentPreNodeID = 0; // Recursion up to last annotation while (annotIndex < lastAnnotIndex || nextInternalPreNodeID != 0) { if (annotIndex == lastAnnotIndex || (nextInternalPreNodeID != 0 && currentInternalPosition < getPosition(annot))) { connectPreNodeToTheNext(¤tPreNodeID, nextInternalPreNodeID, ¤tPosition, sequenceIndex, isReference, 
preGraph); nextInternalPreNodeID = chooseNextInternalPreNode (currentPreNodeID, sequenceIndex, preGraph, chains); currentInternalPosition += getPreNodeLength_pg(currentPreNodeID, preGraph); } else { connectAnnotation(¤tPreNodeID, annot, ¤tPosition, sequenceIndex, isReference, preGraph); annot = getNextAnnotation(annot); annotIndex++; } } } if (rdmaps->referenceCount > 0) { allocatePreMarkerSpace_pg(preGraph); createPreMarkers(rdmaps, preGraph, chains); } #ifdef _OPENMP free(annotationOffset); annotationOffset = NULL; #endif }
/*
 * Builds a sorted table of every k-mer found in the node sequences of a
 * pre-graph file, with an optional acceleration index.
 *
 * preGraphFilename: pre-graph file; its first line carries the word
 *                   length as third field, followed by node records made
 *                   of a line starting with 'N' and a sequence line.
 * accelerationBits: requested index width; clamped to 2 * wordLength and
 *                   to 32. Zero or less disables the index.
 * graph:            graph used to look up node lengths when a k-mer is
 *                   stored in reverse orientation.
 * double_strand:    when true, each k-mer is stored as whichever of the
 *                   word and its reverse is not greater under
 *                   compareKmers; reversed entries get a negative node ID
 *                   and a position counted from the other end.
 *
 * The file is read twice: once to count k-mers (sequence length minus
 * wordLength plus one per node), once to fill the table. The table is then
 * sorted with compareKmerOccurences. If acceleration is enabled, entry h
 * of the acceleration table receives the index of the first occurrence
 * whose key is at least h, and remaining entries receive kmerCount.
 *
 * Exits through exitErrorf if the file cannot be opened or is truncated.
 */
static KmerOccurenceTable *referenceGraphKmers(char *preGraphFilename, short int accelerationBits, Graph * graph, boolean double_strand)
{
	FILE *file = fopen(preGraphFilename, "r");
	const int maxline = MAXLINE;
	char line[MAXLINE];
	// getc() returns an int: a plain char cannot represent EOF reliably
	int c;
	int wordLength;
	Coordinate lineLength, kmerCount;
	Kmer word;
	Kmer antiWord;
	KmerOccurenceTable *kmerTable = NULL;
	KmerOccurence *kmerOccurences, *kmerOccurencePtr;
	Coordinate kmerOccurenceIndex;
	IDnum index;
	IDnum nodeID = 0;
	IDnum *accelPtr = NULL;
	KmerKey lastHeader = 0;
	KmerKey header;
	Nucleotide nucleotide;

	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	// Count kmers
	printf("Scanning pre-graph file %s for k-mers\n", preGraphFilename);

	// First line
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	sscanf(line, "%*i\t%*i\t%i\n", &wordLength);

	// Initialize kmer occurence table:
	kmerTable = mallocOrExit(1, KmerOccurenceTable);
	if (accelerationBits > 2 * wordLength)
		accelerationBits = 2 * wordLength;

	if (accelerationBits > 32)
		accelerationBits = 32;

	if (accelerationBits > 0) {
		kmerTable->accelerationBits = accelerationBits;
		kmerTable->accelerationTable =
		    callocOrExit((((size_t) 1) << accelerationBits) + 1,
				 IDnum);
		accelPtr = kmerTable->accelerationTable;
		kmerTable->accelerationShift =
		    (short int) 2 *wordLength - accelerationBits;
	} else {
		kmerTable->accelerationBits = 0;
		kmerTable->accelerationTable = NULL;
		kmerTable->accelerationShift = 0;
	}

	// Read nodes
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");

	kmerCount = 0;
	while (line[0] == 'N') {
		// Measure the sequence line without storing it
		lineLength = 0;
		while ((c = getc(file)) != EOF && c != '\n')
			lineLength++;
		kmerCount += lineLength - wordLength + 1;
		if (fgets(line, maxline, file) == NULL)
			break;
	}
	fclose(file);

	// Create table
	printf("%li kmers found\n", (long) kmerCount);
	kmerOccurences = callocOrExit(kmerCount, KmerOccurence);
	kmerOccurencePtr = kmerOccurences;
	kmerOccurenceIndex = 0;
	kmerTable->kmerTable = kmerOccurences;
	kmerTable->kmerTableSize = kmerCount;

	// Fill table
	file = fopen(preGraphFilename, "r");
	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");

	// Read nodes
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	while (line[0] == 'N') {
		nodeID++;

		// Fill in the initial word :
		clearKmer(&word);
		clearKmer(&antiWord);

		for (index = 0; index < wordLength - 1; index++) {
			c = getc(file);
			// The sequence must not end before a full word is read
			if (c == '\n' || c == EOF)
				exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");

			switch (c) {
			case 'C':
				nucleotide = CYTOSINE;
				break;
			case 'G':
				nucleotide = GUANINE;
				break;
			case 'T':
				nucleotide = THYMINE;
				break;
			default:
				// 'A' and any unrecognised character
				nucleotide = ADENINE;
				break;
			}

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}
		}

		// Scan through node
		index = 0;
		while ((c = getc(file)) != '\n' && c != EOF) {
			switch (c) {
			case 'C':
				nucleotide = CYTOSINE;
				break;
			case 'G':
				nucleotide = GUANINE;
				break;
			case 'T':
				nucleotide = THYMINE;
				break;
			default:
				// 'A' and any unrecognised character
				nucleotide = ADENINE;
				break;
			}

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}

			if (!double_strand || compareKmers(&word, &antiWord) <= 0) {
				copyKmers(&kmerOccurencePtr->kmer, &word);
				kmerOccurencePtr->nodeID = nodeID;
				kmerOccurencePtr->position = index;
			} else {
				// Stored in reverse orientation: flip ID sign and position
				copyKmers(&kmerOccurencePtr->kmer, &antiWord);
				kmerOccurencePtr->nodeID = -nodeID;
				kmerOccurencePtr->position =
				    getNodeLength(getNodeInGraph(graph, nodeID)) - 1 - index;
			}

			kmerOccurencePtr++;
			kmerOccurenceIndex++;
			index++;
		}

		if (fgets(line, maxline, file) == NULL)
			break;
	}

	fclose(file);

	// Sort table
	qsort(kmerOccurences, kmerCount, sizeof(KmerOccurence),
	      compareKmerOccurences);

	// Fill up acceleration table
	if (kmerTable->accelerationTable != NULL) {
		*accelPtr = (IDnum) 0;
		for (kmerOccurenceIndex = 0;
		     kmerOccurenceIndex < kmerCount;
		     kmerOccurenceIndex++) {
			header =
			    keyInAccelerationTable(&kmerOccurences
						   [kmerOccurenceIndex].
						   kmer, kmerTable);
			// Every key up to this header starts at this index
			while (lastHeader < header) {
				lastHeader++;
				accelPtr++;
				*accelPtr = kmerOccurenceIndex;
			}
		}

		// Keys beyond the last occurrence point past the end of the table
		while (lastHeader < (KmerKey) 1 << accelerationBits) {
			lastHeader++;
			accelPtr++;
			*accelPtr = kmerCount;
		}
	}

	return kmerTable;
}