/*
 * Reports a failed allocation request through exitErrorf().
 *
 * function: name of the allocating routine, inserted into the message.
 * count:    number of elements that were requested.
 * size:     size of one element in bytes; when it is 1 the byte total is
 *           omitted from the message.
 * name:     element type name; the message appends an 's' to pluralise it.
 */
static void allocExitError(const char *function, unsigned long long count, unsigned long long size, const char *name)
{
	if (size != 1) {
		/* Multi-byte elements: also report the overall byte total. */
		exitErrorf(EXIT_FAILURE, true,
			   "Can't %s %llu %ss totalling %llu bytes",
			   function, count, name, count * size);
	} else {
		exitErrorf(EXIT_FAILURE, true, "Can't %s %llu %ss",
			   function, count, name);
	}
}
// Constructor // Memory allocated FibHeap *newFibHeap() { FibHeap * heap = fh_makekeyheap(); if (heap == NULL) exitErrorf(EXIT_FAILURE, true, "Can't allocate FibHeap"); return heap; }
/*
 * Writes the AS events of every locus to a text file.
 *
 * loci:       array of loci, visited in index order.
 * locusCount: number of entries in `loci`.
 * filename:   path of the output file, opened in "w" mode (truncated).
 *
 * A failure to open the file is reported through exitErrorf(); otherwise
 * exportLocusASEvents() is called once per locus and the file is closed.
 */
void exportASEvents(Locus * loci, IDnum locusCount, char *filename)
{
	FILE *out = fopen(filename, "w");
	IDnum i = 0;

	if (out == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
	else
		velvetLog("Exporting AS events to %s\n", filename);

	while (i < locusCount) {
		exportLocusASEvents(i, loci + i, out);
		i++;
	}

	fclose(out);
}
AutoFile* openFileAuto(char*filename) { AutoFile* seqFile = (AutoFile*)calloc(1, sizeof(AutoFile)); int i; if (strcmp(filename, "-")==0) exitErrorf(EXIT_FAILURE, false, "Cannot read from stdin in auto mode\n"); for (i=0; decompressors[i] ; i++) { if (strlen(decompressors[i])==0) { seqFile->file = fopen(filename, "r"); seqFile->pid = 0; seqFile->decompressor = "Raw read"; } else { printf("auto file not supported!\n"); exit(0); //printf("Trying : %s\n", decompressors[i]); //char const* args[] = {decompressors[i], "-c", "-d", filename, NULL}; //seqFile->file = popenNoStderr(args[0], args, &(seqFile->pid)); //seqFile->decompressor = decompressors[i]; } if (!seqFile->file) continue; int c = fgetc(seqFile->file); if (c=='>' || c=='@') { // Ok, looks like FASTA or FASTQ ungetc(c, seqFile->file); seqFile->first_char = c; return seqFile; } else { //if (seqFile->pid) // pcloseNoStderr(seqFile->pid, seqFile->file); //else // fclose(seqFile->file); printf("should not run here!\n"); exit(0); } } //printf("Unable to determine file type\n"); return NULL; }
AutoFile* openFileAuto(char*filename) { AutoFile* seqFile = calloc(1, sizeof(AutoFile)); int i; if (strcmp(filename, "-")==0) exitErrorf(EXIT_FAILURE, false, "Cannot read from stdin in auto mode\n"); for (i=0; decompressors[i] ; i++) { if (strlen(decompressors[i])==0) { seqFile->file = fopen(filename, "r"); seqFile->pid = 0; seqFile->decompressor = "Raw read"; } else { char *cmd = calloc(strlen(decompressors[i]) + strlen(filename) + 8, sizeof(char)); sprintf(cmd, "%s -c -d %s", decompressors[i], filename); seqFile->file = _popen(cmd, "r"); seqFile->decompressor = decompressors[i]; seqFile->pid = 1; } if (!seqFile->file) continue; int c = fgetc(seqFile->file); if (c=='>' || c=='@') { // Ok, looks like FASTA or FASTQ ungetc(c, seqFile->file); seqFile->first_char = c; return seqFile; } else { if (seqFile->pid) _pclose(seqFile->file); else fclose(seqFile->file); } } //printf("Unable to determine file type\n"); return NULL; }
// Imports roadmap from the appropriate file format // Memory allocated within the function RoadMapArray *importRoadMapArray(char *filename) { FILE *file; const int maxline = 100; char *line = mallocOrExit(maxline, char); RoadMap *array; RoadMap *rdmap = NULL; IDnum rdmapIndex = 0; IDnum seqID; Coordinate position, start, finish; Annotation *nextAnnotation; RoadMapArray *result = mallocOrExit(1, RoadMapArray); IDnum sequenceCount; IDnum annotationCount = 0; short short_var; long long_var; long long longlong_var, longlong_var2, longlong_var3; printf("Reading roadmap file %s\n", filename); file = fopen(filename, "r"); if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename); sscanf(line, "%ld\t%i\t%hi\n", &long_var, &(result->WORDLENGTH), &short_var); sequenceCount = (IDnum) long_var; resetWordFilter(result->WORDLENGTH); result->length = sequenceCount; array = mallocOrExit(sequenceCount, RoadMap); result->array = array; result->double_strand = (boolean) short_var; while (fgets(line, maxline, file) != NULL) if (line[0] != 'R') annotationCount++; result->annotations = callocOrExit(annotationCount, Annotation); nextAnnotation = result->annotations; fclose(file); file = fopen(filename, "r"); if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", filename); while (fgets(line, maxline, file) != NULL) { if (line[0] == 'R') { rdmap = getRoadMapInArray(result, rdmapIndex++); rdmap->annotationCount = 0; } else { sscanf(line, "%ld\t%lld\t%lld\t%lld\n", &long_var, &longlong_var, &longlong_var2, &longlong_var3); seqID = (IDnum) long_var; position = (Coordinate) longlong_var; start = (Coordinate) longlong_var2; finish = (Coordinate) longlong_var3; nextAnnotation->sequenceID = seqID; nextAnnotation->position = position; nextAnnotation->start.coord = start; nextAnnotation->finish.coord = finish; if (seqID > 0) nextAnnotation->length = finish - start; else nextAnnotation->length = start - finish; rdmap->annotationCount++; 
nextAnnotation++; } } printf("%d roadmaps reads\n", rdmapIndex); fclose(file); free(line); return result; }
// Creates the preNode using insertion marker and annotation lists for each sequence static void // Creates the preNode using insertion marker and annotation lists for each sequence createPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph, IDnum * markerCounters, InsertionMarker * insertionMarkers, InsertionMarker * veryLastMarker, IDnum * chains, SequencesReader *seqReadInfo, int WORDLENGTH) { char *sequenceFilename = seqReadInfo->m_seqFilename; Annotation *annot = rdmaps->annotations; IDnum latestPreNodeID; InsertionMarker *currentMarker = insertionMarkers; IDnum sequenceIndex; Coordinate currentPosition, nextStop; IDnum preNodeCounter = 1; FILE *file = NULL; char line[50000]; int lineLength = 50000; Coordinate readIndex; boolean tooShort; Kmer initialKmer; char c; RoadMap *rdmap; IDnum annotIndex, lastAnnotIndex; IDnum markerIndex, lastMarkerIndex; if (!seqReadInfo->m_bIsBinary) { file = fopen(sequenceFilename, "r"); if (file == NULL) exitErrorf(EXIT_FAILURE, true, "Could not read %s", sequenceFilename); // Reading sequence descriptor in first line if (sequenceCount_pg(preGraph) > 0 && !fgets(line, lineLength, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); seqReadInfo->m_pFile = file; } // Now that we have read all of the annotations, we go on to create the preNodes and tie them up for (sequenceIndex = 1; sequenceIndex <= sequenceCount_pg(preGraph); sequenceIndex++) { if (sequenceIndex % 1000000 == 0) velvetLog("Sequence %li / %li\n", (long) sequenceIndex, (long) sequenceCount_pg(preGraph)); if (!seqReadInfo->m_bIsBinary) { while (line[0] != '>') if (!fgets(line, lineLength, file)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); } rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1); annotIndex = 0; lastAnnotIndex = getAnnotationCount(rdmap); markerIndex = 0; lastMarkerIndex = markerCounters[sequenceIndex]; currentPosition = 0; // Reading first (k-1) nucleotides tooShort = false; clearKmer(&initialKmer); 
//velvetLog("Initial kmer: "); TightString *tString = NULL; char *strString = NULL; if (seqReadInfo->m_bIsBinary) { tString = getTightStringInArray(seqReadInfo->m_sequences->tSequences, sequenceIndex - 1); strString = readTightString(tString); } for (readIndex = 0; readIndex < WORDLENGTH - 1; readIndex++) { if (seqReadInfo->m_bIsBinary) { if (readIndex >= tString->length) { tooShort = true; break; } c = strString[readIndex]; } else { c = getc(file); while (c == '\n' || c == '\r') c = getc(file); if (c == '>' || c == 'M' || c == EOF) { ungetc(c, file); tooShort = true; break; } } switch (c) { case 'A': case 'N': pushNucleotide(&initialKmer, ADENINE); break; case 'C': pushNucleotide(&initialKmer, CYTOSINE); break; case 'G': pushNucleotide(&initialKmer, GUANINE); break; case 'T': pushNucleotide(&initialKmer, THYMINE); break; default: velvetLog ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n"); fflush(stdout); abort(); } } if (tooShort) { //velvetLog("Skipping short read.. 
%d\n", sequenceIndex); chains[sequenceIndex] = preNodeCounter; if (seqReadInfo->m_bIsBinary) { free(strString); } else { if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); } continue; } char *currString = NULL; if (seqReadInfo->m_bIsBinary) { currString = &strString[readIndex]; seqReadInfo->m_ppCurrString = &currString; } latestPreNodeID = 0; while (annotIndex < lastAnnotIndex) { if (markerIndex == lastMarkerIndex || getPosition(annot) <= getInsertionMarkerPosition(currentMarker)) nextStop = getPosition(annot); else { nextStop = getInsertionMarkerPosition (currentMarker); } if (currentPosition != nextStop) { if (seqReadInfo->m_bIsBinary) { if (readIndex >= tString->length) { velvetLog("readIndex %ld beyond string len %ld\n", (uint64_t) readIndex, (uint64_t) tString->length); exit(1); } } //if (sequenceIndex == 481) // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop); addPreNodeToPreGraph_pg(preGraph, currentPosition, nextStop, seqReadInfo, &initialKmer, preNodeCounter); if (latestPreNodeID == 0) { chains[sequenceIndex] = preNodeCounter; } latestPreNodeID = preNodeCounter++; currentPosition = nextStop; } while (markerIndex < lastMarkerIndex && getInsertionMarkerPosition(currentMarker) == nextStop) { convertMarker(currentMarker, latestPreNodeID); currentMarker++; markerIndex++; } while (annotIndex < lastAnnotIndex && getPosition(annot) == nextStop) { for (readIndex = 0; readIndex < getAnnotationLength(annot); readIndex++) { if (seqReadInfo->m_bIsBinary) { c = *currString; currString += 1; // increment the pointer } else { c = getc(file); while (!isalpha(c)) c = getc(file); } //if (sequenceIndex == 481) // velvetLog("(%c)", c); switch (c) { case 'A': case 'N': pushNucleotide(&initialKmer, ADENINE); break; case 'C': pushNucleotide(&initialKmer, CYTOSINE); break; case 'G': pushNucleotide(&initialKmer, GUANINE); break; 
case 'T': pushNucleotide(&initialKmer, THYMINE); break; default: velvetLog ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n"); fflush(stdout); #ifdef DEBUG abort(); #endif exit(1); } } annot = getNextAnnotation(annot); annotIndex++; } } while (markerIndex < lastMarkerIndex) { if (currentPosition == getInsertionMarkerPosition(currentMarker)) { convertMarker(currentMarker, latestPreNodeID); currentMarker++; markerIndex++; } else { nextStop = getInsertionMarkerPosition (currentMarker); //if (sequenceIndex == 481) // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop); addPreNodeToPreGraph_pg(preGraph, currentPosition, nextStop, seqReadInfo, &initialKmer, preNodeCounter); if (latestPreNodeID == 0) chains[sequenceIndex] = preNodeCounter; latestPreNodeID = preNodeCounter++; currentPosition = getInsertionMarkerPosition (currentMarker); } } if (seqReadInfo->m_bIsBinary) { free(strString); } else { // End of sequence if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph)) exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename); //velvetLog(" \n"); } if (latestPreNodeID == 0) chains[sequenceIndex] = preNodeCounter; } free(markerCounters); if (!seqReadInfo->m_bIsBinary) { fclose(file); } }
static KmerOccurenceTable *referenceGraphKmers(char *preGraphFilename, short int accelerationBits, Graph * graph, boolean double_strand, NodeMask * nodeMasks, Coordinate nodeMaskCount) { FILE *file = fopen(preGraphFilename, "r"); const int maxline = MAXLINE; char line[MAXLINE]; char c; int wordLength; Coordinate lineLength, kmerCount; Kmer word; Kmer antiWord; KmerOccurenceTable *kmerTable; IDnum index; IDnum nodeID = 0; Nucleotide nucleotide; NodeMask * nodeMask = nodeMasks; Coordinate nodeMaskIndex = 0; if (file == NULL) exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename); // Count kmers velvetLog("Scanning pre-graph file %s for k-mers\n", preGraphFilename); // First line if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete"); sscanf(line, "%*i\t%*i\t%i\n", &wordLength); kmerTable = newKmerOccurenceTable(accelerationBits, wordLength); // Read nodes if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete"); kmerCount = 0; while (line[0] == 'N') { lineLength = 0; while ((c = getc(file)) != EOF && c != '\n') lineLength++; kmerCount += lineLength - wordLength + 1; if (fgets(line, maxline, file) == NULL) break; } velvetLog("%li kmers found\n", (long) kmerCount); for(nodeMaskIndex = 0; nodeMaskIndex < nodeMaskCount; nodeMaskIndex++) { kmerCount -= nodeMasks[nodeMaskIndex].finish - nodeMasks[nodeMaskIndex].start; } nodeMaskIndex = 0; fclose(file); // Create table allocateKmerOccurences(kmerCount, kmerTable); // Fill table file = fopen(preGraphFilename, "r"); if (file == NULL) exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename); if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete"); // Read nodes if (!fgets(line, maxline, file)) exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete"); while (line[0] == 'N') { nodeID++; // Fill in the initial word : clearKmer(&word); clearKmer(&antiWord); for (index = 0; index < wordLength - 
1; index++) { c = getc(file); if (c == 'A') nucleotide = ADENINE; else if (c == 'C') nucleotide = CYTOSINE; else if (c == 'G') nucleotide = GUANINE; else if (c == 'T') nucleotide = THYMINE; else if (c == '\n') exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete"); else nucleotide = ADENINE; pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } } // Scan through node index = 0; while((c = getc(file)) != '\n' && c != EOF) { if (c == 'A') nucleotide = ADENINE; else if (c == 'C') nucleotide = CYTOSINE; else if (c == 'G') nucleotide = GUANINE; else if (c == 'T') nucleotide = THYMINE; else nucleotide = ADENINE; pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } // Update mask if necessary if (nodeMask) { if (nodeMask->nodeID < nodeID || (nodeMask->nodeID == nodeID && index >= nodeMask->finish)) { if (++nodeMaskIndex == nodeMaskCount) nodeMask = NULL; else nodeMask++; } } // Check if not masked! if (nodeMask) { if (nodeMask->nodeID == nodeID && index >= nodeMask->start && index < nodeMask->finish) { index++; continue; } } if (!double_strand || compareKmers(&word, &antiWord) <= 0) recordKmerOccurence(&word, nodeID, index, kmerTable); else recordKmerOccurence(&antiWord, -nodeID, getNodeLength(getNodeInGraph(graph, nodeID)) - 1 - index, kmerTable); index++; } if (fgets(line, maxline, file) == NULL) break; } fclose(file); // Sort table sortKmerOccurenceTable(kmerTable); return kmerTable; }
/*
 * Builds the sorted table of k-mer occurrences found in the node sequences
 * of a pre-graph file, together with its optional acceleration table.
 *
 * The file is read twice: the first pass counts the k-mers of every node
 * (lines following an 'N' header line) so the occurrence array can be
 * allocated, the second pass fills it. The array is then sorted with
 * compareKmerOccurences and, when acceleration is enabled, an index of
 * (2^accelerationBits + 1) entries is filled so that entry h holds the
 * position of the first occurrence whose key is >= h.
 *
 * preGraphFilename: path of the pre-graph file.
 * accelerationBits: requested number of key bits; clamped to
 *                   min(2 * wordLength, 32). Values <= 0 disable the
 *                   acceleration table.
 * graph:            used to look up node lengths for reverse-strand hits.
 * double_strand:    when set, the k-mer and its reverse complement are
 *                   compared and only one of the two is stored, with a
 *                   negated node ID for the reverse strand.
 *
 * Returns the new table (owned by the caller). Unreadable or truncated
 * input is reported through exitErrorf().
 */
static KmerOccurenceTable *referenceGraphKmers(char *preGraphFilename, short int accelerationBits, Graph * graph, boolean double_strand)
{
	FILE *file = fopen(preGraphFilename, "r");
	const int maxline = MAXLINE;
	char line[MAXLINE];
	// Must be an int: it holds the result of getc(), and EOF does not fit
	// reliably in a char.
	int c;
	int wordLength;
	Coordinate lineLength, kmerCount;
	Kmer word;
	Kmer antiWord;
	KmerOccurenceTable *kmerTable = NULL;
	KmerOccurence *kmerOccurences, *kmerOccurencePtr;
	Coordinate kmerOccurenceIndex;
	IDnum index;
	IDnum nodeID = 0;
	IDnum *accelPtr = NULL;
	// Wider than any key so that counting up to 2^accelerationBits
	// (accelerationBits may be 32) can neither overflow nor shift by the
	// full width of the type. KmerKey's width is not visible from here.
	unsigned long long lastHeader = 0;
	KmerKey header;
	Nucleotide nucleotide;

	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	// Count kmers
	printf("Scanning pre-graph file %s for k-mers\n", preGraphFilename);

	// First line
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	// The third header field is the word length; it must be present.
	if (sscanf(line, "%*i\t%*i\t%i\n", &wordLength) != 1)
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");

	// Initialize kmer occurence table:
	kmerTable = mallocOrExit(1, KmerOccurenceTable);
	if (accelerationBits > 2 * wordLength)
		accelerationBits = 2 * wordLength;

	if (accelerationBits > 32)
		accelerationBits = 32;

	if (accelerationBits > 0) {
		kmerTable->accelerationBits = accelerationBits;
		kmerTable->accelerationTable =
		    callocOrExit((((size_t) 1) << accelerationBits) + 1,
				 IDnum);
		accelPtr = kmerTable->accelerationTable;
		kmerTable->accelerationShift =
		    (short int) 2 * wordLength - accelerationBits;
	} else {
		kmerTable->accelerationBits = 0;
		kmerTable->accelerationTable = NULL;
		kmerTable->accelerationShift = 0;
	}

	// Read nodes
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	kmerCount = 0;
	while (line[0] == 'N') {
		lineLength = 0;
		while ((c = getc(file)) != EOF && c != '\n')
			lineLength++;
		kmerCount += lineLength - wordLength + 1;
		if (fgets(line, maxline, file) == NULL)
			break;
	}
	fclose(file);

	// Create table
	printf("%li kmers found\n", (long) kmerCount);
	kmerOccurences = callocOrExit(kmerCount, KmerOccurence);
	kmerOccurencePtr = kmerOccurences;
	kmerOccurenceIndex = 0;
	kmerTable->kmerTable = kmerOccurences;
	kmerTable->kmerTableSize = kmerCount;

	// Fill table
	file = fopen(preGraphFilename, "r");
	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");

	// Read nodes
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	while (line[0] == 'N') {
		nodeID++;

		// Fill in the initial word :
		clearKmer(&word);
		clearKmer(&antiWord);

		for (index = 0; index < wordLength - 1; index++) {
			c = getc(file);
			if (c == 'A')
				nucleotide = ADENINE;
			else if (c == 'C')
				nucleotide = CYTOSINE;
			else if (c == 'G')
				nucleotide = GUANINE;
			else if (c == 'T')
				nucleotide = THYMINE;
			else if (c == '\n' || c == EOF)
				// Node shorter than k-1, or file cut short
				exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
			else
				nucleotide = ADENINE;

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}
		}

		// Scan through node
		index = 0;
		while ((c = getc(file)) != '\n' && c != EOF) {
			if (c == 'A')
				nucleotide = ADENINE;
			else if (c == 'C')
				nucleotide = CYTOSINE;
			else if (c == 'G')
				nucleotide = GUANINE;
			else if (c == 'T')
				nucleotide = THYMINE;
			else
				nucleotide = ADENINE;

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}

			if (!double_strand
			    || compareKmers(&word, &antiWord) <= 0) {
				copyKmers(&kmerOccurencePtr->kmer, &word);
				kmerOccurencePtr->nodeID = nodeID;
				kmerOccurencePtr->position = index;
			} else {
				copyKmers(&kmerOccurencePtr->kmer, &antiWord);
				kmerOccurencePtr->nodeID = -nodeID;
				kmerOccurencePtr->position =
				    getNodeLength(getNodeInGraph(graph, nodeID)) - 1 - index;
			}

			kmerOccurencePtr++;
			kmerOccurenceIndex++;
			index++;
		}

		if (fgets(line, maxline, file) == NULL)
			break;
	}

	fclose(file);

	// Sort table
	qsort(kmerOccurences, kmerCount, sizeof(KmerOccurence),
	      compareKmerOccurences);

	// Fill up acceleration table
	if (kmerTable->accelerationTable != NULL) {
		const unsigned long long headerCount =
		    1ULL << accelerationBits;

		*accelPtr = (IDnum) 0;
		for (kmerOccurenceIndex = 0;
		     kmerOccurenceIndex < kmerCount;
		     kmerOccurenceIndex++) {
			header =
			    keyInAccelerationTable(&kmerOccurences
						   [kmerOccurenceIndex].kmer,
						   kmerTable);
			// Every key up to and including this one starts at or
			// before the current occurrence.
			while (lastHeader < header) {
				lastHeader++;
				accelPtr++;
				*accelPtr = kmerOccurenceIndex;
			}
		}

		// Keys beyond the last occurrence point past the end
		while (lastHeader < headerCount) {
			lastHeader++;
			accelPtr++;
			*accelPtr = kmerCount;
		}
	}

	return kmerTable;
}