// Parse FASTQ records from input_stream and append them to data.
//
// Each record is expected to span four lines: a name line starting with
// '@', a sequence line, a "plus" line and a quality line. Only the name
// (without the leading '@') and the sequence are kept; the plus and quality
// lines are skipped. Runs of consecutive '\n' / '\r' characters count as a
// single linebreak, so CRLF endings and blank lines do not shift the
// four-line cycle.
//
// Parameters:
//   input_stream - stream to read characters from
//   filename     - stored in data.filename
//   data         - receives one SeqRecord per parsed record
//
// Throws (as a string literal):
//   "Problem reading file" - the stream failed for a reason other than EOF
//   "Not in fastq format"  - a name line does not start with '@'
//
// A record that is cut off before its quality line is silently discarded.
void parseFastq(SeqStream input_stream, string filename, SeqSet &data) {
    data.filename = filename;

    char ch = '\0';
    string temp = "";
    string nm = "";
    unsigned size_guess = 150;   // initial capacity; updated after each record
    unsigned line_num = 0;       // position within the current record (0..3)
    bool linebreak = false;      // true while inside a run of linebreak chars
    bool name = false;           // true once the leading '@' has been seen

    temp.reserve(size_guess);

    while (true) {
        // Read first, then test the stream state: eof() only becomes true
        // after a read has failed, so testing it before get() would
        // process a stale character once more at the end of the input.
        input_stream.get(ch);
        if (input_stream.eof()) {
            break;
        }
        if (!input_stream.good()) {
            throw("Problem reading file");
        }

        // Check for linebreaks. Treat multiple linebreak characters as one
        // linebreak. When four lines have been seen, emit a record and
        // reset the per-record state.
        if (ch == '\n' || ch == '\r') {
            if (!linebreak) {
                line_num += 1;
                if (line_num == 4) {
                    line_num = 0;
                    SeqRecord rec;
                    rec.setName(nm);
                    rec.append(temp);
                    data.append(rec);
                    size_guess = rec.getSeq().size();
                    nm = "";
                    temp = "";
                    temp.reserve(size_guess);
                    name = false;
                }
            }
            linebreak = true;
            continue;
        }

        // If this far, not a linebreak
        linebreak = false;

        if (line_num == 0) {
            // Name line: the first character must be '@' and is not stored.
            if (!name && ch != '@') {
                throw("Not in fastq format");
            }
            if (name) {
                nm += ch;
            }
            name = true;
        }
        else if (line_num == 1) {
            // Sequence line
            temp += ch;
        }
        // line_num == 2 (plus line) and line_num == 3 (quality scores)
        // are ignored.
    }

    // The input ended inside a quality line that has no trailing linebreak.
    // The record is complete as far as name and sequence are concerned, so
    // emit it instead of dropping it.
    if (line_num == 3 && !linebreak) {
        SeqRecord rec;
        rec.setName(nm);
        rec.append(temp);
        data.append(rec);
    }
}
// Parse FASTA records from input_stream and append them to data.
//
// The stream must begin with '>'. Each record consists of a header line
// (everything after '>' up to and including the first '\n' or '\r') and the
// sequence text that follows, up to the next '>' that appears directly
// after a linebreak or up to end of input. Linebreaks, spaces and tabs
// inside the sequence are dropped.
//
// NOTE(review): the terminating linebreak character is appended to the
// record name, unlike parseFastq which excludes it. That looks
// unintentional, but it is preserved here because callers may rely on
// it -- confirm before changing.
//
// Parameters:
//   input_stream - stream to read characters from
//   filename     - stored in data.filename
//   data         - receives one SeqRecord per parsed record
//
// Throws (as a string literal):
//   "Not in FASTA format"  - the first character is not '>' (this includes
//                            an empty or unreadable stream)
//   "Problem reading file" - the stream fails while reading a header line,
//                            or fails for a reason other than EOF while
//                            reading sequence text
void parseFasta(SeqStream input_stream, string filename, SeqSet &data) {
    data.filename = filename;

    // Initialised so that a failed first read is reported as a format
    // error instead of comparing an indeterminate value.
    char ch = '\0';
    string temp = "";
    string nm;
    unsigned size_guess = 10000;   // capacity hint; updated after each record

    input_stream.get(ch);
    if (ch != '>') {
        throw("Not in FASTA format");
    }

    bool inseq = false;       // true while reading sequence text
    bool linebreak = false;   // true if the previous character was a linebreak

    while (!input_stream.eof()) {
        SeqRecord rec;
        rec.reserve(size_guess);

        // Header line: collect characters up to and including the first
        // linebreak character.
        nm = "";
        while (!inseq) {
            input_stream.get(ch);
            if (!input_stream.good()) {
                throw("Problem reading file");
            }
            if (ch == '\n' || ch == '\r') {
                inseq = true;
                // The header just ended on a linebreak, so a '>' that
                // follows immediately starts a new record (empty sequence).
                linebreak = true;
            }
            nm += ch;
        }
        rec.setName(nm);

        // Sequence text
        temp = "";
        while (inseq) {
            input_stream.get(ch);
            if (input_stream.eof()) {
                break;
            }
            // A failure that is not EOF would otherwise leave ch unchanged
            // and loop forever.
            if (!input_stream.good()) {
                throw("Problem reading file");
            }

            // ">" after a linebreak means a new name
            if (ch == '>' && linebreak) {
                inseq = false;
                linebreak = false;
                continue;
            }

            // Ignore, but note linebreaks
            linebreak = false;
            if (ch == '\n' || ch == '\r') {
                linebreak = true;
                continue;
            }

            // Ignore whitespace
            if (ch == ' ' || ch == '\t') {
                continue;
            }

            temp += ch;
        }

        rec.append(temp);
        data.append(rec);
        size_guess = rec.getSeq().size();
    }
}
void parseFasta(string filename, SeqSet &data) { //try { ifstream input(filename.c_str(), ifstream::in); data.filename = filename; char ch; string temp = ""; string nm; // Enclose all of this in a while loop that goes to EOF: input.get(ch); if(ch != '>') { throw("Not in FASTA format"); } bool inseq = false; bool linebreak = false; while(!input.eof()) { SeqRecord rec; nm = ""; while (true && !inseq) { input.get(ch); if (ch == '\n' || ch == '\r') inseq = true; nm += ch; } rec.setName(nm); temp = ""; while(inseq){ input.get(ch); if(input.eof()) break; // ">" after a linebreak means a new name if(ch == '>' && linebreak) { inseq = false; linebreak = false; continue; } // Ignore, but note linebreaks linebreak = false; if(ch == '\n' || ch == '\r') { linebreak = true; continue; } // Ignore whitespace if(ch == ' ' || ch == '\t') { continue; } temp += ch; } rec.append(temp); data.append(rec); } //} catch (...) { // throw("Problem parsing file"); //} }