示例#1
0
    // Parse FASTQ-formatted text from input_stream into data.
    //
    // The stream is consumed one character at a time. A run of consecutive
    // linebreak characters ('\n' or '\r') counts as a single linebreak, so
    // blank lines (including leading ones) are skipped and "\r\n" endings
    // are accepted. Every four lines form one record:
    //   line 0: '@' followed by the record name (the '@' is not stored)
    //   line 1: the sequence
    //   line 2: the "plus" line (ignored)
    //   line 3: the quality scores (ignored)
    //
    // Parameters:
    //   input_stream - stream to read from
    //   filename     - stored in data.filename; the file is not opened here
    //   data         - receives one SeqRecord per record via data.append()
    //
    // Throws a C string (const char*):
    //   "Problem reading file" - the stream is neither good() nor at EOF
    //   "Not in fastq format"  - the first line of a record does not start
    //                            with '@'
    //
    // A final record whose quality line is not terminated by a linebreak is
    // still emitted. A record cut off before any quality character was read
    // is dropped silently.
    void parseFastq(SeqStream input_stream, string filename, SeqSet &data) {

        data.filename = filename;
        char ch = '\0';
        string temp = "";
        string nm = "";
        unsigned size_guess = 150; // Initial capacity guess for the sequence buffer
        unsigned line_num = 0;
        // Start in the "just saw a linebreak" state so that linebreaks at the
        // very beginning of the input do not advance the line counter.
        bool linebreak = true;
        bool name = false;
        temp.reserve(size_guess);
        while(!input_stream.eof()) {
            // Check if stream is okay and read a character
            if(!input_stream.good()) {
                throw("Problem reading file");
            }
            input_stream.get(ch);
            if(!input_stream.good()) {
                // The read failed, so ch holds a stale value and must not be
                // processed. Re-evaluating the loop ends it on EOF, and the
                // good() check above throws for any other failure.
                continue;
            }

            // Check for linebreaks. Treat multiple linebreak characters
            // as one linebreak. Also, count the number of lines, and when
            // four lines have been reached, construct a SeqRecord and reset.
            if(ch == '\n' || ch == '\r') {
                if(!linebreak) {
                    line_num += 1;
                    if(line_num == 4) {
                        line_num = 0;
                        SeqRecord rec;
                        rec.setName(nm);
                        rec.append(temp);
                        data.append(rec);
                        // Use the length of this record as the capacity
                        // guess for the next one.
                        size_guess = rec.getSeq().size();
                        nm.clear();
                        temp.clear();
                        temp.reserve(size_guess);
                        name = false;
                    }
                }
                linebreak = true;
                continue;
            }

            // If this far, not a linebreak
            linebreak = false;

            if(line_num == 0) {
                // Name line: the first character must be '@' and is not
                // stored; everything after it is the name.
                if(!name) {
                    if(ch != '@') {
                        throw("Not in fastq format");
                    }
                    name = true;
                } else {
                    nm += ch;
                }
            } else if(line_num == 1) {
                // Sequence line
                temp += ch;
            }
            // Lines 2 (plus line) and 3 (quality scores) are ignored.
        }

        // Input ended in the middle of a quality line, i.e. the last record
        // has no trailing linebreak. Its name and sequence are complete, so
        // emit it instead of losing it.
        if(line_num == 3 && !linebreak) {
            SeqRecord rec;
            rec.setName(nm);
            rec.append(temp);
            data.append(rec);
        }
    }
示例#2
0
    // Parse FASTA-formatted text from input_stream into data.
    //
    // The very first character of the input must be '>'. Each record is:
    //   - a header line: everything after '>' up to AND INCLUDING the first
    //     linebreak character ('\n' or '\r') is passed to rec.setName()
    //   - the sequence: all following characters, with '\n', '\r', ' ' and
    //     '\t' removed, up to end of input or a '>' that directly follows
    //     a linebreak character
    //
    // NOTE(review): the terminating linebreak character is stored as part of
    // the name. That is kept unchanged here because callers may rely on it;
    // confirm whether it is intended.
    //
    // Parameters:
    //   input_stream - stream to read from
    //   filename     - stored in data.filename; the file is not opened here
    //   data         - receives one SeqRecord per record via data.append()
    //
    // Throws a C string (const char*):
    //   "Not in FASTA format"  - the first character is not '>' (this also
    //                            covers input from which nothing can be read)
    //   "Problem reading file" - a read fails inside a header line (including
    //                            end of input before the header's linebreak),
    //                            or fails inside a sequence for a reason
    //                            other than end of input
    void parseFasta(SeqStream input_stream, string filename, SeqSet &data) {

        data.filename = filename;

        // Initialised so that a failed first read is reported as a format
        // error instead of inspecting an indeterminate value.
        char ch = '\0';
        string temp = "";
        string nm;
        unsigned size_guess = 10000; // Initial capacity guess for a record

        input_stream.get(ch);
        if(ch != '>') {
            throw("Not in FASTA format");
        }

        bool inseq = false;
        bool linebreak = false;
        while(!input_stream.eof()) {
            SeqRecord rec;
            rec.reserve(size_guess);

            // Header line: collect characters until the first linebreak.
            nm = "";
            while(!inseq) {
                input_stream.get(ch);
                if(!input_stream.good()) {
                    // Failed read: ch is stale, so do not append it.
                    throw("Problem reading file");
                }
                if(ch == '\n' || ch == '\r')
                    inseq = true;
                nm += ch;
            }
            rec.setName(nm);

            // The header just ended with a linebreak, so a '>' seen next
            // starts a new record (this record then has an empty sequence).
            linebreak = true;

            temp = "";
            while(inseq) {
                input_stream.get(ch);
                if(input_stream.eof())
                    break;
                if(!input_stream.good()) {
                    // Non-EOF failure: stop instead of re-reading stale ch.
                    throw("Problem reading file");
                }

                // ">" after a linebreak means a new name
                if(ch == '>' && linebreak) {
                    inseq = false;
                    linebreak = false;
                    continue;
                }

                // Ignore, but note linebreaks
                linebreak = false;
                if(ch == '\n' || ch == '\r') {
                    linebreak = true;
                    continue;
                }

                // Ignore whitespace
                if(ch == ' ' || ch == '\t') {
                    continue;
                }

                temp += ch;
            }
            rec.append(temp);
            data.append(rec);
            // Use this record's length as the capacity guess for the next.
            size_guess = rec.getSeq().size();
        }
    }
示例#3
0
    // Open the file named by filename and parse it as FASTA into data.
    //
    // The very first character of the file must be '>'. Each record is:
    //   - a header line: everything after '>' up to AND INCLUDING the first
    //     linebreak character ('\n' or '\r') is passed to rec.setName()
    //   - the sequence: all following characters, with '\n', '\r', ' ' and
    //     '\t' removed, up to end of file or a '>' that directly follows
    //     a linebreak character
    //
    // NOTE(review): the terminating linebreak character is stored as part of
    // the name. That is kept unchanged here because callers may rely on it;
    // confirm whether it is intended.
    //
    // Parameters:
    //   filename - path of the file to read; also stored in data.filename
    //   data     - receives one SeqRecord per record via data.append()
    //
    // Throws a C string (const char*):
    //   "Problem reading file" - the file cannot be opened, a read fails
    //                            inside a header line (including end of file
    //                            before the header's linebreak), or a read
    //                            fails inside a sequence for a reason other
    //                            than end of file
    //   "Not in FASTA format"  - the first character is not '>' (this also
    //                            covers an empty file)
    void parseFasta(string filename, SeqSet &data) {

        ifstream input(filename.c_str(), ifstream::in);
        if(!input.is_open()) {
            throw("Problem reading file");
        }

        data.filename = filename;

        // Initialised so that a failed first read (empty file) is reported
        // as a format error instead of inspecting an indeterminate value.
        char ch = '\0';
        string temp = "";
        string nm;

        input.get(ch);
        if(ch != '>') {
            throw("Not in FASTA format");
        }

        bool inseq = false;
        bool linebreak = false;
        while(!input.eof()) {
            SeqRecord rec;

            // Header line: collect characters until the first linebreak.
            // The read must be checked: without it, a file that ends inside
            // a header line would never leave this loop.
            nm = "";
            while(!inseq) {
                if(!input.get(ch)) {
                    throw("Problem reading file");
                }
                if(ch == '\n' || ch == '\r')
                    inseq = true;
                nm += ch;
            }
            rec.setName(nm);

            // The header just ended with a linebreak, so a '>' seen next
            // starts a new record (this record then has an empty sequence).
            linebreak = true;

            temp = "";
            while(inseq) {
                if(!input.get(ch)) {
                    if(input.eof())
                        break;
                    // Non-EOF failure: stop instead of re-reading stale ch.
                    throw("Problem reading file");
                }

                // ">" after a linebreak means a new name
                if(ch == '>' && linebreak) {
                    inseq = false;
                    linebreak = false;
                    continue;
                }

                // Ignore, but note linebreaks
                linebreak = false;
                if(ch == '\n' || ch == '\r') {
                    linebreak = true;
                    continue;
                }

                // Ignore whitespace
                if(ch == ' ' || ch == '\t') {
                    continue;
                }

                temp += ch;
            }
            rec.append(temp);
            data.append(rec);
        }
    }