예제 #1
0
CRef<CSeq_entry>
CShortReadFastaInputSource::x_ReadFastaOneSeq(CRef<ILineReader> line_reader)
{
    int start = 0;
    // parse the last read defline
    CTempString line = **line_reader;
    CTempString id = x_ParseDefline(line);
    CRef<CSeq_id> seqid(new CSeq_id);
    seqid->Set(CSeq_id::e_Local, id);
    ++(*line_reader);
    line = **line_reader;
    while (line[0] != '>') {

        // ignore empty lines
        if (line.empty() && !line_reader->AtEOF()) {
            ++(*line_reader);
            line = **line_reader;
            continue;
        }

        // copy the sequence
        // increase the sequence buffer if necessary
        if (start + line.length() + 1 > m_SeqBuffLen) {
            string tmp;
            m_SeqBuffLen = 2 * (start + line.length() + 1);
            tmp.reserve(m_SeqBuffLen);
            memcpy(&tmp[0], &m_Sequence[0], start);
            m_Sequence.swap(tmp);
        }
        memcpy(&m_Sequence[start], line.data(), line.length());
        start += line.length();

        if (line_reader->AtEOF()) {
            break;
        }

        // read next line
        ++(*line_reader);
        line = **line_reader;
    }

    // set up sequence
    if (start > 0) {
        CRef<CSeq_entry> seq_entry(new CSeq_entry);
        CBioseq& bioseq = seq_entry->SetSeq();
        bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
        bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
        bioseq.SetId().clear();
        bioseq.SetId().push_back(seqid);
        bioseq.SetInst().SetLength(start);
        m_Sequence[start] = 0;
        bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(&m_Sequence[0]));
        bioseq.SetDescr();

        m_BasesAdded += start;
        return seq_entry;
    }

    return CRef<CSeq_entry>();
}
예제 #2
0
static CProt_ref::EECNumberFileStatus s_LoadECNumberTable(const string& dir, const string& name,
                                const char* const *fallback,
                                size_t fallback_count,
                                CProt_ref::EECNumberStatus status)
{
    CRef<ILineReader> lr;
    CProt_ref::EECNumberFileStatus rval = CProt_ref::eECFile_not_attempted;

    if ( !dir.empty() ) {
        lr.Reset(ILineReader::New
                 (CDirEntry::MakePath(dir, "ecnum_" + name, "txt")));
        rval = CProt_ref::eECFile_not_found;
    }
    if (lr.Empty()) {
        while (fallback_count--) {
            s_ProcessECNumberLine(*fallback++, status);
        }
    } else {
        rval = CProt_ref::eECFile_not_read;
        do {
            s_ProcessECNumberLine(*++*lr, status);
            rval = CProt_ref::eECFile_read;
        } while ( !lr->AtEOF() );
    }
    return rval;
}
예제 #3
0
static void s_LoadECNumberTable(const string& dir, const string& name,
                                const char* const *fallback,
                                size_t fallback_count,
                                CProt_ref::EECNumberStatus status)
{
    CRef<ILineReader> lr;
    if ( !dir.empty() ) {
        lr.Reset(ILineReader::New
                 (CDirEntry::MakePath(dir, "ecnum_" + name, "txt")));
    }
    if (lr.Empty()) {
        while (fallback_count--) {
            s_ProcessECNumberLine(*fallback++, status);
        }
    } else {
        do {
            s_ProcessECNumberLine(*++*lr, status);
        } while ( !lr->AtEOF() );
    }
}
예제 #4
0
CRef<CSeq_entry>
CShortReadFastaInputSource::x_ReadFastqOneSeq(CRef<ILineReader> line_reader)
{
    CTempString line;
    CTempString id;
    CRef<CSeq_entry> retval;

    // first read defline
    ++(*line_reader);
    line = **line_reader;

    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    if (line[0] != '@') {
        NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:"
                   " defline expected at line: " +
                   NStr::IntToString(line_reader->GetLineNumber()));
    }

    id = x_ParseDefline(line);
    CRef<CSeq_id> seqid(new CSeq_id);
    seqid->Set(CSeq_id::e_Local, id);

    // read sequence
    ++(*line_reader);
    line = **line_reader;
    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    // set up sequence
    if (line.length() > 0) {
        CRef<CSeq_entry> seq_entry(new CSeq_entry);
        CBioseq& bioseq = seq_entry->SetSeq();
        bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
        bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
        bioseq.SetId().clear();
        bioseq.SetId().push_back(seqid);
        bioseq.SetInst().SetLength(line.length());
        bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(line.data()));
        bioseq.SetDescr();

        m_BasesAdded += line.length();
        retval = seq_entry;
    }
    
    // read and skip second defline
    ++(*line_reader);
    line = **line_reader;
    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    if (line[0] != '+') {
        NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:"
                   " defline expected at line: " +
                   NStr::IntToString(line_reader->GetLineNumber()));
    }

    // read and skip quality scores
    ++(*line_reader);
    line = **line_reader;
    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    return retval;
}