Esempio n. 1
0
/// Read one FASTA record from the line reader and build a Seq-entry for it.
///
/// The reader's current line is treated as the defline of the record.
/// Following lines are concatenated into m_Sequence until a line starting
/// with '>' is seen or the reader reports EOF.
///
/// @param line_reader
///   Line reader positioned on the defline of the record to read.
/// @return
///   A raw nucleotide Seq-entry with a local id, or a null CRef when no
///   sequence characters were collected.
CRef<CSeq_entry>
CShortReadFastaInputSource::x_ReadFastaOneSeq(CRef<ILineReader> line_reader)
{
    // number of sequence characters collected so far in m_Sequence
    int start = 0;
    // parse the last read defline
    CTempString line = **line_reader;
    CTempString id = x_ParseDefline(line);
    CRef<CSeq_id> seqid(new CSeq_id);
    seqid->Set(CSeq_id::e_Local, id);
    ++(*line_reader);
    line = **line_reader;
    while (line[0] != '>') {

        // ignore empty lines
        if (line.empty() && !line_reader->AtEOF()) {
            ++(*line_reader);
            line = **line_reader;
            continue;
        }

        // copy the sequence
        // increase the sequence buffer if necessary
        // (+1 leaves room for the terminating zero written below)
        if (start + line.length() + 1 > m_SeqBuffLen) {
            string tmp;
            m_SeqBuffLen = 2 * (start + line.length() + 1);
            // The new buffer must be sized, not merely reserved: copying
            // into capacity beyond size() is undefined behavior and the
            // bytes are not part of the string.
            tmp.resize(m_SeqBuffLen);
            memcpy(&tmp[0], &m_Sequence[0], start);
            m_Sequence.swap(tmp);
        }
        memcpy(&m_Sequence[start], line.data(), line.length());
        start += line.length();

        if (line_reader->AtEOF()) {
            break;
        }

        // read next line
        ++(*line_reader);
        line = **line_reader;
    }

    // set up sequence
    if (start > 0) {
        CRef<CSeq_entry> seq_entry(new CSeq_entry);
        CBioseq& bioseq = seq_entry->SetSeq();
        bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
        bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
        bioseq.SetId().clear();
        bioseq.SetId().push_back(seqid);
        bioseq.SetInst().SetLength(start);
        // terminate the buffer so it can be passed as a C string
        m_Sequence[start] = 0;
        bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(&m_Sequence[0]));
        bioseq.SetDescr();

        m_BasesAdded += start;
        return seq_entry;
    }

    // no sequence data followed the defline
    return CRef<CSeq_entry>();
}
Esempio n. 2
0
/// Check whether a string looks like a decimal integer.
///
/// Accepts one or more digits, optionally preceded by a single '-'.
/// An empty string and a lone "-" are rejected.
///
/// @param value
///   Text to test.
/// @return
///   true if every character is a digit, allowing a leading '-' when at
///   least one more character follows it.
static bool IsInteger(const CTempString& value)
{
    if (value.empty())
        return false;

    const char* const first = value.begin();
    const char* cursor = value.end();

    // Walk backwards over everything except the first character.
    // isdigit() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behavior, hence the cast.
    while (--cursor > first)
        if (!isdigit(static_cast<unsigned char>(*cursor)))
            return false;

    // cursor now points at the first character: a digit, or a sign that
    // is followed by at least one digit.
    return isdigit(static_cast<unsigned char>(*cursor)) ||
            (*cursor == '-' && value.length() > 1);
}
Esempio n. 3
0
/// Move a PID that was appended to an executable path into the stat info.
///
/// Looks for the marker "; PID: " inside executable_path. When present,
/// the text after the marker is converted with NStr::StringToInt8 and
/// stored in stat_info under pid_key, and executable_path is cut off at
/// the marker.
///
/// @param stat_info
///   JSON node that receives the integer PID.
/// @param executable_path
///   Path text to inspect; truncated in place when the marker is found.
/// @param pid_key
///   Key under which the PID is stored.
/// @return
///   true if the marker was found and handled, false otherwise.
bool g_FixMisplacedPID(CJsonNode& stat_info, CTempString& executable_path,
        const char* pid_key)
{
    static const char kPidMarker[] = "; PID: ";

    const SIZE_TYPE marker_pos = NStr::Find(executable_path, kPidMarker);

    if (marker_pos != NPOS) {
        // sizeof includes the terminating zero, hence the -1
        const SIZE_TYPE number_pos = marker_pos + sizeof(kPidMarker) - 1;

        const CTempString pid_text(executable_path.data() + number_pos,
                executable_path.length() - number_pos);

        stat_info.SetInteger(pid_key, NStr::StringToInt8(pid_text));

        // drop the marker and everything after it from the path
        executable_path.erase(marker_pos);
        return true;
    }

    return false;
}
Esempio n. 4
0
//  ----------------------------------------------------------------------------
/// Split a region line into fields and re-join a dotted first column.
///
/// The line is tokenized on the characters " \t.-:". Since '-' is a
/// delimiter, a trailing '-' on the line is added back as a separate "-"
/// field. While more than three fields remain, the first two fields are
/// merged back together (with a '.') as long as the character in the line
/// right after the first field is a '.'. Merging stops early when exactly
/// four fields remain and the last one is "+" or "-".
///
/// NOTE(review): the merge test indexes `line` by the length of fields[0],
/// which presumably assumes the first field starts at offset 0 of the
/// line - confirm against callers.
///
/// @param fields
///   Receives the resulting fields.
/// @param line
///   Input line to split.
void CUCSCRegionReader::xSmartFieldSplit(vector<string>& fields, CTempString line)
{
    NStr::Split(line, " \t.-:", fields, NStr::fSplit_Tokenize);

    const bool has_trailing_minus = (line[line.length()-1] == '-');
    if (has_trailing_minus) {
        fields.push_back("-");
    }

    for (;;) {
        if (fields.size() <= 3) {
            return;
        }

        if (fields.size() == 4) {
            const string& last = fields.back();
            if (last == "+" || last == "-") {
                return;
            }
        }

        // try to merge first column
        const size_t separator_pos = fields[0].length();
        if (line[separator_pos] != '.') {
            return;
        }

        string& first = fields[0];
        first += '.';
        first += fields[1];
        fields.erase(fields.begin() + 1);
    }
}
Esempio n. 5
0
BEGIN_NCBI_SCOPE

/// Normalize a statistics key name in place.
///
/// Leading and trailing non-alphanumeric characters are trimmed. In what
/// remains, alphanumeric characters are lowercased and every other
/// character is replaced by '_'. A key with no alphanumeric characters
/// becomes "_".
///
/// The characters are rewritten directly in the storage that `key` points
/// to (via const_cast), so that storage must be writable.
///
/// @param key
///   Key to normalize; on return it refers to the normalized text.
static void NormalizeStatKeyName(CTempString& key)
{
    char* begin = const_cast<char*>(key.data());
    char* end = begin + key.length();

    // The <cctype> functions require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined behavior,
    // hence the casts below.
    while (begin < end && !isalnum(static_cast<unsigned char>(*begin)))
        ++begin;

    while (begin < end && !isalnum(static_cast<unsigned char>(end[-1])))
        --end;

    if (begin == end) {
        key = "_";
        return;
    }

    // Narrow the key to the trimmed range before rewriting its characters.
    key.assign(begin, end - begin);

    for (; begin < end; ++begin) {
        const unsigned char ch = static_cast<unsigned char>(*begin);
        *begin = isalnum(ch) ? static_cast<char>(tolower(ch)) : '_';
    }
}
Esempio n. 6
0
/// Construct a CLightString from a CTempString.
///
/// Initializes m_String from str.data() and m_Length from str.length().
///
/// NOTE(review): if m_String is a raw pointer, no characters are copied
/// and the object is only valid while the storage behind `str` lives -
/// confirm against the class declaration.
inline
CLightString::CLightString(const CTempString& str)
    : m_String(str.data()), m_Length(str.length())
{
}
Esempio n. 7
0
/// Read one FASTQ record from the line reader and build a Seq-entry for it.
///
/// The reader is advanced first, then four parts are consumed in order,
/// skipping empty lines before each: a defline starting with '@', one
/// sequence line, a second defline starting with '+', and one line of
/// quality scores (read and discarded).
///
/// @param line_reader
///   Line reader positioned just before the record's defline.
/// @return
///   A raw nucleotide Seq-entry with a local id, or a null CRef when the
///   sequence line is empty.
/// @throws CInputException
///   eInvalidInput when the '@' or '+' line is not found where expected.
CRef<CSeq_entry>
CShortReadFastaInputSource::x_ReadFastqOneSeq(CRef<ILineReader> line_reader)
{
    CTempString line;
    CTempString id;
    CRef<CSeq_entry> retval;

    // first read defline
    ++(*line_reader);
    line = **line_reader;

    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    if (line[0] != '@') {
        NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:"
                   " defline expected at line: " +
                   NStr::IntToString(line_reader->GetLineNumber()));
    }

    id = x_ParseDefline(line);
    CRef<CSeq_id> seqid(new CSeq_id);
    seqid->Set(CSeq_id::e_Local, id);

    // read sequence
    ++(*line_reader);
    line = **line_reader;
    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    // set up sequence
    if (line.length() > 0) {
        CRef<CSeq_entry> seq_entry(new CSeq_entry);
        CBioseq& bioseq = seq_entry->SetSeq();
        bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
        bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
        bioseq.SetId().clear();
        bioseq.SetId().push_back(seqid);
        bioseq.SetInst().SetLength(line.length());
        // A CTempString is a (pointer, length) view and is not guaranteed
        // to be zero-terminated, so copy exactly line.length() characters
        // instead of treating data() as a C string.
        bioseq.SetInst().SetSeq_data().SetIupacna(
            CIUPACna(string(line.data(), line.length())));
        bioseq.SetDescr();

        m_BasesAdded += line.length();
        retval = seq_entry;
    }
    
    // read and skip second defline
    ++(*line_reader);
    line = **line_reader;
    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    if (line[0] != '+') {
        NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:"
                   " defline expected at line: " +
                   NStr::IntToString(line_reader->GetLineNumber()));
    }

    // read and skip quality scores
    ++(*line_reader);
    line = **line_reader;
    // skip empty lines
    while (!line_reader->AtEOF() && line.empty()) {
        ++(*line_reader);
        line = **line_reader;
    }

    return retval;
}