/// Read one FASTA record whose defline is the line reader's current line.
///
/// The defline is parsed with x_ParseDefline() and used as a local Seq-id.
/// The following lines, up to (but not including) the next line that starts
/// with '>' or up to end of input, are concatenated into m_Sequence.
/// Empty lines in between are skipped.
///
/// @param line_reader
///   Line reader positioned on the defline of the record to read. On return
///   it is positioned on the next defline, or at end of input.
/// @return
///   A Seq-entry holding the sequence as IUPACna, or an empty CRef if no
///   sequence characters were read. m_BasesAdded is increased by the number
///   of characters read.
CRef<CSeq_entry>
CShortReadFastaInputSource::x_ReadFastaOneSeq(CRef<ILineReader> line_reader)
{
    // number of sequence characters accumulated in m_Sequence so far
    int start = 0;

    // parse the last read defline
    CTempString line = **line_reader;
    CTempString id = x_ParseDefline(line);
    CRef<CSeq_id> seqid(new CSeq_id);
    seqid->Set(CSeq_id::e_Local, id);

    ++(*line_reader);
    line = **line_reader;
    while (line[0] != '>') {

        // ignore empty lines
        if (line.empty() && !line_reader->AtEOF()) {
            ++(*line_reader);
            line = **line_reader;
            continue;
        }

        // copy the sequence
        // room needed: what is already stored + this line + terminating NUL
        const size_t needed = start + line.length() + 1;

        // increase the sequence buffer if necessary
        if (needed > m_SeqBuffLen) {
            m_SeqBuffLen = 2 * needed;
        }
        // The buffer must be *sized*, not merely reserved: writing through
        // operator[] beyond size() is undefined behaviour. resize() keeps
        // the first `start` characters, which are always within the current
        // size because every previous iteration ensured size() >= needed.
        if (m_Sequence.size() < m_SeqBuffLen) {
            m_Sequence.resize(m_SeqBuffLen);
        }

        memcpy(&m_Sequence[start], line.data(), line.length());
        start += static_cast<int>(line.length());

        if (line_reader->AtEOF()) {
            break;
        }

        // read next line
        ++(*line_reader);
        line = **line_reader;
    }

    // set up sequence
    if (start > 0) {
        CRef<CSeq_entry> seq_entry(new CSeq_entry);
        CBioseq& bioseq = seq_entry->SetSeq();
        bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
        bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
        bioseq.SetId().clear();
        bioseq.SetId().push_back(seqid);
        bioseq.SetInst().SetLength(start);

        // NUL-terminate so that the C-string constructor below stops at the
        // end of this record and does not pick up data from an earlier one
        m_Sequence[start] = 0;
        bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(&m_Sequence[0]));
        bioseq.SetDescr();

        m_BasesAdded += start;
        return seq_entry;
    }

    return CRef<CSeq_entry>();
}
static bool IsInteger(const CTempString& value) { if (value.empty()) return false; const char* digit = value.end(); while (--digit > value.begin()) if (!isdigit(*digit)) return false; return isdigit(*digit) || (*digit == '-' && value.length() > 1); }
/// Move a PID that was appended to an executable path into its own field.
///
/// If executable_path contains the marker "; PID: ", the text after the
/// marker is converted with NStr::StringToInt8() and stored in stat_info
/// under pid_key, and the marker together with everything after it is cut
/// off executable_path.
///
/// @param stat_info
///   JSON node that receives the integer PID.
/// @param executable_path
///   Path text to inspect; truncated at the marker when one is found.
/// @param pid_key
///   Key under which the PID is stored in stat_info.
/// @return
///   true if the marker was found and processed, false otherwise (in which
///   case nothing is modified).
/// @note
///   NOTE(review): error handling for a non-numeric PID is whatever
///   NStr::StringToInt8() does with its default flags -- confirm.
bool g_FixMisplacedPID(CJsonNode& stat_info, CTempString& executable_path,
        const char* pid_key)
{
    static const char kPidMarker[] = "; PID: ";

    const SIZE_TYPE marker_pos = NStr::Find(executable_path, kPidMarker);

    if (marker_pos != NPOS) {
        // The PID text starts right after the marker (sizeof counts the
        // terminating NUL, hence the -1) and runs to the end of the string.
        const SIZE_TYPE pid_pos = marker_pos + (sizeof(kPidMarker) - 1);
        const CTempString pid_text(executable_path.data() + pid_pos,
                executable_path.length() - pid_pos);

        stat_info.SetInteger(pid_key, NStr::StringToInt8(pid_text));

        executable_path.erase(marker_pos);
        return true;
    }

    return false;
}
// ---------------------------------------------------------------------------- void CUCSCRegionReader::xSmartFieldSplit(vector<string>& fields, CTempString line) { NStr::Split(line, " \t.-:", fields, NStr::fSplit_Tokenize); if (line[line.length()-1] == '-') fields.push_back("-"); while (fields.size() > 3) { if (fields.size() == 4 && (fields.back() == "+" || fields.back() == "-")) break; // try to merge first column size_t len = fields[0].length(); if (line[len] == '.') { fields[0] += line[len]; fields[0] += fields[1]; fields.erase(fields.begin()+1); } else { break; } } }
BEGIN_NCBI_SCOPE

/// Normalize a statistics key name in place.
///
/// Leading and trailing non-alphanumeric characters are trimmed. In what
/// remains, alphanumeric characters are converted to lower case and every
/// other character is replaced with '_'. If nothing remains after trimming
/// (including when key is empty), key is set to "_".
///
/// @param key
///   On input, the raw key; on output, the normalized key.
/// @note
///   The characters are rewritten directly in the buffer that key refers
///   to (via const_cast), so that buffer must be writable -- the caller is
///   responsible for that.
static void NormalizeStatKeyName(CTempString& key)
{
    char* begin = const_cast<char*>(key.data());
    char* end = begin + key.length();

    // The casts to unsigned char below are required: passing a negative
    // char value to isalnum()/tolower() is undefined behaviour.

    // trim non-alphanumeric characters from both ends
    while (begin < end && !isalnum(static_cast<unsigned char>(*begin)))
        ++begin;

    while (begin < end && !isalnum(static_cast<unsigned char>(end[-1])))
        --end;

    if (begin == end) {
        key = "_";
        return;
    }

    key.assign(begin, end - begin);

    // rewrite the trimmed range in place; key now refers to this range
    for (; begin < end; ++begin) {
        const unsigned char ch = static_cast<unsigned char>(*begin);
        *begin = isalnum(ch) ? static_cast<char>(tolower(ch)) : '_';
    }
}
/// Construct from a CTempString.
///
/// Initializes m_String from str.data() and m_Length from str.length().
///
/// @param str
///   Source string.
/// @note
///   NOTE(review): if m_String is a plain pointer, no characters are copied
///   and the data referenced by str must outlive this object -- confirm
///   against the class declaration.
inline
CLightString::CLightString(const CTempString& str)
    : m_String(str.data()),
      m_Length(str.length())
{
}
CRef<CSeq_entry> CShortReadFastaInputSource::x_ReadFastqOneSeq(CRef<ILineReader> line_reader) { CTempString line; CTempString id; CRef<CSeq_entry> retval; // first read defline ++(*line_reader); line = **line_reader; // skip empty lines while (!line_reader->AtEOF() && line.empty()) { ++(*line_reader); line = **line_reader; } if (line[0] != '@') { NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:" " defline expected at line: " + NStr::IntToString(line_reader->GetLineNumber())); } id = x_ParseDefline(line); CRef<CSeq_id> seqid(new CSeq_id); seqid->Set(CSeq_id::e_Local, id); // read sequence ++(*line_reader); line = **line_reader; // skip empty lines while (!line_reader->AtEOF() && line.empty()) { ++(*line_reader); line = **line_reader; } // set up sequence if (line.length() > 0) { CRef<CSeq_entry> seq_entry(new CSeq_entry); CBioseq& bioseq = seq_entry->SetSeq(); bioseq.SetInst().SetMol(CSeq_inst::eMol_na); bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw); bioseq.SetId().clear(); bioseq.SetId().push_back(seqid); bioseq.SetInst().SetLength(line.length()); bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(line.data())); bioseq.SetDescr(); m_BasesAdded += line.length(); retval = seq_entry; } // read and skip second defline ++(*line_reader); line = **line_reader; // skip empty lines while (!line_reader->AtEOF() && line.empty()) { ++(*line_reader); line = **line_reader; } if (line[0] != '+') { NCBI_THROW(CInputException, eInvalidInput, (string)"FASTQ parse error:" " defline expected at line: " + NStr::IntToString(line_reader->GetLineNumber())); } // read and skip quality scores ++(*line_reader); line = **line_reader; // skip empty lines while (!line_reader->AtEOF() && line.empty()) { ++(*line_reader); line = **line_reader; } return retval; }