void CVDBMgr::x_Init(void) { if ( rc_t rc = VDBManagerMakeRead(x_InitPtr(), 0) ) { *x_InitPtr() = 0; NCBI_THROW2(CSraException, eInitFailed, "Cannot open VDBManager", rc); } uint32_t sdk_ver; if ( rc_t rc = VDBManagerVersion(*this, &sdk_ver) ) { NCBI_THROW2(CSraException, eInitFailed, "Cannot get VDBManager version", rc); } CKNSManager kns_mgr(CVFSManager(*this)); CNcbiOstrstream str; CNcbiApplication* app = CNcbiApplication::Instance(); if ( app ) { str << app->GetAppName() << ": " << app->GetVersion().Print() << "; "; } #if NCBI_PACKAGE str << "Package: " << NCBI_PACKAGE_NAME << ' ' << NCBI_PACKAGE_VERSION << "; "; #endif str << "C++ "; #ifdef NCBI_PRODUCTION_VER str << NCBI_PRODUCTION_VER << "/"; #endif #ifdef NCBI_DEVELOPMENT_VER str << NCBI_DEVELOPMENT_VER; #endif string prefix = CNcbiOstrstreamToString(str); KNSManagerSetUserAgent(kns_mgr, "%s; SRA Toolkit %V", prefix.c_str(), sdk_ver); // redirect VDB log to C++ Toolkit if ( s_GetDiagHandler() ) { KLogInit(); KLogLevelSet(klogDebug); KLogLibHandlerSet(VDBLogWriter, 0); } if ( app ) { string host = app->GetConfig().GetString("CONN", "HTTP_PROXY_HOST", kEmptyStr); int port = app->GetConfig().GetInt("CONN", "HTTP_PROXY_PORT", 0); if ( !host.empty() && port != 0 ) { if ( rc_t rc = KNSManagerSetHTTPProxyPath(kns_mgr, "%s:%d", host.c_str(), port) ) { NCBI_THROW2(CSraException, eInitFailed, "Cannot set KNSManager proxy parameters", rc); } KNSManagerSetHTTPProxyEnabled(kns_mgr, true); } } }
/// Report a parse failure at the current position in the NetSchedule output.
///
/// The reported position is the 1-based offset of m_Ch within m_NSOutput.
///
/// @throws CStringException (eFormat) always; the message distinguishes a
///         premature end of output from an unexpected character.
void CExecAndParseStructuredOutput::ThrowUnexpectedCharError()
{
    const size_t position = m_Ch - m_NSOutput.data() + 1;

    if (*m_Ch != '\0') {
        NCBI_THROW2(CStringException, eFormat,
                    "Unexpected character in NetSchedule output", position);
    }
    NCBI_THROW2(CStringException, eFormat,
                "Unexpected end of NetSchedule output", position);
}
void CSraPath::x_Init(void) { CSraMgr::RegisterFunctions(); if ( rc_t rc = SRAPathMake(x_InitPtr(), 0) ) { *x_InitPtr() = 0; NCBI_THROW2(CSraException, eInitFailed, "Cannot make SRAPath", rc); } if ( rc_t rc = SRAPathClear(*this) ) { NCBI_THROW2(CSraException, eInitFailed, "Cannot clear SRAPath", rc); } }
void CKConfig::Commit() const { if ( rc_t rc = KConfigCommit(const_cast<KConfig*>(GetPointer())) ) { NCBI_THROW2(CSraException, eOtherError, "CKConfig: Cannot commit config changes", rc); } }
/// Ask the VDB manager to delete cache files older than the given age.
///
/// @param days  Age threshold passed to VDBManagerDeleteCacheOlderThan().
/// @throws CSraException (eOtherError) if the call fails.
void CVDBMgr::DeleteCacheOlderThan(Uint4 days)
{
    const rc_t rc = VDBManagerDeleteCacheOlderThan(*this, days);
    if ( rc ) {
        NCBI_THROW2(CSraException, eOtherError,
                    "CVDBMgr: Cannot delete old cache files", rc);
    }
}
void CVFSManager::x_InitNew(void) { if ( rc_t rc = VFSManagerMake(x_InitPtr()) ) { *x_InitPtr() = 0; NCBI_THROW2(CSraException, eInitFailed, "Cannot create VFSManager", rc); } }
/// Obtain the KDB manager that belongs to the given VDB manager.
///
/// @param mgr  VDB manager to query.
/// @throws CSraException (eInitFailed) if the KDB manager cannot be
///         obtained; the held pointer is reset to null first.
CKDBManager::CKDBManager(const CVDBMgr& mgr)
{
    const rc_t rc = VDBManagerGetKDBManagerRead(mgr, x_InitPtr());
    if ( rc ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot get KDBManager", rc);
    }
}
/// Obtain the KNS manager that belongs to the given VFS manager.
///
/// @param mgr  VFS manager to query.
/// @throws CSraException (eInitFailed) if the KNS manager cannot be
///         obtained; the held pointer is reset to null first.
CKNSManager::CKNSManager(const CVFSManager& mgr)
{
    const rc_t rc = VFSManagerGetKNSMgr(mgr, x_InitPtr());
    if ( rc ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot get KNSManager", rc);
    }
}
void CVDBMgr::SetCacheRoot(const string& path) { CVPath vpath(CVFSManager(*this), path, CVPath::eSys); if ( rc_t rc = VDBManagerSetCacheRoot(*this, vpath) ) { NCBI_THROW2(CSraException, eOtherError, "CVDBMgr: Cannot set cache root", rc); } }
void CSraMgr::x_DoInit(void) { if ( rc_t rc = SRAMgrMakeRead(x_InitPtr()) ) { *x_InitPtr() = 0; NCBI_THROW2(CSraException, eInitFailed, "Cannot open SRAMgr", rc); } }
/// Create a resolver from a VFS manager and a configuration.
///
/// @param mgr  VFS manager; a copy is stored in m_Mgr.
/// @param cfg  Configuration passed to VFSManagerMakeResolver().
/// @throws CSraException (eInitFailed) if the resolver cannot be created;
///         the held pointer is reset to null first.
CVResolver::CVResolver(const CVFSManager& mgr, const CKConfig& cfg)
    : m_Mgr(mgr)
{
    const rc_t rc = VFSManagerMakeResolver(mgr, x_InitPtr(), cfg);
    if ( rc ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot create VResolver", rc);
    }
}
/// Attach to the configuration object of the given VDB manager.
///
/// The pointer returned by VFSManagerGetConfig() is stored and then a
/// reference to it is added with KConfigAddRef().
///
/// @param mgr  VDB manager whose VFS manager supplies the configuration.
/// @throws CSraException (eInitFailed) if the reference cannot be added;
///         the held pointer is reset to null first.
CKConfig::CKConfig(const CVDBMgr& mgr)
{
    *x_InitPtr() = VFSManagerGetConfig(CVFSManager(mgr));

    const rc_t rc = KConfigAddRef(*this);
    if ( rc ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot get reference to KConfig", rc);
    }
}
/// Create a new configuration object with KConfigMake().
///
/// @throws CSraException (eInitFailed) if the configuration cannot be
///         created; the held pointer is reset to null first.
CKConfig::CKConfig(void)
{
    KConfig* new_config;
    const rc_t rc = KConfigMake(&new_config, 0);
    if ( rc ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot create KConfig", rc);
    }
    *x_InitPtr() = new_config;
}
/// Render this path as a string.
///
/// @param type  eSys selects VPathMakeSysPath(); any other value selects
///              VPathMakeString().
/// @return A copy of the text produced by the selected VPath call.
/// @throws CSraException (eOtherError) if the path cannot be obtained.
string CVPath::ToString(EType type) const
{
    const String* text = 0;
    const rc_t rc = (type == eSys)
        ? VPathMakeSysPath(*this, &text)
        : VPathMakeString(*this, &text);
    if ( rc ) {
        NCBI_THROW2(CSraException, eOtherError,
                    "Cannot get path from VPath", rc);
    }

    // Copy the characters out before releasing the String.
    string result(text->addr, text->size);
    StringWhack(text);
    return result;
}
void CVDBCursor::CloseRow(void) { if ( !RowIsOpened() ) { return; } if ( rc_t rc = VCursorCloseRow(*this) ) { NCBI_THROW2(CSraException, eInitFailed, "Cannot close VDB cursor row", rc); } m_RowOpened = false; }
void CVDBCursor::Init(const CVDBTable& table) { if ( *this ) { NCBI_THROW2(CSraException, eInvalidState, "Cannot init VDB cursor again", RC(rcApp, rcCursor, rcConstructing, rcSelf, rcOpen)); } if ( rc_t rc = VTableCreateCursorRead(table, x_InitPtr()) ) { *x_InitPtr() = 0; NCBI_THROW2(CSraException, eInitFailed, "Cannot create VDB cursor", rc); } if ( rc_t rc = VCursorPermitPostOpenAdd(*this) ) { NCBI_THROW2(CSraException, eInitFailed, "Cannot allow VDB cursor post open column add", rc); } if ( rc_t rc = VCursorOpen(*this) ) { NCBI_THROW2(CSraException, eInitFailed, "Cannot open VDB cursor", rc); } m_Table = table; }
/// Return the cache root of the VDB manager as a system path string.
///
/// @return The cache root, or an empty string when the manager reports the
///         path object as not found.
/// @throws CSraException (eOtherError) on any other failure.
string CVDBMgr::GetCacheRoot() const
{
    const VPath* root;
    const rc_t rc = VDBManagerGetCacheRoot(*this, &root);
    if ( !rc ) {
        return CVPath(root).ToString(CVPath::eSys);
    }

    // "Path not found" is reported as an empty result, not as an error.
    if ( GetRCObject(rc) == RCObject(rcPath) &&
         GetRCState(rc) == rcNotFound ) {
        return kEmptyStr;
    }
    NCBI_THROW2(CSraException, eOtherError,
                "CVDBMgr: Cannot get cache root", rc);
}
// ---------------------------------------------------------------------------- void CMultiReaderApp::xProcessDefault( const CArgs& args, CNcbiIstream& istr, CNcbiOstream& ostr) // ---------------------------------------------------------------------------- { auto_ptr<CReaderBase> pReader(CReaderBase::GetReader(m_uFormat, m_iFlags)); if (!pReader.get()) { NCBI_THROW2(CObjReaderParseException, eFormat, "File format not supported", 0); } CRef<CSerialObject> object = pReader->ReadObject(istr, m_pErrors); xWriteObject(*object, ostr); }
/// Construct an integer-tagged value from an unsigned 8-byte integer.
///
/// The value is stored in the signed field ival, so it must not exceed the
/// maximum of Int8.
///
/// @param value  Unsigned value to store.
/// @throws CExprParserException (eTypeConversionError) if value does not
///         fit in Int8.
CExprValue::CExprValue(Uint8 value)
    : ival(0)
    , m_sval("")
    , m_Var(NULL)
    , m_Pos(0)
    , m_Tag(eINT)
{
    const Uint8 max_signed = static_cast<Uint8>(numeric_limits<Int8>::max());
    if (value > max_signed) {
        NCBI_THROW2(CExprParserException, eTypeConversionError,
                    "Value too big to fit in the 8-byte signed integer type",
                    m_Pos);
    }
    ival = static_cast<Int8>(value);
}
/// Convert the next len characters at m_Ch to an integer and consume them.
///
/// @param len  Number of characters that make up the integer token,
///             including a leading minus sign if present.
/// @return The converted value.
/// @throws CStringException (eFormat) if the digits start with '0' and
///         there is more than one digit.
Int8 CNetScheduleStructuredOutputParser::ParseInt(size_t len)
{
    // Convert first, while m_Ch still points at the start of the token.
    const Int8 result = NStr::StringToInt8(CTempString(m_Ch, len));

    size_t digit_count = len;
    if (*m_Ch == '-') {
        ++m_Ch;
        --digit_count;
    }

    const bool has_leading_zero = digit_count > 1 && *m_Ch == '0';
    if (has_leading_zero) {
        NCBI_THROW2(CStringException, eFormat,
                    "Leading zeros are not allowed", GetPosition());
    }

    m_Ch += digit_count;
    return result;
}
/// Open an SRA table for reading by accession or path.
///
/// A default SRA schema is made for the open call and released before the
/// constructor returns or throws from the open step.
///
/// @param mgr          VDB manager used to open the table.
/// @param acc_or_path  Accession or system path; also stored in m_Name.
/// @param missing      When not eMissing_Throw, a "not found" result leaves
///                     the object with a null pointer instead of throwing.
/// @throws CSraException eInitFailed if the schema cannot be made;
///         eNotFoundTable, eDataError or eOtherError if the table cannot be
///         opened, depending on the returned code.
CVDBTable::CVDBTable(const CVDBMgr& mgr,
                     const string& acc_or_path,
                     EMissing missing)
    : m_Name(acc_or_path)
{
    *x_InitPtr() = 0;
    VSchema *schema;
    DECLARE_SDK_GUARD();

    const rc_t schema_rc = SRASchemaMake(&schema, mgr);
    if ( schema_rc ) {
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot make default SRA schema", schema_rc);
    }

    string path = CVPath::ConvertAccOrSysPathToPOSIX(acc_or_path);
    const rc_t rc = VDBManagerOpenTableRead(mgr, x_InitPtr(), schema,
                                            "%.*s",
                                            int(path.size()), path.data());
    if ( !rc ) {
        VSchemaRelease(schema);
        return;
    }

    // Opening failed: drop the pointer and the schema, then classify.
    *x_InitPtr() = 0;
    VSchemaRelease(schema);

    const bool not_found =
        (GetRCObject(rc) == RCObject(rcDirectory) ||
         GetRCObject(rc) == RCObject(rcPath)) &&
        GetRCState(rc) == rcNotFound;
    if ( not_found ) {
        // no SRA accession
        if ( missing != eMissing_Throw ) {
            return;
        }
        NCBI_THROW2_FMT(CSraException, eNotFoundTable,
                        "Cannot open SRA table: "<<acc_or_path, rc);
    }

    const bool bad_database =
        GetRCObject(rc) == RCObject(rcDatabase) &&
        GetRCState(rc) == rcIncorrect;
    if ( bad_database ) {
        // invalid SRA database
        NCBI_THROW2_FMT(CSraException, eDataError,
                        "Cannot open SRA table: "<<acc_or_path, rc);
    }

    // other errors
    NCBI_THROW2_FMT(CSraException, eOtherError,
                    "Cannot open SRA table: "<<acc_or_path, rc);
}
/// Parse one value starting at m_Ch and return it as a JSON node.
///
/// The first character selects the kind of value: '[' array, '{' object,
/// a quote character for a string, '-' or a digit for a number, and one of
/// F/f/N/n/T/t/Y/y for a word that is either "null" or a boolean.
/// Anything else ends in INVALID_FORMAT_ERROR().
///
/// @throws CStringException (eFormat) for a decimal point without digits
///         or a malformed exponent.
CJsonNode CNetScheduleStructuredOutputParser::ParseValue()
{
    const size_t max_len = GetRemainder();
    size_t len = 0;

    switch (*m_Ch) {
    /* Array */
    case '[':
        ++m_Ch;
        return ParseArray(']');

    /* Object */
    case '{':
        ++m_Ch;
        return ParseObject('}');

    /* String */
    case '\'':
    case '"':
        return CJsonNode::NewStringNode(ParseString(max_len));

    /* Number */
    case '-':
        // A minus sign must be followed by at least one digit.
        if (max_len <= 1 || !isdigit((unsigned char) m_Ch[1])) {
            ++m_Ch;
            break;
        }
        len = 1;
        /* FALL THROUGH */

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        // Advance over the integer part.
        for (;;) {
            ++len;
            if (len >= max_len)
                return CJsonNode::NewIntegerNode(ParseInt(len));
            if (!isdigit((unsigned char) m_Ch[len]))
                break;
        }

        // A non-digit follows: fraction, exponent, or end of the integer.
        switch (m_Ch[len]) {
        case '.':
            ++len;
            if (len == max_len || !isdigit((unsigned char) m_Ch[len])) {
                NCBI_THROW2(CStringException, eFormat,
                        "At least one digit after the decimal "
                        "point is required", GetPosition());
            }
            for (;;) {
                ++len;
                if (len == max_len)
                    return CJsonNode::NewDoubleNode(ParseDouble(len));

                if (!isdigit((unsigned char) m_Ch[len])) {
                    // Only an exponent marker continues the number.
                    if (m_Ch[len] == 'E' || m_Ch[len] == 'e')
                        break;

                    return CJsonNode::NewDoubleNode(ParseDouble(len));
                }
            }
            /* FALL THROUGH */

        case 'E':
        case 'e':
            {
                ++len;
                bool bad_exponent = (len == max_len);
                if (!bad_exponent) {
                    if (m_Ch[len] == '-' || m_Ch[len] == '+') {
                        ++len;
                        bad_exponent = len == max_len ||
                                !isdigit((unsigned char) m_Ch[len]);
                    } else {
                        bad_exponent = !isdigit((unsigned char) m_Ch[len]);
                    }
                }
                if (bad_exponent) {
                    m_Ch += len;
                    NCBI_THROW2(CStringException, eFormat,
                            "Invalid exponent specification", GetPosition());
                }
                while (++len < max_len && isdigit((unsigned char) m_Ch[len]))
                    ;
                return CJsonNode::NewDoubleNode(ParseDouble(len));
            }

        default:
            return CJsonNode::NewIntegerNode(ParseInt(len));
        }

    /* Constant */
    case 'F': case 'f': case 'N': case 'n':
    case 'T': case 't': case 'Y': case 'y':
        while (len <= max_len && isalpha((unsigned char) m_Ch[len]))
            ++len;

        {
            CTempString word(m_Ch, len);
            m_Ch += len;
            if (word == "null")
                return CJsonNode::NewNullNode();
            return CJsonNode::NewBooleanNode(NStr::StringToBool(word));
        }
    }

    INVALID_FORMAT_ERROR();
}
/// Assign from the textual form of a decimal number.
///
/// Accepted layout: optional leading whitespace, optional sign, digits with
/// at most one decimal point, and an optional exponent introduced by 'e' or
/// 'E'. Leading zeros are not stored in m_Mantissa. Parsing of the mantissa
/// stops quietly at a second dot or at the end of the string.
///
/// @param s  Text to parse.
/// @return *this, after Normalize().
/// @throws CStringException (eConvert) on an unexpected character or when
///         no digit was found.
CDecimal& CDecimal::operator=(const CTempStringEx& s)
{
    int pos = 0;
    char ch = s[pos++];

    // skip leading blanks
    while ( isspace((unsigned char)ch) ) {
        ch = s[pos++];
    }

    // Explicit sign, if any; 0 means "not given".
    m_Sign = 0;
    if ( ch == '-'  ||  ch == '+' ) {
        m_Sign = (ch == '-') ? -1 : +1;
        ch = s[pos++];
    }

    bool seen_dot = false;
    bool seen_exp = false;
    bool seen_digit = false;
    int  dot_position = 0;
    m_Mantissa.erase();

    // up to exponent
    for (;;) {
        if ( ch >= '0'  &&  ch <= '9' ) {
            seen_digit = true;
            if ( !m_Mantissa.empty()  ||  ch != '0' ) {
                m_Mantissa += ch;
            } else if ( seen_dot ) {
                // A zero right after the dot, before any significant digit.
                --dot_position;
            }
        } else if ( ch == '.' ) {
            // if second dot, stop
            if ( seen_dot ) {
                --pos;
                break;
            }
            dot_position = m_Mantissa.size();
            seen_dot = true;
        } else if ( ch == 'e'  ||  ch == 'E' ) {
            // An exponent marker only counts after at least one digit.
            if ( !seen_digit ) {
                --pos;
                break;
            }
            seen_exp = true;
            break;
        } else if ( !ch ) {
            --pos;
            break;
        } else {
            --pos;
            NCBI_THROW2(CStringException, eConvert,
                        "Cannot convert '"+string(s)+"'", pos);
        }
        ch = s[pos++];
    }

    // if no digits, stop now - error
    if ( !seen_digit ) {
        NCBI_THROW2(CStringException, eConvert,
                    "Cannot convert '"+string(s)+"'", pos);
    }

    int exponent = seen_dot ? dot_position - m_Mantissa.size() : 0;

    // read exponent
    if ( seen_exp  &&  s[pos] ) {
        int  exp_value = 0;
        int  exp_digits = 0;
        bool exp_has_sign = false;
        bool exp_negative = false;

        for ( ;  ;  ++pos ) {
            ch = s[pos];
            if ( ch == '-'  ||  ch == '+' ) {
                // A sign is only valid before any exponent digit, and once.
                if ( exp_has_sign  ||  exp_digits ) {
                    break;
                }
                exp_has_sign = true;
                exp_negative = (ch == '-');
            } else if ( ch >= '0'  &&  ch <= '9' ) {
                ++exp_digits;
                int candidate = exp_value*10 + (ch-'0');
                if ( candidate > exp_value ) {
                    exp_value = candidate;
                }
            } else {
                break;
            }
        }

        if ( exp_digits ) {
            exponent = exp_negative ? exponent - exp_value
                                    : exponent + exp_value;
        } else {
            // if no digits, rollback
            if ( exp_has_sign ) {
                --pos;
            }
            if ( seen_exp ) {
                --pos;
            }
        }
    }

    m_Exponent = exponent+m_Mantissa.size();
    // A non-empty mantissa without an explicit sign is positive.
    if ( !m_Sign  &&  !m_Mantissa.empty() ) {
        m_Sign = 1;
    }
    Normalize();
    return *this;
}
void CAlnReader::Read(bool guess, bool generate_local_ids) { if (m_ReadDone) { return; } // make a SSequenceInfo corresponding to our CSequenceInfo argument SSequenceInfo info; info.alphabet = const_cast<char *>(m_Alphabet.c_str()); info.beginning_gap = const_cast<char *>(m_BeginningGap.c_str()); info.end_gap = const_cast<char *>(m_EndGap.c_str());; info.middle_gap = const_cast<char *>(m_MiddleGap.c_str()); info.missing = const_cast<char *>(m_Missing.c_str()); info.match = const_cast<char *>(m_Match.c_str()); // read the alignment stream TAlignmentFilePtr afp; m_Errors.clear(); afp = ReadAlignmentFile2(s_ReadLine, (void *) &m_IS, s_ReportError, &(m_Errors), &info, (generate_local_ids ? eTrue : eFalse)); if (!afp) { NCBI_THROW2(CObjReaderParseException, eFormat, "Error reading alignment", 0); } int first_len = strlen (afp->sequences[0]); for (int i = 1; i < afp->num_sequences; i++) { if (strlen (afp->sequences[i]) != first_len) { AlignmentFileFree (afp); NCBI_THROW2(CObjReaderParseException, eFormat, "Error reading alignment: Not all sequences have same length", 0); } } // if we're trying to guess whether this is an alignment file, // and no tell-tale alignment format lines were found, // check to see if any of the lines contain gaps. 
// no gaps plus no alignment indicators -> don't guess alignment if (guess && !afp->align_format_found) { bool found_gap = false; for (int i = 0; i < afp->num_sequences && !found_gap; i++) { if (strchr (afp->sequences[i], '-') != NULL) { found_gap = true; } } if (!found_gap) { AlignmentFileFree (afp); NCBI_THROW2(CObjReaderParseException, eFormat, "Error reading alignment", 0); } } // build the CAlignment m_Seqs.resize(afp->num_sequences); m_Ids.resize(afp->num_sequences); for (int i = 0; i < afp->num_sequences; ++i) { m_Seqs[i] = afp->sequences[i]; m_Ids[i] = afp->ids[i]; } m_Organisms.resize(afp->num_organisms); for (int i = 0; i < afp->num_organisms; ++i) { if (afp->organisms[i]) { m_Organisms[i] = afp->organisms[i]; } else { m_Organisms[i].erase(); } } m_Deflines.resize(afp->num_deflines); for (int i = 0; i < afp->num_deflines; ++i) { if (afp->deflines[i]) { m_Deflines[i] = afp->deflines[i]; } else { m_Deflines[i].erase(); } } AlignmentFileFree(afp); {{ m_Dim = m_Ids.size(); }} m_ReadDone = true; return; }
/// Build (once) and return the alignment as a dense-seg Seq-align.
///
/// The rows read by Read() are scanned column by column. A new segment
/// starts whenever any row switches between gap and residue. Residues are
/// upper-cased and collected per row in m_SeqVec, with their counts in
/// m_SeqLen. The result is cached in m_Aln.
///
/// @return The cached or newly built Seq-align.
/// @throws CObjReaderParseException (eFormat) if called before Read().
CRef<CSeq_align> CAlnReader::GetSeqAlign()
{
    if (m_Aln) {
        return m_Aln;
    }
    if ( !m_ReadDone ) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                   "CAlnReader::GetSeqAlign(): "
                   "Seq_align is not available until after Read()", 0);
    }

    typedef CDense_seg::TNumseg TNumseg;
    typedef CDense_seg::TDim    TNumrow;

    m_Aln = new CSeq_align();
    m_Aln->SetType(CSeq_align::eType_not_set);
    m_Aln->SetDim(m_Dim);

    CDense_seg& ds = m_Aln->SetSegs().SetDenseg();
    ds.SetDim(m_Dim);

    CDense_seg::TIds&    ids    = ds.SetIds();
    CDense_seg::TStarts& starts = ds.SetStarts();
    //CDense_seg::TStrands&  strands  = ds.SetStrands();
    CDense_seg::TLens&   lens   = ds.SetLens();

    ids.resize(m_Dim);

    // The alignment length is the length of the longest row.
    TSeqPos aln_stop = m_Seqs[0].size();
    for (TNumrow row_i = 1; row_i < m_Dim; row_i++) {
        if (m_Seqs[row_i].size() > aln_stop) {
            aln_stop = m_Seqs[row_i].size();
        }
    }

    // One Seq-id per row: parsed from the FASTA-style id when possible,
    // otherwise a local id holding the raw text.
    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        CBioseq::TId parsed_ids;
        if (CSeq_id::ParseFastaIds(parsed_ids, m_Ids[row_i], true) > 0) {
            ids[row_i] = parsed_ids.front();
        } else {
            ids[row_i] = new CSeq_id(CSeq_id::e_Local, m_Ids[row_i]);
        }
    }

    // Residue buffers start at full row length and are trimmed at the end.
    m_SeqVec.resize(m_Dim);
    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        m_SeqVec[row_i].resize(m_Seqs[row_i].length(), 0);
    }
    m_SeqLen.resize(m_Dim, 0);

    vector<bool> is_gap;
    is_gap.resize(m_Dim, true);
    vector<bool> prev_is_gap;
    prev_is_gap.resize(m_Dim, true);
    vector<TSignedSeqPos> next_start;
    next_start.resize(m_Dim, 0);

    int     starts_i = 0;
    TSeqPos prev_aln_pos = 0, prev_len = 0;
    bool    new_seg = true;
    TNumseg numseg = 0;

    for (TSeqPos aln_pos = 0; aln_pos < aln_stop; aln_pos++) {
        for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
            // Positions past the end of a shorter row count as gaps.
            bool row_is_gap = true;
            if (aln_pos < m_Seqs[row_i].length()) {
                string residue = m_Seqs[row_i].substr(aln_pos, 1);
                NStr::ToUpper(residue);
                row_is_gap =
                    NStr::Find(m_MiddleGap, residue) != string::npos  ||
                    NStr::Find(m_EndGap, residue) != string::npos  ||
                    NStr::Find(m_BeginningGap, residue) != string::npos;
                if ( !row_is_gap ) {
                    if (is_gap[row_i]) {
                        is_gap[row_i] = false;
                        new_seg = true;
                    }
                    // add to the sequence vector
                    m_SeqVec[row_i][m_SeqLen[row_i]++] = residue.c_str()[0];
                }
            }
            if (row_is_gap  &&  !is_gap[row_i]) {
                is_gap[row_i] = true;
                new_seg = true;
            }
        }

        if ( !new_seg ) {
            continue;
        }

        if (numseg) { // if not the first seg
            prev_len = aln_pos - prev_aln_pos;
            lens.push_back(prev_len);
            for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
                if ( !prev_is_gap[row_i] ) {
                    next_start[row_i] += prev_len;
                }
            }
        }

        starts.resize(starts_i + m_Dim);
        for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
            if (is_gap[row_i]) {
                starts[starts_i++] = -1;
            } else {
                starts[starts_i++] = next_start[row_i];
            }
            prev_is_gap[row_i] = is_gap[row_i];
        }

        prev_aln_pos = aln_pos;
        numseg++;
        new_seg = false;
    }

    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        m_SeqVec[row_i].resize(m_SeqLen[row_i]); // resize down to actual size
    }

    // Length of the last segment.
    lens.push_back(aln_stop - prev_aln_pos);
    //strands.resize(numseg * m_Dim, eNa_strand_plus);
    _ASSERT(lens.size() == numseg);
    ds.SetNumseg(numseg);

#if _DEBUG
    m_Aln->Validate(true);
#endif
    return m_Aln;
}
/// Build (once) and return a Seq-entry holding the alignment and one raw
/// Bioseq per alignment row.
///
/// The entry is a pop-set whose annotation carries GetSeqAlign(). The
/// molecule type of each row comes from its first Seq-id's accession info
/// when that is conclusive, otherwise from CFormatGuess::SequenceType() on
/// the row's residues. The result is cached in m_Entry.
///
/// @return The cached or newly built Seq-entry.
/// @throws CObjReaderParseException (eFormat) if called before Read().
CRef<CSeq_entry> CAlnReader::GetSeqEntry()
{
    if (m_Entry) {
        return m_Entry;
    }
    if ( !m_ReadDone ) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                   "CAlnReader::GetSeqEntry(): "
                   "Seq_entry is not available until after Read()", 0);
    }

    m_Entry = new CSeq_entry();

    CRef<CSeq_annot> seq_annot (new CSeq_annot);
    seq_annot->SetData().SetAlign().push_back(GetSeqAlign());

    CBioseq_set& bioseq_set = m_Entry->SetSet();
    bioseq_set.SetClass(CBioseq_set::eClass_pop_set);
    bioseq_set.SetAnnot().push_back(seq_annot);
    CBioseq_set::TSeq_set& seq_set = bioseq_set.SetSeq_set();

    typedef CDense_seg::TDim TNumrow;
    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        const string& residues = m_SeqVec[row_i];
        const size_t  residue_count = residues.size();

        CRef<CSeq_entry> seq_entry (new CSeq_entry);

        // seq-id(s): fall back to a local id when nothing could be parsed
        CBioseq::TId& ids = seq_entry->SetSeq().SetId();
        CSeq_id::ParseFastaIds(ids, m_Ids[row_i], true);
        if (ids.empty()) {
            ids.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Local,
                                                    m_Ids[row_i])));
        }

        // mol: accession info first, residue content as the fallback
        CSeq_inst::EMol mol = CSeq_inst::eMol_not_set;
        CSeq_id::EAccessionInfo ai = ids.front()->IdentifyAccession();
        if (ai & CSeq_id::fAcc_nuc) {
            mol = CSeq_inst::eMol_na;
        } else if (ai & CSeq_id::fAcc_prot) {
            mol = CSeq_inst::eMol_aa;
        } else {
            switch (CFormatGuess::SequenceType(residues.data(),
                                               residue_count)) {
            case CFormatGuess::eNucleotide:
                mol = CSeq_inst::eMol_na;
                break;
            case CFormatGuess::eProtein:
                mol = CSeq_inst::eMol_aa;
                break;
            default:
                break;
            }
        }

        // seq-inst
        CRef<CSeq_inst> seq_inst (new CSeq_inst);
        seq_entry->SetSeq().SetInst(*seq_inst);
        seq_set.push_back(seq_entry);

        seq_inst->SetRepr(CSeq_inst::eRepr_raw);
        seq_inst->SetMol(mol);

        _ASSERT(residue_count == m_SeqLen[row_i]);
        seq_inst->SetLength(residue_count);

        // data: proteins as IUPACaa, everything else as packed IUPACna
        CSeq_data& data = seq_inst->SetSeq_data();
        if (mol == CSeq_inst::eMol_aa) {
            data.SetIupacaa().Set(residues);
        } else {
            data.SetIupacna().Set(residues);
            CSeqportUtil::Pack(&data);
        }
    }

    return m_Entry;
}
/// Decode HTML named entities and numeric character references in a string
/// and return the result as UTF-8.
///
/// Recognized forms are "&name;" (looked up in s_HtmlEntities), "&#NNN;"
/// (decimal) and "&#xHHH;" / "&#XHHH;" (hexadecimal). At most 16 characters
/// after the introducer are examined when looking for the closing ';'.
/// Anything that is not a recognized reference is copied through,
/// converted from the source encoding when that is neither UTF-8 nor ASCII.
///
/// @param str           Text to decode.
/// @param encoding      Source encoding; eEncoding_Unknown asks for a guess.
/// @param result_flags  Optional output: which kinds of conversion happened
///                      (fCharRef_Entity, fCharRef_Numeric, fEncoding).
/// @return The decoded UTF-8 string.
/// @throws CStringException (eBadArgs) if the encoding cannot be guessed.
CStringUTF8 CHTMLHelper::HTMLDecode(const string& str, EEncoding encoding,
                                    THTMLDecodeFlags* result_flags)
{
    CStringUTF8 ustr;
    THTMLDecodeFlags result = 0;
    if (encoding == eEncoding_Unknown) {
        encoding = CUtf8::GuessEncoding(str);
        if (encoding == eEncoding_Unknown) {
            NCBI_THROW2(CStringException, eBadArgs,
                "Unable to guess the source string encoding", 0);
        }
    }
    // wild guess...
    ustr.reserve(str.size());

    string::const_iterator i, e = str.end();
    char ch;
    TUnicodeSymbol uch;

    for (i = str.begin(); i != e;) {
        ch = *(i++);
        //check for HTML entities and character references
        if (i != e && ch == '&') {
            string::const_iterator itmp, end_of_entity, start_of_entity;
            itmp = end_of_entity = start_of_entity = i;
            bool ent, dec, hex, parsed=false;
            ent = isalpha((unsigned char)(*itmp)) != 0;
            dec = !ent && *itmp == '#' && ++itmp != e &&
                  isdigit((unsigned char)(*itmp)) != 0;
            // The 'x' / 'X' marker is only meaningful after '#'. Without
            // the !ent guard a named entity starting with 'x' or 'X' had
            // its first letter consumed here, so "&xi;" was looked up as
            // "i" and "&xamp;" was decoded as "&amp;".
            hex = !ent && !dec && itmp != e &&
                  (*itmp == 'x' || *itmp == 'X') && ++itmp != e &&
                  isxdigit((unsigned char)(*itmp)) != 0;
            start_of_entity = itmp;

            if (itmp != e && (ent || dec || hex)) {
                // do not look too far
                for (int len=0; len<16 && itmp != e; ++len, ++itmp) {
                    if (*itmp == '&' || *itmp == '#') {
                        break;
                    }
                    if (*itmp == ';') {
                        end_of_entity = itmp;
                        break;
                    }
                    ent = ent && isalnum( (unsigned char)(*itmp)) != 0;
                    dec = dec && isdigit( (unsigned char)(*itmp)) != 0;
                    hex = hex && isxdigit((unsigned char)(*itmp)) != 0;
                }
                if (end_of_entity != i && (ent || dec || hex)) {
                    uch = 0;
                    if (ent) {
                        // Named entity: linear search of the entity table.
                        string entity(start_of_entity,end_of_entity);
                        const struct tag_HtmlEntities* p = s_HtmlEntities;
                        for ( ; p->u != 0; ++p) {
                            if (entity.compare(p->s) == 0) {
                                uch = p->u;
                                parsed = true;
                                result |= fCharRef_Entity;
                                break;
                            }
                        }
                    } else {
                        // Numeric reference: accumulate the code point.
                        parsed = true;
                        result |= fCharRef_Numeric;
                        for (itmp = start_of_entity;
                             itmp != end_of_entity; ++itmp) {
                            TUnicodeSymbol ud = *itmp;
                            if (dec) {
                                uch = 10 * uch + (ud - '0');
                            } else if (hex) {
                                if (ud >='0' && ud <= '9') {
                                    ud -= '0';
                                } else if (ud >='a' && ud <= 'f') {
                                    ud -= 'a';
                                    ud += 10;
                                } else if (ud >='A' && ud <= 'F') {
                                    ud -= 'A';
                                    ud += 10;
                                }
                                uch = 16 * uch + ud;
                            }
                        }
                    }
                    if (parsed) {
                        ustr += CUtf8::AsUTF8(&uch,1);
                        // Resume right after the closing ';'.
                        i = ++end_of_entity;
                        continue;
                    }
                }
            }
        }
        // no entity - append as is
        if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) {
            ustr.append( 1, ch );
        } else {
            result |= fEncoding;
            ustr += CUtf8::AsUTF8(CTempString(&ch,1), encoding);
        }
    }
    if (result_flags) {
        *result_flags = result;
    }
    return ustr;
}
/// Append this date to *label, formatted according to a format string.
///
/// Format syntax handled by this function:
///  - "%%" is a literal percent sign;
///  - "%{" ... "%|" ... "%}" is a group of alternatives: when a field in
///    the current alternative is missing, the text produced since the
///    enclosing "%{" is discarded and the next alternative is tried;
///  - "%" followed by optional digits (a width) and one of Y, M, N, D, S,
///    h, m, s is a field. N is the month name and S the season, both cut
///    to at most the width; numeric fields are zero-padded or cut on the
///    left to exactly the width.
/// All other characters are copied as is.
///
/// @param label   String to append to; nothing is done when null.
/// @param format  Format string as described above.
/// @throws CGeneralParseException (eFormat) for an incomplete or
///         unrecognized % expression, unbalanced %{ / %}, or a missing
///         field with no alternative left.
void CDate_std::GetDate(string* label, const string& format) const
{
    static const char* const kMonths[] = {
        "0", "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    };
    static const int kNumMonths = sizeof (kMonths) / sizeof (char*);

    if (!label) {
        return;
    }

    unsigned int                        depth = 0;
    // For each open group: label size at its start, and the position of
    // its "%{" in the format (used in error reports).
    vector<pair<SIZE_TYPE, SIZE_TYPE> > starts;
    starts.push_back(make_pair(label->size(), (SIZE_TYPE)0));

    ITERATE (string, it, format) {
        if (*it != '%') {
            *label += *it;
            continue;
        }
        if (++it == format.end()) {
            NCBI_THROW2(CGeneralParseException, eFormat,
                        "CDate_std::GetDate(): incomplete % expression",
                        it - format.begin());
        }
        // Check for things that can only immediately follow %
        if (*it == '%') {
            *label += '%';
            continue;
        }
        else if (*it == '{') {
            depth++;
            starts.push_back(make_pair(label->size(),
                                       SIZE_TYPE(it - format.begin())));
            continue;
        } else if (*it == '}') {
            if (depth == 0) {
                NCBI_THROW2(CGeneralParseException, eFormat,
                            "CDate_std::GetDate(): unbalanced %}",
                            it - format.begin());
            }
            depth--;
            starts.pop_back();
            continue;
        } else if (*it == '|') {
            // We survived, so just look for the appropriate %}.
            if (depth == 0) {
                return; // Can ignore rest of format
            }
            unsigned int depth2 = 0;
            for (;;) {
                while (++it != format.end()  &&  *it != '%')
                    ;
                if (it == format.end()  ||  ++it == format.end()) {
                    NCBI_THROW2(CGeneralParseException, eFormat,
                                "CDate_std::GetDate(): unbalanced %{",
                                starts.back().second);
                }
                if (*it == '}') {
                    if (depth2 == 0) {
                        depth--;
                        starts.pop_back();
                        break;
                    } else {
                        depth2--;
                    }
                } else if (*it == '{') {
                    depth2++;
                }
            }
            continue;
        }

        // Optional numeric width in front of the field letter.
        unsigned int length = 0;
        int          value  = -1;
        while (isdigit((unsigned char)(*it))) {
            length = length * 10 + *it - '0';
            if (++it == format.end()) {
                NCBI_THROW2(CGeneralParseException, eFormat,
                            "CDate_std::GetDate(): incomplete % expression",
                            it - format.begin());
            }
        }
        // A negative value marks a field that is not set.
        switch (*it) {
        case 'Y': value = GetYear(); break;
        case 'M':
        case 'N': value = CanGetMonth()  ? GetMonth()  : -1; break;
        case 'D': value = CanGetDay()    ? GetDay()    : -1; break;
        case 'S': value = CanGetSeason() ? 1           : -1; break;
        case 'h': value = CanGetHour()   ? GetHour()   : -1; break;
        case 'm': value = CanGetMinute() ? GetMinute() : -1; break;
        case 's': value = CanGetSecond() ? GetSecond() : -1; break;
        default:
            NCBI_THROW2(CGeneralParseException, eFormat,
                        "CDate_std::GetDate(): unrecognized format specifier",
                        it - format.begin());
        }

        if (value >= 0) {
            if (*it == 'N') { // special cases
                const char* name;
                if (value >= kNumMonths) {
                    name = "inv";
                } else {
                    name = kMonths[value];
                }
                if (length > 0) {
                    // Cut to at most <length> characters. The substring
                    // overload clamps to the name's size; appending
                    // (pointer, count) would read past the end of the
                    // literal when <length> exceeds the name length.
                    label->append(string(name), 0, length);
                } else {
                    *label += name;
                }
            } else if (*it == 'S') {
                if (length > 0) {
                    label->append(GetSeason(), 0, length);
                } else {
                    *label += GetSeason();
                }
            } else { // just a number
                if (length > 0) {
                    // We want exactly <length> digits.
                    CNcbiOstrstream oss;
                    oss << setfill('0') << setw(length) << value;
                    string s = CNcbiOstrstreamToString(oss);
                    label->append(s,
                                  s.size() > length ? s.size() - length : 0,
                                  length);
                } else {
                    *label += NStr::IntToString(value);
                }
            }
        } else {
            // missing...roll back label and look for alternatives, or
            // throw if at top level and none found
            label->erase(starts.back().first);
            char         request = *it;
            unsigned int depth2  = 0;
            for (;;) {
                while (++it != format.end()  &&  *it != '%')
                    ;
                if (it == format.end()  ||  ++it == format.end()) {
                    if (depth > 0  ||  depth2 > 0) {
                        NCBI_THROW2(CGeneralParseException, eFormat,
                                    "CDate_std::GetDate(): unbalanced %{",
                                    starts.back().second);
                    } else {
                        NCBI_THROW2(CGeneralParseException, eFormat,
                                   string("CDate_std::GetDate():"
                                          " missing required field %")
                                   + request, it - format.begin() - 1);
                    }
                }
                if (*it == '|'  &&  depth2 == 0) {
                    // Next alternative of the current group starts here.
                    break;
                } else if (*it == '}') {
                    if (depth2 == 0) {
                        if (depth == 0) {
                            NCBI_THROW2(CGeneralParseException, eFormat,
                                        "CDate_std::GetDate(): unbalanced %}",
                                        it - format.begin());
                        }
                        depth--;
                        starts.pop_back();
                        break;
                    } else {
                        depth2--;
                    }
                } else if (*it == '{') {
                    depth2++;
                }
            }
        }
    }
}