Пример #1
0
void CVDBMgr::x_Init(void)
{
    if ( rc_t rc = VDBManagerMakeRead(x_InitPtr(), 0) ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot open VDBManager", rc);
    }
    uint32_t sdk_ver;
    if ( rc_t rc = VDBManagerVersion(*this, &sdk_ver) ) {
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot get VDBManager version", rc);
    }
    CKNSManager kns_mgr(CVFSManager(*this));
    CNcbiOstrstream str;
    CNcbiApplication* app = CNcbiApplication::Instance();
    if ( app ) {
        str << app->GetAppName() << ": " << app->GetVersion().Print() << "; ";
    }
#if NCBI_PACKAGE
    str << "Package: " << NCBI_PACKAGE_NAME << ' ' <<
        NCBI_PACKAGE_VERSION << "; ";
#endif
    str << "C++ ";
#ifdef NCBI_PRODUCTION_VER
    str << NCBI_PRODUCTION_VER << "/";
#endif
#ifdef NCBI_DEVELOPMENT_VER
    str << NCBI_DEVELOPMENT_VER;
#endif
    string prefix = CNcbiOstrstreamToString(str);
    KNSManagerSetUserAgent(kns_mgr, "%s; SRA Toolkit %V",
                           prefix.c_str(),
                           sdk_ver);

    // redirect VDB log to C++ Toolkit
    if ( s_GetDiagHandler() ) {
        KLogInit();
        KLogLevelSet(klogDebug);
        KLogLibHandlerSet(VDBLogWriter, 0);
    }

    if ( app ) {
        string host = app->GetConfig().GetString("CONN", "HTTP_PROXY_HOST", kEmptyStr);
        int port = app->GetConfig().GetInt("CONN", "HTTP_PROXY_PORT", 0);
        if ( !host.empty() && port != 0 ) {
            if ( rc_t rc = KNSManagerSetHTTPProxyPath(kns_mgr, "%s:%d", host.c_str(), port) ) {
                NCBI_THROW2(CSraException, eInitFailed,
                            "Cannot set KNSManager proxy parameters", rc);
            }
            KNSManagerSetHTTPProxyEnabled(kns_mgr, true);
        }
    }
}
Пример #2
0
void CExecAndParseStructuredOutput::ThrowUnexpectedCharError()
{
    size_t position = m_Ch - m_NSOutput.data() + 1;

    if (*m_Ch == '\0') {
        NCBI_THROW2(CStringException, eFormat,
                "Unexpected end of NetSchedule output", position);
    } else {
        NCBI_THROW2(CStringException, eFormat,
                "Unexpected character in NetSchedule output", position);
    }
}
Пример #3
0
void CSraPath::x_Init(void)
{
    CSraMgr::RegisterFunctions();
    if ( rc_t rc = SRAPathMake(x_InitPtr(), 0) ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot make SRAPath", rc);
    }
    if ( rc_t rc = SRAPathClear(*this) ) {
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot clear SRAPath", rc);
    }
}
Пример #4
0
void CKConfig::Commit() const
{
    if ( rc_t rc = KConfigCommit(const_cast<KConfig*>(GetPointer())) ) {
        NCBI_THROW2(CSraException, eOtherError,
                    "CKConfig: Cannot commit config changes", rc);
    }
}
Пример #5
0
// Forward to VDBManagerDeleteCacheOlderThan() with the given age in days.
// Throws CSraException(eOtherError) on a non-zero return code.
void CVDBMgr::DeleteCacheOlderThan(Uint4 days)
{
    const rc_t rc = VDBManagerDeleteCacheOlderThan(*this, days);
    if ( rc != 0 ) {
        NCBI_THROW2(CSraException, eOtherError,
                    "CVDBMgr: Cannot delete old cache files", rc);
    }
}
Пример #6
0
void CVFSManager::x_InitNew(void)
{
    if ( rc_t rc = VFSManagerMake(x_InitPtr()) ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot create VFSManager", rc);
    }
}
Пример #7
0
// Obtain the KDB manager from the given VDB manager.
// Throws CSraException(eInitFailed) on a non-zero return code, after
// resetting the held pointer to null.
CKDBManager::CKDBManager(const CVDBMgr& mgr)
{
    const rc_t rc = VDBManagerGetKDBManagerRead(mgr, x_InitPtr());
    if ( rc != 0 ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot get KDBManager", rc);
    }
}
Пример #8
0
// Obtain the KNS manager from the given VFS manager.
// Throws CSraException(eInitFailed) on a non-zero return code, after
// resetting the held pointer to null.
CKNSManager::CKNSManager(const CVFSManager& mgr)
{
    const rc_t rc = VFSManagerGetKNSMgr(mgr, x_InitPtr());
    if ( rc != 0 ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot get KNSManager", rc);
    }
}
Пример #9
0
void CVDBMgr::SetCacheRoot(const string& path)
{
    CVPath vpath(CVFSManager(*this), path, CVPath::eSys);
    if ( rc_t rc = VDBManagerSetCacheRoot(*this, vpath) ) {
        NCBI_THROW2(CSraException, eOtherError,
                    "CVDBMgr: Cannot set cache root", rc);
    }
}
Пример #10
0
void CSraMgr::x_DoInit(void)
{
    if ( rc_t rc = SRAMgrMakeRead(x_InitPtr()) ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot open SRAMgr", rc);
    }
}
Пример #11
0
// Create a VResolver from the given VFS manager and configuration; the
// manager is also stored in m_Mgr.
// Throws CSraException(eInitFailed) on a non-zero return code, after
// resetting the held pointer to null.
CVResolver::CVResolver(const CVFSManager& mgr, const CKConfig& cfg)
    : m_Mgr(mgr)
{
    const rc_t rc = VFSManagerMakeResolver(mgr, x_InitPtr(), cfg);
    if ( rc != 0 ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot create VResolver", rc);
    }
}
Пример #12
0
// Take the KConfig returned by VFSManagerGetConfig() for the manager's VFS
// manager and add a reference to it.
// Throws CSraException(eInitFailed) if KConfigAddRef() fails; the held
// pointer is reset to null first.
CKConfig::CKConfig(const CVDBMgr& mgr)
{
    *x_InitPtr() = VFSManagerGetConfig(CVFSManager(mgr));

    const rc_t rc = KConfigAddRef(*this);
    if ( rc != 0 ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot get reference to KConfig", rc);
    }
}
Пример #13
0
// Create a KConfig via KConfigMake() and store it in this wrapper.
// Throws CSraException(eInitFailed) on a non-zero return code; the held
// pointer is set to null before throwing.
CKConfig::CKConfig(void)
{
    KConfig* new_cfg;
    const rc_t rc = KConfigMake(&new_cfg, 0);
    if ( rc != 0 ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot create KConfig", rc);
    }
    *x_InitPtr() = new_cfg;
}
Пример #14
0
// Render this VPath as a string.  For eSys the text comes from
// VPathMakeSysPath(), for any other type from VPathMakeString().
// Throws CSraException(eOtherError) when the SDK call fails.
string CVPath::ToString(EType type) const
{
    const String* sdk_str = 0;
    const rc_t rc = (type == eSys)
        ? VPathMakeSysPath(*this, &sdk_str)
        : VPathMakeString(*this, &sdk_str);
    if (rc != 0) {
        NCBI_THROW2(CSraException, eOtherError,
            "Cannot get path from VPath", rc);
    }

    // copy the characters out, then release the SDK string
    string result(sdk_str->addr, sdk_str->size);
    StringWhack(sdk_str);
    return result;
}
Пример #15
0
void CVDBCursor::CloseRow(void)
{
    if ( !RowIsOpened() ) {
        return;
    }
    if ( rc_t rc = VCursorCloseRow(*this) ) {
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot close VDB cursor row", rc);
    }
    m_RowOpened = false;
}
Пример #16
0
void CVDBCursor::Init(const CVDBTable& table)
{
    if ( *this ) {
        NCBI_THROW2(CSraException, eInvalidState,
                    "Cannot init VDB cursor again",
                    RC(rcApp, rcCursor, rcConstructing, rcSelf, rcOpen));
    }
    if ( rc_t rc = VTableCreateCursorRead(table, x_InitPtr()) ) {
        *x_InitPtr() = 0;
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot create VDB cursor", rc);
    }
    if ( rc_t rc = VCursorPermitPostOpenAdd(*this) ) {
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot allow VDB cursor post open column add", rc);
    }
    if ( rc_t rc = VCursorOpen(*this) ) {
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot open VDB cursor", rc);
    }
    m_Table = table;
}
Пример #17
0
// Return the cache root as a system path string.
// Returns an empty string when the SDK reports a "path not found" return
// code; any other failure throws CSraException(eOtherError).
string CVDBMgr::GetCacheRoot() const
{
    const VPath* root;
    const rc_t rc = VDBManagerGetCacheRoot(*this, &root);
    if ( rc == 0 ) {
        return CVPath(root).ToString(CVPath::eSys);
    }

    const bool path_not_found =
        GetRCObject(rc) == RCObject(rcPath) &&
        GetRCState(rc) == rcNotFound;
    if ( path_not_found ) {
        return kEmptyStr;
    }
    NCBI_THROW2(CSraException, eOtherError,
                "CVDBMgr: Cannot get cache root", rc);
}
Пример #18
0
//  ----------------------------------------------------------------------------
void CMultiReaderApp::xProcessDefault(
    const CArgs& args,
    CNcbiIstream& istr,
    CNcbiOstream& ostr)
//  ----------------------------------------------------------------------------
{
    auto_ptr<CReaderBase> pReader(CReaderBase::GetReader(m_uFormat, m_iFlags));
    if (!pReader.get()) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
            "File format not supported", 0);
    }
    CRef<CSerialObject> object = pReader->ReadObject(istr, m_pErrors);
    xWriteObject(*object, ostr);
}
Пример #19
0
// Construct an integer-tagged value from an unsigned 64-bit number.
// Throws CExprParserException(eTypeConversionError) when 'value' exceeds
// the maximum of Int8; the position reported is m_Pos, which is 0 here.
CExprValue::CExprValue(Uint8 value)
: ival(0)
, m_sval("")
, m_Var(NULL)
, m_Pos(0)
, m_Tag(eINT)
{
    const Uint8 max_signed = static_cast<Uint8>(numeric_limits<Int8>::max());
    if (value > max_signed) {
        NCBI_THROW2(CExprParserException, 
                eTypeConversionError, 
                "Value too big to fit in the 8-byte signed integer type", 
                m_Pos);
    }

    ival = static_cast<Int8>(value);
}
Пример #20
0
// Convert the 'len' characters at m_Ch to Int8 and advance m_Ch past them.
// The conversion happens first; afterwards an optional leading '-' is
// skipped and a multi-digit number starting with '0' is rejected with
// CStringException(eFormat).
Int8 CNetScheduleStructuredOutputParser::ParseInt(size_t len)
{
    const Int8 parsed = NStr::StringToInt8(CTempString(m_Ch, len));

    const bool has_minus = (*m_Ch == '-');
    if (has_minus) {
        // step over the sign so the leading-zero check sees the digits
        ++m_Ch;
        --len;
    }

    if (*m_Ch == '0' && len > 1) {
        NCBI_THROW2(CStringException, eFormat,
                "Leading zeros are not allowed", GetPosition());
    }

    m_Ch += len;
    return parsed;
}
Пример #21
0
// Open an SRA table by accession or system path using the default SRA
// schema.
//
// mgr          VDB manager used to build the schema and open the table.
// acc_or_path  accession or system path; also stored in m_Name.
// missing      when the table is not found and this is not eMissing_Throw,
//              the constructor returns with a null table pointer instead
//              of throwing.
//
// Throws CSraException:
//  - eInitFailed     if the default schema cannot be created;
//  - eNotFoundTable  if the table is not found and missing == eMissing_Throw;
//  - eDataError      if the SDK reports an incorrect database;
//  - eOtherError     for any other open failure.
CVDBTable::CVDBTable(const CVDBMgr& mgr,
                     const string& acc_or_path,
                     EMissing missing)
    : m_Name(acc_or_path)
{
    *x_InitPtr() = 0;
    VSchema *schema;
    DECLARE_SDK_GUARD();
    // Convert the path before the schema exists: if the conversion throws,
    // there is no schema reference to leak.
    string path = CVPath::ConvertAccOrSysPathToPOSIX(acc_or_path);
    if ( rc_t rc = SRASchemaMake(&schema, mgr) ) {
        NCBI_THROW2(CSraException, eInitFailed,
                    "Cannot make default SRA schema", rc);
    }
    if ( rc_t rc = VDBManagerOpenTableRead(mgr, x_InitPtr(), schema, "%.*s",
                                           int(path.size()), path.data()) ) {
        *x_InitPtr() = 0;
        // the schema is no longer needed on any of the failure paths
        VSchemaRelease(schema);
        if ( (GetRCObject(rc) == RCObject(rcDirectory) ||
              GetRCObject(rc) == RCObject(rcPath)) &&
             GetRCState(rc) == rcNotFound ) {
            // no SRA accession
            if ( missing != eMissing_Throw ) {
                return;
            }
            NCBI_THROW2_FMT(CSraException, eNotFoundTable,
                            "Cannot open SRA table: "<<acc_or_path, rc);
        }
        else if ( GetRCObject(rc) == RCObject(rcDatabase) &&
                  GetRCState(rc) == rcIncorrect ) {
            // invalid SRA database
            NCBI_THROW2_FMT(CSraException, eDataError,
                            "Cannot open SRA table: "<<acc_or_path, rc);
        }
        else {
            // other errors
            NCBI_THROW2_FMT(CSraException, eOtherError,
                            "Cannot open SRA table: "<<acc_or_path, rc);
        }
    }
    VSchemaRelease(schema);
}
Пример #22
0
// Parse one value starting at m_Ch and return it as a CJsonNode.
//
// Dispatches on the first character:
//   '['  / '{'        array / object (delegated to ParseArray/ParseObject);
//   '\'' / '"'        quoted string (delegated to ParseString);
//   '-' or a digit    integer, or double if a fraction or exponent follows;
//   F f N n T t Y y   a run of letters: "null" yields a null node, anything
//                     else is converted with NStr::StringToBool().
// Any other first character, or a '-' not followed by a digit, ends in
// INVALID_FORMAT_ERROR().
//
// Throws CStringException(eFormat) when a decimal point is not followed by
// a digit or when the exponent is malformed.
CJsonNode CNetScheduleStructuredOutputParser::ParseValue()
{
    size_t max_len = GetRemainder();
    size_t len = 0;

    switch (*m_Ch) {
    /* Array */
    case '[':
        ++m_Ch;
        return ParseArray(']');

    /* Object */
    case '{':
        ++m_Ch;
        return ParseObject('}');

    /* String */
    case '\'':
    case '"':
        return CJsonNode::NewStringNode(ParseString(max_len));

    /* Number */
    case '-':
        // Check that there's at least one digit after the minus sign.
        if (max_len <= 1 || !isdigit((unsigned char) m_Ch[1])) {
            ++m_Ch;
            break;
        }
        // Account for the sign, then scan the digits below.
        len = 1;
        /* FALL THROUGH */

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        // Skim through the integer part.
        do
            if (++len >= max_len)
                return CJsonNode::NewIntegerNode(ParseInt(len));
        while (isdigit((unsigned char) m_Ch[len]));

        // Stumbled upon a non-digit character -- check
        // if it's a fraction part or an exponent part.
        switch (m_Ch[len]) {
        case '.':
            if (++len == max_len || !isdigit((unsigned char) m_Ch[len])) {
                NCBI_THROW2(CStringException, eFormat,
                        "At least one digit after the decimal "
                        "point is required", GetPosition());
            }
            for (;;) {
                if (++len == max_len)
                    return CJsonNode::NewDoubleNode(ParseDouble(len));

                if (!isdigit((unsigned char) m_Ch[len])) {
                    // An exponent marker continues in the next case.
                    if (m_Ch[len] == 'E' || m_Ch[len] == 'e')
                        break;

                    return CJsonNode::NewDoubleNode(ParseDouble(len));
                }
            }
            /* FALL THROUGH */

        case 'E':
        case 'e':
            // Require at least one digit after the marker and the
            // optional sign.
            if (++len == max_len ||
                    (m_Ch[len] == '-' || m_Ch[len] == '+' ?
                            ++len == max_len ||
                                    !isdigit((unsigned char) m_Ch[len]) :
                            !isdigit((unsigned char) m_Ch[len]))) {
                // Advance first so GetPosition() points at the bad spot.
                m_Ch += len;
                NCBI_THROW2(CStringException, eFormat,
                        "Invalid exponent specification", GetPosition());
            }
            while (++len < max_len && isdigit((unsigned char) m_Ch[len]))
                ;
            return CJsonNode::NewDoubleNode(ParseDouble(len));

        default:
            return CJsonNode::NewIntegerNode(ParseInt(len));
        }

    /* Constant */
    case 'F': case 'f': case 'N': case 'n':
    case 'T': case 't': case 'Y': case 'y':
        // Strictly less than max_len: m_Ch[max_len] is already past the
        // remaining input and must not be examined.
        while (len < max_len && isalpha((unsigned char) m_Ch[len]))
            ++len;

        {
            CTempString val(m_Ch, len);
            m_Ch += len;
            return val == "null" ? CJsonNode::NewNullNode() :
                CJsonNode::NewBooleanNode(NStr::StringToBool(val));
        }
    }

    INVALID_FORMAT_ERROR();
}
Пример #23
0
// Parse a decimal number from 's' into m_Sign, m_Mantissa and m_Exponent,
// then call Normalize().
//
// Accepted layout: optional leading whitespace, optional '+' or '-',
// digits with at most one '.', and an optional exponent ('e' or 'E',
// optional sign, digits).
//
// Throws CStringException(eConvert) when an unexpected character is met
// before the exponent, or when the number contains no digits; the extra
// argument is the current offset 'ptr'.
//
// NOTE(review): the loops rely on s[ptr] yielding '\0' once ptr reaches
// the end of the string -- confirm against CTempStringEx::operator[].
CDecimal& CDecimal::operator=(const CTempStringEx& s)
{
    int ptr = 0;
    char c = s[ptr++];
 // skip leading blanks
    while ( isspace((unsigned char)c) ) {
        c = s[ptr++];
    }
    
    // optional sign; m_Sign stays 0 when no sign is given
    m_Sign = 0;
    if ( c == '-' ) {
        m_Sign = -1;
        c = s[ptr++];
    }
    else if ( c == '+' ) {
        m_Sign = +1;
        c = s[ptr++];
    }

    bool dot = false, expn = false, anydigits = false;
    // number of mantissa digits collected before the dot; goes negative
    // for zeros that follow the dot ahead of the first non-zero digit
    int dot_position = 0;
    m_Mantissa.erase();

// up to exponent
    for ( ; ; c = s[ptr++] ) {
        if (c >= '0' && c <= '9') {
            // digits: accumulate
            anydigits = true;
            if ( m_Mantissa.empty() ) {
                // leading zeros are not stored in the mantissa
                if ( c != '0' ) {
                    m_Mantissa += c;
                }
                else {
                    if ( dot ) {
                        --dot_position;
                    }
                }
            }
            else {
                m_Mantissa += c;
            }
        }
        else if (c == '.') {
            // dot
            // if second dot, stop
            if (dot) {
                --ptr;
                break;
            }
            dot_position = m_Mantissa.size();
            dot = true;
        }
        else if (c == 'e' || c == 'E') {
            // if exponent, stop
            if (!anydigits) {
                --ptr;
                break;
            }
            expn = true;
            break;
        }
        else if (!c) {
            // end of input
            --ptr;
            break;
        }
        else {
            // any other character is an error
            --ptr;
            NCBI_THROW2(CStringException, eConvert,
                        "Cannot convert '"+string(s)+"'", ptr);
        }
    }
    // if no digits, stop now - error
    if (!anydigits) {
        NCBI_THROW2(CStringException, eConvert,
                    "Cannot convert '"+string(s)+"'", ptr);
    }
    // exponent implied by the dot: minus the count of stored digits that
    // follow it (adjusted by skipped zeros through dot_position)
    int exponent = dot ? dot_position - m_Mantissa.size() : 0;
// read exponent
    if (expn && s[ptr]) {
        int expvalue = 0;
        bool expsign = false, expnegate= false;
        int expdigits= 0;
        for( ; ; ++ptr) {
            c = s[ptr];
            // sign: should be no digits at this point
            if (c == '-' || c == '+') {
                // if there was sign or digits, stop
                if (expsign || expdigits) {
                    break;
                }
                expsign = true;
                expnegate = c == '-';
            }
            // digits: accumulate
            else if (c >= '0' && c <= '9') {
                ++expdigits;
                // NOTE(review): the comparison below looks like an overflow
                // guard, but expvalue*10 can itself overflow int for very
                // long exponents -- confirm the intent.
                int newexpvalue = expvalue*10 + (c-'0');
                if (newexpvalue > expvalue) {
                    expvalue = newexpvalue;
                }
            }
            else {
                break;
            }
        }
        // if no digits, rollback
        if (!expdigits) {
            // rollback sign
            if (expsign) {
                --ptr;
            }
            // rollback exponent
            if (expn) {
                --ptr;
            }
        }
        else {
            exponent = expnegate ? exponent - expvalue : exponent + expvalue;
        }
    }
    m_Exponent = exponent+m_Mantissa.size();

    // a non-zero number without an explicit sign is positive
    if ( !m_Sign && !m_Mantissa.empty() ) {
        m_Sign = 1;
    }

    Normalize();
    return *this;
}
Пример #24
0
// Read the alignment from m_IS and fill m_Seqs, m_Ids, m_Organisms,
// m_Deflines and m_Dim.  Does nothing if a previous call already
// succeeded (m_ReadDone).
//
// guess               when true and the parser found no alignment format
//                     markers, the input is rejected unless at least one
//                     sequence contains a '-' character.
// generate_local_ids  forwarded to ReadAlignmentFile2() as eTrue/eFalse.
//
// Throws CObjReaderParseException(eFormat) when the file cannot be parsed,
// when the sequences differ in length, or when the 'guess' check fails.
//
// NOTE(review): afp->sequences[0] is read without checking that
// afp->num_sequences > 0 -- presumably ReadAlignmentFile2() never returns
// an empty alignment; confirm.
void CAlnReader::Read(bool guess, bool generate_local_ids)
{
    if (m_ReadDone) {
        return;
    }

    // make a SSequenceInfo corresponding to our CSequenceInfo argument
    SSequenceInfo info;
    info.alphabet      = const_cast<char *>(m_Alphabet.c_str());
    info.beginning_gap = const_cast<char *>(m_BeginningGap.c_str());
    info.end_gap       = const_cast<char *>(m_EndGap.c_str());
    info.middle_gap    = const_cast<char *>(m_MiddleGap.c_str());
    info.missing       = const_cast<char *>(m_Missing.c_str());
    info.match         = const_cast<char *>(m_Match.c_str());

    // read the alignment stream
    TAlignmentFilePtr afp;
    m_Errors.clear();
    afp = ReadAlignmentFile2(s_ReadLine, (void *) &m_IS,
                            s_ReportError, &(m_Errors), &info,
                            (generate_local_ids ? eTrue : eFalse));
    if (!afp) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                   "Error reading alignment", 0);
    }

    // all sequences must be as long as the first one; the length is kept
    // as size_t so the comparison with strlen() is not signed/unsigned
    const size_t first_len = strlen (afp->sequences[0]);
    for (int i = 1; i < afp->num_sequences; i++) {
        if (strlen (afp->sequences[i]) != first_len) {
            AlignmentFileFree (afp);
            NCBI_THROW2(CObjReaderParseException, eFormat,
                       "Error reading alignment: Not all sequences have same length", 0);
        }
    }

    // if we're trying to guess whether this is an alignment file,
    // and no tell-tale alignment format lines were found,
    // check to see if any of the lines contain gaps.
    // no gaps plus no alignment indicators -> don't guess alignment
    if (guess && !afp->align_format_found) {
        bool found_gap = false;
        for (int i = 0; i < afp->num_sequences && !found_gap; i++) {
            if (strchr (afp->sequences[i], '-') != NULL) {
                found_gap = true;
            }
        }
        if (!found_gap) {
            AlignmentFileFree (afp);
            NCBI_THROW2(CObjReaderParseException, eFormat,
                       "Error reading alignment", 0);
        }
    }

    // build the CAlignment
    m_Seqs.resize(afp->num_sequences);
    m_Ids.resize(afp->num_sequences);
    for (int i = 0;  i < afp->num_sequences;  ++i) {
        m_Seqs[i] = afp->sequences[i];
        m_Ids[i] = afp->ids[i];
    }
    // organisms and deflines may have null entries; those become empty
    m_Organisms.resize(afp->num_organisms);
    for (int i = 0;  i < afp->num_organisms;  ++i) {
        if (afp->organisms[i]) {
            m_Organisms[i] = afp->organisms[i];
        } else {
            m_Organisms[i].erase();
        }
    }
    m_Deflines.resize(afp->num_deflines);
    for (int i = 0;  i < afp->num_deflines;  ++i) {
        if (afp->deflines[i]) {
            m_Deflines[i] = afp->deflines[i];
        } else {
            m_Deflines[i].erase();
        }
    }

    AlignmentFileFree(afp);

    {{
        m_Dim = m_Ids.size();
    }}

    m_ReadDone = true;
}
Пример #25
0
// Build (once) and return a dense-seg Seq_align from the rows read by
// Read().  The result is cached in m_Aln; m_SeqVec and m_SeqLen are filled
// with the gap-free residues of each row as a side effect.
//
// Throws CObjReaderParseException(eFormat) if called before Read() has
// completed.
//
// NOTE(review): m_Seqs[0] is accessed without checking m_Dim > 0 --
// presumably Read() never leaves an empty alignment; confirm.
CRef<CSeq_align> CAlnReader::GetSeqAlign()
{
    if (m_Aln) {
        return m_Aln;
    } else if ( !m_ReadDone ) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                   "CAlnReader::GetSeqAlign(): "
                   "Seq_align is not available until after Read()", 0);
    }

    typedef CDense_seg::TNumseg TNumseg;
    typedef CDense_seg::TDim TNumrow;

    m_Aln = new CSeq_align();
    m_Aln->SetType(CSeq_align::eType_not_set);
    m_Aln->SetDim(m_Dim);

    CDense_seg& ds = m_Aln->SetSegs().SetDenseg();
    ds.SetDim(m_Dim);
    
    CDense_seg::TIds&     ids     = ds.SetIds();
    CDense_seg::TStarts&  starts  = ds.SetStarts();
    //CDense_seg::TStrands& strands = ds.SetStrands();
    CDense_seg::TLens&    lens    = ds.SetLens();

    ids.resize(m_Dim);

    // get the length of the alignment
    TSeqPos aln_stop = m_Seqs[0].size();
    for (TNumrow row_i = 1; row_i < m_Dim; row_i++) {
        if (m_Seqs[row_i].size() > aln_stop) {
            aln_stop = m_Seqs[row_i].size();
        }
    }

    // one Seq-id per row: the first parsed FASTA id, or a local id built
    // from the raw id string when nothing could be parsed
    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        CBioseq::TId xid;
        if (CSeq_id::ParseFastaIds(xid, m_Ids[row_i], true) > 0) {
            ids[row_i] = xid.front();
        } else {
            ids[row_i] = new CSeq_id(CSeq_id::e_Local, m_Ids[row_i]);
        }
    }

    // per-row residue buffers, sized for the worst case (no gaps)
    m_SeqVec.resize(m_Dim);
    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        m_SeqVec[row_i].resize(m_Seqs[row_i].length(), 0);
    }
    m_SeqLen.resize(m_Dim, 0);
    // gap state of every row in the current and in the previous segment
    vector<bool> is_gap;  is_gap.resize(m_Dim, true);
    vector<bool> prev_is_gap;  prev_is_gap.resize(m_Dim, true);
    // sequence coordinate at which the next non-gap segment of a row starts
    vector<TSignedSeqPos> next_start; next_start.resize(m_Dim, 0);
    int starts_i = 0;
    TSeqPos prev_aln_pos = 0, prev_len = 0;
    bool new_seg = true;
    TNumseg numseg = 0;
    
    // walk the alignment column by column; a new segment begins whenever
    // any row switches between gap and residue
    for (TSeqPos aln_pos = 0; aln_pos < aln_stop; aln_pos++) {
        for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
            if (aln_pos >= m_Seqs[row_i].length()) {
                // past the end of a shorter row: counts as a gap
                if (!is_gap[row_i]) {
                    is_gap[row_i] = true;
                    new_seg = true;
                }
            } else {
                string residue = m_Seqs[row_i].substr(aln_pos, 1);
                NStr::ToUpper(residue);
                // a residue is anything that is not one of the gap characters
                if (NStr::Find(m_MiddleGap, residue) == string::npos  &&
                    NStr::Find(m_EndGap, residue) == string::npos  &&
                    NStr::Find(m_BeginningGap, residue) == string::npos) {

                    if (is_gap[row_i]) {
                        is_gap[row_i] = false;
                        new_seg = true;
                    }

                    // add to the sequence vector
                    m_SeqVec[row_i][m_SeqLen[row_i]++] = residue.c_str()[0];

                } else {
  
                    if ( !is_gap[row_i] ) {
                        is_gap[row_i] = true;
                        new_seg = true;
                    }
                }

            }
        }

        if (new_seg) {
            if (numseg) { // if not the first seg
                // close the previous segment and advance the start of
                // every row that had residues in it
                lens.push_back(prev_len = aln_pos - prev_aln_pos);
                for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
                    if ( !prev_is_gap[row_i] ) {
                        next_start[row_i] += prev_len;
                    }
                }
            }

            // starts of the new segment; -1 marks a gap
            starts.resize(starts_i + m_Dim);
            for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
                if (is_gap[row_i]) {
                    starts[starts_i++] = -1;
                } else {
                    starts[starts_i++] = next_start[row_i];;
                }
                prev_is_gap[row_i] = is_gap[row_i];
            }

            prev_aln_pos = aln_pos;

            numseg++;
            new_seg = false;
        }
    }

    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        m_SeqVec[row_i].resize(m_SeqLen[row_i]); // resize down to actual size
    }

    // length of the last segment
    lens.push_back(aln_stop - prev_aln_pos);
    //strands.resize(numseg * m_Dim, eNa_strand_plus);
    _ASSERT(lens.size() == numseg);
    ds.SetNumseg(numseg);

#if _DEBUG
    m_Aln->Validate(true);
#endif    
    return m_Aln;
}
Пример #26
0
// Build (once) and return a pop-set Seq_entry holding the alignment as an
// annotation plus one raw Bioseq per alignment row.  The result is cached
// in m_Entry.
//
// The molecule type of each row is taken from the accession type of its
// first Seq-id when that identifies nucleotide or protein; otherwise it is
// guessed from the sequence data.  Protein rows are stored as IUPACaa; all
// other rows as IUPACna and then packed.
//
// Throws CObjReaderParseException(eFormat) if called before Read() has
// completed.
CRef<CSeq_entry> CAlnReader::GetSeqEntry()
{
    if (m_Entry) {
        return m_Entry;
    }
    if ( !m_ReadDone ) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                   "CAlnReader::GetSeqEntry(): "
                   "Seq_entry is not available until after Read()", 0);
    }

    m_Entry = new CSeq_entry();

    // the alignment itself travels as an annotation on the set
    CRef<CSeq_annot> align_annot(new CSeq_annot);
    align_annot->SetData().SetAlign().push_back(GetSeqAlign());

    m_Entry->SetSet().SetClass(CBioseq_set::eClass_pop_set);
    m_Entry->SetSet().SetAnnot().push_back(align_annot);

    CBioseq_set::TSeq_set& members = m_Entry->SetSet().SetSeq_set();

    typedef CDense_seg::TDim TNumrow;
    for (TNumrow row = 0; row < m_Dim; ++row) {
        const string& residues = m_SeqVec[row];
        const size_t residue_count = residues.size();

        CRef<CSeq_entry> row_entry(new CSeq_entry);

        // seq-id(s): parsed from the FASTA id, else a single local id
        CBioseq::TId& row_ids = row_entry->SetSeq().SetId();
        CSeq_id::ParseFastaIds(row_ids, m_Ids[row], true);
        if (row_ids.empty()) {
            CRef<CSeq_id> local_id(new CSeq_id(CSeq_id::e_Local,
                                               m_Ids[row]));
            row_ids.push_back(local_id);
        }

        // molecule type: accession first, sequence content as fallback
        CSeq_inst::EMol mol = CSeq_inst::eMol_not_set;
        const CSeq_id::EAccessionInfo acc_info =
            row_ids.front()->IdentifyAccession();
        if (acc_info & CSeq_id::fAcc_nuc) {
            mol = CSeq_inst::eMol_na;
        } else if (acc_info & CSeq_id::fAcc_prot) {
            mol = CSeq_inst::eMol_aa;
        } else {
            switch (CFormatGuess::SequenceType(residues.data(),
                                               residue_count)) {
            case CFormatGuess::eNucleotide:
                mol = CSeq_inst::eMol_na;
                break;
            case CFormatGuess::eProtein:
                mol = CSeq_inst::eMol_aa;
                break;
            default:
                break;
            }
        }

        // attach the instance and add the entry to the set
        CRef<CSeq_inst> inst(new CSeq_inst);
        row_entry->SetSeq().SetInst(*inst);
        members.push_back(row_entry);

        inst->SetRepr(CSeq_inst::eRepr_raw);
        inst->SetMol(mol);

        _ASSERT(residue_count == m_SeqLen[row]);
        inst->SetLength(residue_count);

        // sequence data
        CSeq_data& seq_data = inst->SetSeq_data();
        if (mol != CSeq_inst::eMol_aa) {
            seq_data.SetIupacna().Set(residues);
            CSeqportUtil::Pack(&seq_data);
        } else {
            seq_data.SetIupacaa().Set(residues);
        }
    }

    return m_Entry;
}
Пример #27
0
// Decode HTML entities and numeric character references in 'str' and
// return the result as UTF-8.
//
// str           text to decode.
// encoding      encoding of 'str'; eEncoding_Unknown asks
//               CUtf8::GuessEncoding() to detect it.
// result_flags  optional output: receives a combination of
//               fCharRef_Entity (a named entity was replaced),
//               fCharRef_Numeric (a numeric reference was replaced) and
//               fEncoding (a character was converted from a non-UTF8,
//               non-ASCII encoding).
//
// Recognized forms are "&name;", "&#NNN;" and "&#xHHH;"; the terminating
// ';' must appear within 16 characters.  Anything that does not parse is
// copied through unchanged.
//
// Throws CStringException(eBadArgs) if the encoding is unknown and cannot
// be guessed.
CStringUTF8 CHTMLHelper::HTMLDecode(const string& str, EEncoding encoding,
                                    THTMLDecodeFlags* result_flags)
{
    CStringUTF8 ustr;
    THTMLDecodeFlags result = 0;
    if (encoding == eEncoding_Unknown) {
        encoding = CUtf8::GuessEncoding(str);
        if (encoding == eEncoding_Unknown) {
            NCBI_THROW2(CStringException, eBadArgs,
                "Unable to guess the source string encoding", 0);
        }
    }
    // wild guess...
    ustr.reserve(str.size());

    string::const_iterator i, e = str.end();
    char ch;
    TUnicodeSymbol uch;

    for (i = str.begin(); i != e;) {
        ch = *(i++);
        //check for HTML entities and character references
        if (i != e && ch == '&') {
            string::const_iterator itmp, end_of_entity, start_of_entity;
            itmp = end_of_entity = start_of_entity = i;
            bool ent, dec, hex, parsed=false;
            // named entity: starts with a letter
            ent = isalpha((unsigned char)(*itmp)) != 0;
            // decimal reference: '#' followed by a digit
            dec = !ent && *itmp == '#' && ++itmp != e &&
                  isdigit((unsigned char)(*itmp)) != 0;
            // hexadecimal reference: "#x" or "#X" followed by a hex digit.
            // The probe must be skipped for named entities: it advances
            // itmp past a leading 'x'/'X', which would cut the first
            // letter off names such as "xi".
            hex = !ent && !dec && itmp != e &&
                  (*itmp == 'x' || *itmp == 'X') && ++itmp != e &&
                  isxdigit((unsigned char)(*itmp)) != 0;
            start_of_entity = itmp;
            if (itmp != e && (ent || dec || hex)) {
                // do not look too far
                for (int len=0; len<16 && itmp != e; ++len, ++itmp) {
                    if (*itmp == '&' || *itmp == '#') {
                        break;
                    }
                    if (*itmp == ';') {
                        end_of_entity = itmp;
                        break;
                    }
                    // every character must stay valid for the chosen form
                    ent = ent && isalnum( (unsigned char)(*itmp)) != 0;
                    dec = dec && isdigit( (unsigned char)(*itmp)) != 0;
                    hex = hex && isxdigit((unsigned char)(*itmp)) != 0;
                }
                // end_of_entity still equals i when no ';' was found
                if (end_of_entity != i && (ent || dec || hex)) {
                    uch = 0;
                    if (ent) {
                        // look the name up in the entity table
                        string entity(start_of_entity,end_of_entity);
                        const struct tag_HtmlEntities* p = s_HtmlEntities;
                        for ( ; p->u != 0; ++p) {
                            if (entity.compare(p->s) == 0) {
                                uch = p->u;
                                parsed = true;
                                result |= fCharRef_Entity;
                                break;
                            }
                        }
                    } else {
                        // accumulate the numeric code point
                        parsed = true;
                        result |= fCharRef_Numeric;
                        for (itmp = start_of_entity;
                             itmp != end_of_entity; ++itmp) {
                            TUnicodeSymbol ud = *itmp;
                            if (dec) {
                                uch = 10 * uch + (ud - '0');
                            } else if (hex) {
                                if (ud >='0' && ud <= '9') {
                                    ud -= '0';
                                } else if (ud >='a' && ud <= 'f') {
                                    ud -= 'a';
                                    ud += 10;
                                } else if (ud >='A' && ud <= 'F') {
                                    ud -= 'A';
                                    ud += 10;
                                }
                                uch = 16 * uch + ud;
                            }
                        }
                    }
                    if (parsed) {
                        ustr += CUtf8::AsUTF8(&uch,1);
                        // resume right after the ';'
                        i = ++end_of_entity;
                        continue;
                    }
                }
            }
        }
// no entity - append as is
        if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) {
            ustr.append( 1, ch );
        } else {
            result |= fEncoding;
            ustr += CUtf8::AsUTF8(CTempString(&ch,1), encoding);
        }
    }
    if (result_flags) {
        *result_flags = result;
    }
    return ustr;
}
Пример #28
0
// Append a textual rendering of this date to *label, driven by a
// printf-like format string.  Does nothing if label is null.
//
// Characters other than '%' are copied verbatim.  Recognized sequences:
//   %%         a literal '%'
//   %Y         year
//   %M         month, as a number
//   %N         month, as an English name ("inv" if out of range)
//   %D         day
//   %S         season (string)
//   %h %m %s   hour, minute, second
//   %{ ... %}  group; its output is discarded if a field inside is missing
//   %|         separates alternatives; the first one whose fields are all
//              present is used and the remaining ones are skipped
//
// A decimal length may appear between '%' and the field letter.  For
// numeric fields it yields exactly that many digits (zero-padded on the
// left, keeping only the low-order digits if the value is longer); for
// %N and %S it yields at most that many leading characters.
//
// Throws CGeneralParseException (eFormat), carrying an offset into the
// format, on an incomplete % expression, an unrecognized specifier,
// unbalanced %{ / %}, or a missing field with no alternative available.
void CDate_std::GetDate(string* label, const string& format) const
{
    // Index 0 is a placeholder so that month numbers index directly.
    static const char* const kMonths[] = {
        "0", "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    };
    static const int kNumMonths = sizeof (kMonths) / sizeof (char*);

    if (!label) {
        return;
    }
    // Current %{ nesting level.
    unsigned int                        depth = 0;
    // One entry per open group (plus one for the top level):
    //   first  = label size when the group opened (rollback point),
    //   second = offset of the opening '{' in format (for diagnostics).
    vector<pair<SIZE_TYPE, SIZE_TYPE> > starts;
    starts.push_back(make_pair(label->size(), (SIZE_TYPE)0));
    ITERATE (string, it, format) {
        if (*it != '%') {
            *label += *it;
            continue;
        }
        if (++it == format.end()) {
            NCBI_THROW2(CGeneralParseException, eFormat,
                        "CDate_std::GetDate(): incomplete % expression",
                        it - format.begin());
        }
        // Check for things that can only immediately follow %
        if (*it == '%') {
            *label += '%';
            continue;
        }
        else if (*it == '{') {
            depth++;
            starts.push_back(make_pair(label->size(),
                                       SIZE_TYPE(it - format.begin())));
            continue;
        } else if (*it == '}') {
            if (depth == 0) {
                NCBI_THROW2(CGeneralParseException, eFormat,
                            "CDate_std::GetDate(): unbalanced %}",
                            it - format.begin());
            }
            depth--;
            starts.pop_back();
            continue;
        } else if (*it == '|') {
            // We survived, so just look for the appropriate %}.
            if (depth == 0) {
                return; // Can ignore rest of format
            }
            // Nesting level of groups opened inside the skipped alternatives.
            unsigned int depth2 = 0;
            for (;;) {
                // Advance to the next '%', then onto the character after it.
                while (++it != format.end()  &&  *it != '%')
                    ;
                if (it == format.end()  ||  ++it == format.end()) {
                    NCBI_THROW2(CGeneralParseException, eFormat,
                                "CDate_std::GetDate(): unbalanced %{",
                                starts.back().second);
                }
                if (*it == '}') {
                    if (depth2 == 0) {
                        // Closing brace of the group we are in: done.
                        depth--;
                        starts.pop_back();
                        break;
                    } else {
                        depth2--;
                    }
                } else if (*it == '{') {
                    depth2++;
                }
            }
            continue;
        }

        // Optional decimal length, followed by the field letter.
        unsigned int length = 0;
        int          value  = -1;
        while (isdigit((unsigned char)(*it))) {
            length = length * 10 + *it - '0';
            if (++it == format.end()) {
                NCBI_THROW2(CGeneralParseException, eFormat,
                            "CDate_std::GetDate(): incomplete % expression",
                            it - format.begin());
            }
        }
        // A negative value marks the field as missing.
        switch (*it) {
        case 'Y': value = GetYear(); break;
        case 'M':
        case 'N': value = CanGetMonth()  ? GetMonth()  : -1; break;
        case 'D': value = CanGetDay()    ? GetDay()    : -1; break;
        case 'S': value = CanGetSeason() ? 1           : -1; break;
        case 'h': value = CanGetHour()   ? GetHour()   : -1; break;
        case 'm': value = CanGetMinute() ? GetMinute() : -1; break;
        case 's': value = CanGetSecond() ? GetSecond() : -1; break;
        default:
            NCBI_THROW2(CGeneralParseException, eFormat,
                        "CDate_std::GetDate(): unrecognized format specifier",
                        it - format.begin());
        }

        if (value >= 0) {
            if (*it == 'N') { // special cases
                const char* name;
                if (value >= kNumMonths) {
                    name = "inv";
                } else {
                    name = kMonths[value];
                }
                if (length > 0) {
                    // Use the (string, pos, len) overload, which clamps len
                    // to the available characters.  The (const char*, n)
                    // overload would read past the end of the literal when
                    // the requested length exceeds the name's length.
                    label->append(string(name), 0, length);
                } else {
                    *label += name;
                }
            } else if (*it == 'S') {
                if (length > 0) {
                    label->append(GetSeason(), 0, length);
                } else {
                    *label += GetSeason();
                }
            } else { // just a number
                if (length > 0) {
                    // We want exactly <length> digits.
                    CNcbiOstrstream oss;
                    oss << setfill('0') << setw(length) << value;
                    string s = CNcbiOstrstreamToString(oss);
                    // Keep only the low-order digits if the value is longer.
                    label->append(s, s.size() > length ? s.size() - length : 0,
                                  length);
                } else {
                    *label += NStr::IntToString(value);
                }
            }
        } else {
            // missing...roll back label and look for alternatives, or
            // throw if at top level and none found
            label->erase(starts.back().first);
            char         request = *it;
            unsigned int depth2  = 0;
            for (;;) {
                // Advance to the next '%', then onto the character after it.
                while (++it != format.end()  &&  *it != '%')
                    ;
                if (it == format.end()  ||  ++it == format.end()) {
                    if (depth > 0  ||  depth2 > 0) {
                        NCBI_THROW2(CGeneralParseException, eFormat,
                                    "CDate_std::GetDate(): unbalanced %{",
                                    starts.back().second);
                    } else {
                        NCBI_THROW2(CGeneralParseException, eFormat,
                                   string("CDate_std::GetDate():"
                                          " missing required field %")
                                   + request, it - format.begin() - 1);
                    }
                }
                if (*it == '|'  &&  depth2 == 0) {
                    // Next alternative at our own level: resume formatting.
                    break;
                } else if (*it == '}') {
                    if (depth2 == 0) {
                        if (depth == 0) {
                            NCBI_THROW2(CGeneralParseException, eFormat,
                                        "CDate_std::GetDate(): unbalanced %}",
                                        it - format.begin());
                        }
                        // No alternative left; the group contributes nothing.
                        depth--;
                        starts.pop_back();
                        break;
                    } else {
                        depth2--;
                    }
                } else if (*it == '{') {
                    depth2++;
                }
            }
        }
    }
}