Beispiel #1
0
/// Tell whether the sequence map of this bioseq references the given id.
///
/// @param id
///   Seq-id to look for among the referenced segments.
/// @param resolve_depth
///   Resolve count handed to the seq-map selector.
/// @param limit_flag
///   When equal to eFindSegment_LimitTSE the selector is limited to the
///   top level entry of this handle.
/// @return
///   true as soon as one referenced segment matches, false otherwise.
bool CBioseq_Handle::ContainsSegment(CSeq_id_Handle id,
                                     size_t resolve_depth,
                                     EFindSegment limit_flag) const
{
    // If the id resolves in the scope, segments are compared against the
    // synonym set of the resolved bioseq; otherwise against the id itself.
    CBioseq_Handle target = GetScope().GetBioseqHandle(id);
    CConstRef<CSynonymsSet> synonyms;
    if ( target ) {
        synonyms = target.GetSynonyms();
    }

    SSeqMapSelector selector;
    selector.SetFlags(CSeqMap::fFindRef);
    if ( limit_flag == eFindSegment_LimitTSE ) {
        selector.SetLimitTSE(GetTopLevelEntry());
    }
    selector.SetResolveCount(resolve_depth);

    for ( CSeqMap_CI seg = GetSeqMap().BeginResolved(&GetScope(), selector);
          seg;  ++seg ) {
        bool matches;
        if ( synonyms ) {
            matches = synonyms->ContainsSynonym(seg.GetRefSeqid());
        }
        else {
            matches = (seg.GetRefSeqid() == id);
        }
        if ( matches ) {
            return true;
        }
    }
    return false;
}
Beispiel #2
0
/// Return the sequence vector for an alignment row, creating and caching it
/// in m_SeqVectorCache on first use.
///
/// On every call the coding of the vector is (re)applied: m_NaCoding for
/// nucleotide vectors and m_AaCoding for protein vectors when those are set,
/// IUPAC coding otherwise.
CSeqVector& CAlnVec::x_GetSeqVector(TNumrow row) const
{
    CRef<CSeqVector> vec_ref;

    TSeqVectorCache::iterator cached = m_SeqVectorCache.find(row);
    if (cached == m_SeqVectorCache.end()) {
        // Cache miss: build the vector on the strand of this row.
        CBioseq_Handle bioseq = GetBioseqHandle(row);
        CSeqVector fresh = bioseq.GetSeqVector
            (CBioseq_Handle::eCoding_Iupac,
             IsPositiveStrand(row) ?
             CBioseq_Handle::eStrand_Plus :
             CBioseq_Handle::eStrand_Minus);
        vec_ref.Reset(new CSeqVector(fresh));
        m_SeqVectorCache[row] = vec_ref;
    }
    else {
        vec_ref = cached->second;
    }

    CSeqVector& vec = *vec_ref;
    if ( vec.IsNucleotide() ) {
        if (m_NaCoding == CSeq_data::e_not_set) {
            vec.SetIupacCoding();
        }
        else {
            vec.SetCoding(m_NaCoding);
        }
    }
    else if ( vec.IsProtein() ) {
        if (m_AaCoding == CSeq_data::e_not_set) {
            vec.SetIupacCoding();
        }
        else {
            vec.SetCoding(m_AaCoding);
        }
    }
    return vec;
}
Beispiel #3
0
/// Produce a CBioseq for the given Seq-id.
///
/// @param id
///   Identifier of the sequence to look up through m_scope.
/// @param retrieve_seq_data
///   true: return the complete Bioseq obtained from the scope;
///   false: return a freshly built Bioseq carrying only a copy of the id,
///   the raw representation, the molecule type and the length.
/// @throw CInputException (eSeqIdNotFound)
///   when sequence::GetLength() reports numeric_limits<TSeqPos>::max().
CRef<CBioseq> CBlastBioseqMaker::
        CreateBioseqFromId(CConstRef<CSeq_id> id, bool retrieve_seq_data)
{
    _ASSERT(m_scope.NotEmpty());

    // N.B.: asking for the length fetches the Bioseq into the scope from
    // its data sources (should be BLAST DB first, then Genbank)
    const TSeqPos seq_length = sequence::GetLength(*id, m_scope);
    if (seq_length == numeric_limits<TSeqPos>::max()) {
        NCBI_THROW(CInputException, eSeqIdNotFound,
                    "Sequence ID not found: '" + 
                    id->AsFastaString() + "'");
    }

    CBioseq_Handle handle = m_scope->GetBioseqHandle(*id);

    if (retrieve_seq_data) {
        // Hand back the scope's own complete object (constness cast away).
        return CRef<CBioseq>
            (const_cast<CBioseq*>(&*handle.GetCompleteBioseq()));
    }

    // Data-less stub: id + repr + mol + length only.
    CRef<CBioseq> stub(new CBioseq());

    CRef<CSeq_id> id_copy(new CSeq_id);
    id_copy->Assign(*id);
    stub->SetId().push_back(id_copy);

    CSeq_inst& inst = stub->SetInst();
    inst.SetRepr(CSeq_inst::eRepr_raw);
    if (handle.IsProtein()) {
        inst.SetMol(CSeq_inst::eMol_aa);
    } else {
        inst.SetMol(CSeq_inst::eMol_dna);
    }
    inst.SetLength(seq_length);

    return stub;
}
Beispiel #4
0
/// Report whether the sequence behind the id is anything other than one gap.
///
/// @return
///   false when the gap size measured forward from the start of the
///   sequence vector equals the size of the whole vector, true otherwise.
bool CBlastBioseqMaker::HasSequence(CConstRef<CSeq_id> id)
{
    CBioseq_Handle handle = m_scope->GetBioseqHandle(*id);
    CSeqVector residues = handle.GetSeqVector();
    CSeqVector_CI cursor(residues);
    return cursor.GetGapSizeForward() != residues.size();
}
Beispiel #5
0
/// Construct an iterator over the seq-map of a bioseq, restricted to a range.
///
/// The search window is [range.GetFrom(), range.GetToOpen()) and the
/// iterator is positioned at range.GetFrom().
CSeqMap_CI::CSeqMap_CI(const CBioseq_Handle& bioseq,
                       const SSeqMapSelector& sel,
                       const CRange<TSeqPos>& range)
    : m_Scope(&bioseq.GetScope()),
      m_SearchPos(range.GetFrom()),
      m_SearchEnd(range.GetToOpen())
{
    // Work on a private copy of the selector so the caller's one is left
    // untouched when the bioseq's TSE is linked in.
    SSeqMapSelector linked_sel(sel);
    linked_sel.SetLinkUsedTSE(bioseq.GetTSE_Handle());

    CConstRef<CSeqMap> seq_map(&bioseq.GetSeqMap());
    x_Select(seq_map, linked_sel, range.GetFrom());
}
Beispiel #6
0
/// Construct an iterator over the seq-map of a bioseq, positioned at pos.
///
/// The search window is initialised to [0, kInvalidSeqPos).
CSeqMap_CI::CSeqMap_CI(const CBioseq_Handle& bioseq,
                       const SSeqMapSelector& sel,
                       TSeqPos pos)
    : m_Scope(&bioseq.GetScope()),
      m_SearchPos(0),
      m_SearchEnd(kInvalidSeqPos)
{
    // Work on a private copy of the selector so the caller's one is left
    // untouched when the bioseq's TSE is linked in.
    SSeqMapSelector linked_sel(sel);
    linked_sel.SetLinkUsedTSE(bioseq.GetTSE_Handle());

    CConstRef<CSeqMap> seq_map(&bioseq.GetSeqMap());
    x_Select(seq_map, linked_sel, pos);
}
Beispiel #7
0
/// Construct a sequence vector over a whole bioseq.
///
/// Scope, seq-map and TSE are taken from the handle; the coding starts as
/// e_not_set and is then established through SetCoding(coding).
CSeqVector::CSeqVector(const CBioseq_Handle& bioseq,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(bioseq.GetScope()),
      m_SeqMap(&bioseq.GetSeqMap()),
      m_TSE(bioseq.GetTSE_Handle()),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    // Molecule type and size are filled in before SetCoding() runs.
    m_Mol  = bioseq.GetSequenceType();
    m_Size = bioseq.GetBioseqLength();

    SetCoding(coding);
}
Beispiel #8
0
/// Tell whether the sequence identified by id is a protein.
///
/// @throw CInputException (eSeqIdNotFound)
///   when the id cannot be resolved to a bioseq handle in m_scope.
bool CBlastBioseqMaker::IsProtein(CConstRef<CSeq_id> id)
{
    _ASSERT(m_scope.NotEmpty());

    CBioseq_Handle handle = m_scope->GetBioseqHandle(*id);
    const bool resolved = handle;
    if (!resolved) {
        NCBI_THROW(CInputException, eSeqIdNotFound,
                    "Sequence ID not found: '" + 
                    id->AsFastaString() + "'");
    }

    const bool is_protein = handle.IsProtein();
    return is_protein;
}
//------------------------------------------------------------------------------
void CheckDuplicates( const vector< string > & input,
                      const string & infmt,
                      const CWinMaskUtil::CIdSet * ids,
                      const CWinMaskUtil::CIdSet * exclude_ids )
{
    typedef vector< string >::const_iterator input_iterator;

    dup_lookup_table table;
    CRef<CObjectManager> om(CObjectManager::GetInstance());

    for( input_iterator i( input.begin() ); i != input.end(); ++i )
    {
        Uint4 seqnum( 0 );

        for(CWinMaskUtil::CInputBioseq_CI bs_iter(*i, infmt); bs_iter; ++bs_iter)
        {
            CBioseq_Handle bsh = *bs_iter;

            if( CWinMaskUtil::consider( bsh, ids, exclude_ids ) )
            {
                TSeqPos data_len = bsh.GetBioseqLength();
                if( data_len < MIN_SEQ_LENGTH )
                    continue;

                string id;
                sequence::GetId(bsh, sequence::eGetId_Best)
                    .GetSeqId()->GetLabel(&id);
                data_len -= SAMPLE_SKIP;
                tracker track( table, id );

                string index;
                CSeqVector data =
                    bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
                for( TSeqPos i = 0;  i < data_len;  ++i )
                {
                    index.erase();
                    data.GetSeqData(i, i + SAMPLE_LENGTH, index);
                    const dup_lookup_table::sample * sample( table[index] );

                    if( sample != 0 )
                        track( index, seqnum, i, sample->begin(), sample->end() );
                }

                table.add_seq_info( id, data );
                ++seqnum;
            }
        }
    }
}
Beispiel #10
0
//  --------------------------------------------------------------------------
bool CAsn2FastaApp::HandleSeqID( const string& seq_id )
//  --------------------------------------------------------------------------
{
    CSeq_id id(seq_id);
    CBioseq_Handle bsh = m_Scope->GetBioseqHandle( id );
    if ( ! bsh ) {
        ERR_POST(Fatal << "Unable to obtain data on ID \"" << seq_id.c_str()
          << "\"." );
    }

    //
    //  ... and use that to generate the flat file:
    //
    CSeq_entry_Handle seh = bsh.GetTopLevelEntry();
    return HandleSeqEntry(seh);
}
Beispiel #11
0
//  --------------------------------------------------------------------------
CSeq_entry_Handle CAsn2FastaApp::ObtainSeqEntryFromBioseq(
    CObjectIStream& is)
//  --------------------------------------------------------------------------
//  Read one Bioseq from the stream, add it to m_Scope and return its top
//  level entry. On a CException an empty handle is returned; the exception
//  is posted as an error unless the stream has its EOF fail flag set.
{
    try {
        CRef<CBioseq> bioseq(new CBioseq);
        is >> *bioseq;
        CBioseq_Handle added = m_Scope->AddBioseq(*bioseq);
        return added.GetTopLevelEntry();
    }
    catch (CException& e) {
        const bool at_eof = (is.GetFailFlags() & is.eEOF) != 0;
        if ( !at_eof ) {
            ERR_POST(Error << e);
        }
    }
    return CSeq_entry_Handle();
}
Beispiel #12
0
/// Wrap an existing bioseq handle as an edit handle.
///
/// @throw CObjMgrException (eInvalidHandle)
///   when the TSE of the given handle reports that it cannot be edited.
CBioseq_EditHandle::CBioseq_EditHandle(const CBioseq_Handle& h)
    : CBioseq_Handle(h)
{
    const bool editable = h.GetTSE_Handle().CanBeEdited();
    if ( editable ) {
        return;
    }
    NCBI_THROW(CObjMgrException, eInvalidHandle,
               "object is not in editing mode");
}
Beispiel #13
0
/// Classify a bioseq as protein, mRNA-like nucleotide or genomic nucleotide.
///
/// Decision order:
///   1. amino-acid sequences            -> CSeq_id::fAcc_prot
///   2. MolInfo biomol, when available  -> mRNA-like or genomic
///   3. accession type of the best id   -> mRNA-like or genomic
///   4. otherwise                       -> CSeq_id::eAcc_unknown
Uint4 GetSequenceType(const CBioseq_Handle& bsh)
{
    if (bsh.IsAa()) {
        return CSeq_id::fAcc_prot;
    }

    // Second choice: the biomol recorded in the MolInfo descriptor.
    if (const CMolInfo* mol_info = sequence::GetMolInfo(bsh)) {
        switch (mol_info->GetBiomol()) {
        case CMolInfo::eBiomol_mRNA:
        case CMolInfo::eBiomol_pre_RNA:
        case CMolInfo::eBiomol_tRNA:
        case CMolInfo::eBiomol_snRNA:
        case CMolInfo::eBiomol_scRNA:
        case CMolInfo::eBiomol_cRNA:
        case CMolInfo::eBiomol_snoRNA:
        case CMolInfo::eBiomol_ncRNA:
        case CMolInfo::eBiomol_tmRNA:
            return (CSeq_id::fAcc_nuc | CSeq_id::eAcc_mrna);

        case CMolInfo::eBiomol_genomic:
            return (CSeq_id::fAcc_nuc | CSeq_id::fAcc_genomic);

        default:
            break;
        }
    }

    // Last resort: judge by the accession type of the best id.
    CSeq_id_Handle best_id = sequence::GetId(*bsh.GetSeqId(), bsh.GetScope(),
                                             sequence::eGetId_Best);
    const CSeq_id::EAccessionInfo acc = best_id.GetSeqId()->IdentifyAccession();

    const bool looks_like_mrna =
        (acc & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_est  ||
        acc == CSeq_id::eAcc_refseq_mrna  ||
        acc == CSeq_id::eAcc_refseq_mrna_predicted  ||
        acc == CSeq_id::eAcc_gpipe_mrna;
    if (looks_like_mrna) {
        return (CSeq_id::fAcc_nuc | CSeq_id::eAcc_mrna);
    }

    const bool looks_genomic =
        acc == CSeq_id::eAcc_refseq_chromosome  ||
        acc == CSeq_id::eAcc_refseq_contig  ||
        acc == CSeq_id::eAcc_refseq_genomic  ||
        acc == CSeq_id::eAcc_refseq_genome  ||
        acc == CSeq_id::eAcc_refseq_wgs_intermed;
    if (looks_genomic) {
        return (CSeq_id::fAcc_nuc | CSeq_id::fAcc_genomic);
    }

    return (CSeq_id::eAcc_unknown);
}
pair<double, bool> CScoreUniqSeqCoverage::MakeScore(CBioseq_Handle const& query_handle, vector<CSeq_align const*>::const_iterator begin, vector<CSeq_align const*>::const_iterator end)
{
    CConstRef<CBioseq> bioseq = query_handle.GetCompleteBioseq();

    unsigned int qlen = 0;
    if ( !bioseq.Empty() && bioseq->IsSetLength()) {
        qlen = bioseq->GetLength();
    }

    if ( !qlen ) {
        return make_pair(0, false);
    }

    bool isDenDiag = ( (*begin)->GetSegs().Which() == CSeq_align::C_Segs::e_Dendiag) ?
                              true : false;

    CRangeCollection<TSeqPos> subj_rng_coll((*begin)->GetSeqRange(1));
    CRange<TSeqPos> q_rng((*begin)->GetSeqRange(0));
    
    CRangeCollection<TSeqPos> query_rng_coll(s_FixMinusStrandRange(q_rng));
    
    for( ++begin; begin != end; ++begin ) {
        const CRange<TSeqPos> align_subj_rng((*begin)->GetSeqRange(1));
        // subject range should always be on the positive strand
        assert(align_subj_rng.GetTo() > align_subj_rng.GetFrom());
        CRangeCollection<TSeqPos> coll(align_subj_rng);
        coll.Subtract(subj_rng_coll);

        if ( coll.empty() ) {
            continue;
        }

        if(coll[0] == align_subj_rng) {
            CRange<TSeqPos> query_rng ((*begin)->GetSeqRange(0));
            query_rng_coll += s_FixMinusStrandRange(query_rng);
            subj_rng_coll += align_subj_rng;
        }
        else {
            ITERATE (CRangeCollection<TSeqPos>, uItr, coll) {
                CRange<TSeqPos> query_rng;
                const CRange<TSeqPos> & subj_rng = (*uItr);
                CRef<CSeq_align> densegAln;
                if ( isDenDiag) {
                    densegAln = CreateDensegFromDendiag(**begin);
                }

                CAlnMap map( (isDenDiag) ? densegAln->GetSegs().GetDenseg() : (*begin)->GetSegs().GetDenseg());
                TSignedSeqPos subj_aln_start =  map.GetAlnPosFromSeqPos(1,subj_rng.GetFrom());
                TSignedSeqPos subj_aln_end =  map.GetAlnPosFromSeqPos(1,subj_rng.GetTo());
                query_rng.SetFrom(map.GetSeqPosFromAlnPos(0,subj_aln_start));
                query_rng.SetTo(map.GetSeqPosFromAlnPos(0,subj_aln_end));

                query_rng_coll += s_FixMinusStrandRange(query_rng);
                subj_rng_coll += subj_rng;
            }
        }
    }
Beispiel #15
0
string sx_GetSeqData(const CBioseq& seq)
{
    CScope scope(*CObjectManager::GetInstance());
    CBioseq_Handle bh = scope.AddBioseq(seq);
    string ret;
    bh.GetSeqVector().GetSeqData(0, kInvalidSeqPos, ret);
    NON_CONST_ITERATE ( string, i, ret ) {
        if ( *i == char(0xff) || *i == char(0) )
            *i = char(0xf);
    }
    if ( 0 ) {
        size_t w = 0;
        ITERATE ( string, i, ret ) {
            if ( w == 78 ) {
                cout << '\n';
                w = 0;
            }
            cout << "0123456789ABCDEF"[*i&0xff];
            ++w;
        }
        cout << endl;
    }
Beispiel #16
0
void CSeqdesc_CI::x_CheckRef(const CBioseq_Handle& handle)
{
    m_Ref.Reset();
    if (!handle  ||
        !handle.IsSetInst_Repr()  ||
        handle.GetInst_Repr() != CSeq_inst::eRepr_ref  ||
        !handle.IsSetInst_Ext()  ||
        !handle.GetInst_Ext().IsRef()) {
        return;
    }
    const CRef_ext& ref = handle.GetInst_Ext().GetRef();
    CConstRef<CSeq_id> ref_id(ref.GetId());
    if ( !ref_id ) return; // Bad reference location or multiple ids.
    m_Ref = handle.GetScope().GetBioseqHandle(*ref_id);
}
/// Compute the percentage of the query covered by a group of alignments.
///
/// The ranges of row 0 of every alignment in [begin, end) are merged into
/// one range collection; the score is 100 * covered length / query length.
///
/// @return
///   (score, true) on success; (0, false) when the query bioseq is
///   unavailable, has no length set, or has zero length.
pair<double, bool> CScoreSeqCoverage::MakeScore(CBioseq_Handle const& query_handle, vector<CSeq_align const*>::const_iterator begin, vector<CSeq_align const*>::const_iterator end) 
{
    CConstRef<CBioseq> query = query_handle.GetCompleteBioseq();

    unsigned int query_len = 0;
    if ( query.NotEmpty()  &&  query->IsSetLength() ) {
        query_len = query->GetLength();
    }
    if ( query_len == 0 ) {
        return make_pair(0, false);
    }

    // Union of the row-0 ranges of all alignments.
    CRangeCollection<TSeqPos> covered;
    while ( begin != end ) {
        CRange<TSeqPos> aln_range = (*begin)->GetSeqRange(0);
        s_FixMinusStrandRange(aln_range);
        covered += aln_range;
        ++begin;
    }

    const double score = ( 100.0 * covered.GetCoveredLength() ) / query_len;
    return make_pair(score, true);
}
Beispiel #18
0
/// Build a stand-alone Bioseq representing a Seq-loc.
///
/// The new Bioseq carries a copy of the id of the located sequence and a
/// title made of the id label and the location label. Its Seq-inst is
///   - raw, holding the IUPAC data of the location, when the location is
///     "whole" or a single interval spanning the full sequence;
///   - delta otherwise: data literals for every interval of the location
///     with gap literals in between, so that the total length equals the
///     length of the original sequence.
///
/// Positions are inclusive, hence the gap between two consecutive intervals
/// is next.from - prev.to - 1 (the same convention as the trailing gap).
/// No gap literal is emitted when the next interval does not start beyond
/// the end of the previous one (adjacent, overlapping or out of order).
///
/// @param loc    location to convert
/// @param scope  scope used to resolve the location's id and its data
/// @return
///   the new Bioseq, or an empty CRef when the location has no single id
///   or the id cannot be resolved in the scope.
CRef<objects::CBioseq>
SeqLocToBioseq(const objects::CSeq_loc& loc, objects::CScope& scope)
{
    CRef<CBioseq> bioseq;
    if ( !loc.GetId() ) {
        return bioseq;
    }

    // Build a Seq-entry for the query Seq-loc
    CBioseq_Handle handle = scope.GetBioseqHandle(*loc.GetId());
    if( !handle ){
        return bioseq;
    }

    bioseq.Reset( new CBioseq() );

    // add an ID for our sequence
    CRef<CSeq_id> id(new CSeq_id());
    id->Assign(*handle.GetSeqId());
    bioseq->SetId().push_back(id);

    // a title
    CRef<CSeqdesc> title( new CSeqdesc() );
    string title_str;
    id -> GetLabel(&title_str );
    title_str += ": ";
    loc.GetLabel( &title_str );
    title->SetTitle( title_str );
    bioseq->SetDescr().Set().push_back( title );

    ///
    /// create the seq-inst
    /// we can play some games here
    ///
    CSeq_inst& inst = bioseq->SetInst();

    if (handle.IsAa()) {
        inst.SetMol(CSeq_inst::eMol_aa);
    } else {
        inst.SetMol(CSeq_inst::eMol_na);
    }

    bool process_whole = false;
    if (loc.IsWhole()) {
        process_whole = true;
    } else if (loc.IsInt()) {
        TSeqRange range = loc.GetTotalRange();
        if (range.GetFrom() == 0  &&  range.GetTo() == handle.GetBioseqLength() - 1) {
            /// it's whole
            process_whole = true;
        }
    }

    /// BLAST now handles delta-seqs correctly, so we can submit this
    /// as a delta-seq
    if (process_whole) {
        /// just encode the whole sequence
        CSeqVector vec(loc, scope, CBioseq_Handle::eCoding_Iupac);
        string seq_string;
        vec.GetSeqData(0, vec.size(), seq_string);

        inst.SetRepr(CSeq_inst::eRepr_raw);
        inst.SetLength(seq_string.size());
        if (vec.IsProtein()) {
            inst.SetMol(CSeq_inst::eMol_aa);
            inst.SetSeq_data().SetIupacaa().Set().swap(seq_string);
        } else {
            inst.SetMol(CSeq_inst::eMol_na);
            inst.SetSeq_data().SetIupacna().Set().swap(seq_string);
            CSeqportUtil::Pack(&inst.SetSeq_data());
        }
    } else {
        inst.SetRepr(CSeq_inst::eRepr_delta);
        inst.SetLength(handle.GetBioseqLength());
        CDelta_ext& ext = inst.SetExt().SetDelta();

        ///
        /// create a delta sequence
        ///

        TSeqRange range = loc.GetTotalRange();

        /// first segment: gap out to initial start of seq-loc
        if (range.GetFrom() != 0) {
            ext.AddLiteral(range.GetFrom());
        }

        CSeq_loc_CI loc_iter(loc);
        if (loc_iter) {
            TSeqRange  prev   = loc_iter.GetRange();
            ENa_strand strand = loc_iter.GetStrand();

            do {
                /// encode a literal for the included bases
                CRef<CSeq_loc> sl =
                    handle.GetRangeSeq_loc(prev.GetFrom(), prev.GetTo(), strand);

                CSeqVector vec(*sl, scope, CBioseq_Handle::eCoding_Iupac);
                string seq_string;
                vec.GetSeqData(0, vec.size(), seq_string);

                ext.AddLiteral(seq_string,
                    (vec.IsProtein() ? CSeq_inst::eMol_aa : CSeq_inst::eMol_na));

                /// skip to the next segment
                /// we may need to include a gap
                ++loc_iter;
                if (loc_iter) {
                    TSeqRange next = loc_iter.GetRange();
                    /// prev.GetTo() is inclusive: the uncovered stretch is
                    /// [prev.to + 1, next.from - 1]. Skip the literal when
                    /// there is nothing in between, which also avoids
                    /// unsigned underflow for out-of-order intervals.
                    if (next.GetFrom() > prev.GetTo() + 1) {
                        ext.AddLiteral(next.GetFrom() - prev.GetTo() - 1);
                    }
                    prev = next;
                    strand = loc_iter.GetStrand();
                }
            }
            while (loc_iter);

            /// gap at the end
            if (prev.GetTo() < handle.GetBioseqLength() - 1) {
                ext.AddLiteral(handle.GetBioseqLength() - prev.GetTo() - 1);
            }
        }
    }

    return bioseq;
}
Beispiel #19
0
void
CVecscreenRun::CFormatter::FormatResults(CNcbiOstream& out,
                                         CRef<CBlastOptionsHandle> vs_opts)
{
    const bool kPrintAlignments = static_cast<bool>(m_Outfmt == eShowAlignments);
    const CFormattingArgs::EOutputFormat fmt(CFormattingArgs::ePairwise);
    const bool kBelieveQuery(false);
    const bool kShowGi(false);
    const CSearchDatabase dbinfo(m_Screener.m_DB,
                                 CSearchDatabase::eBlastDbIsNucleotide);
    CLocalDbAdapter dbadapter(dbinfo);
    const int kNumDescriptions(0);
    const int kNumAlignments(50);   // FIXME: find this out
    const bool kIsTabular(false);

    CBlastFormat blast_formatter(vs_opts->GetOptions(), dbadapter, fmt,
                                 kBelieveQuery, out, kNumDescriptions,
                                 kNumAlignments, m_Scope, BLAST_DEFAULT_MATRIX,
                                 kShowGi, m_HtmlOutput);
    blast_formatter.PrintProlog();
    list<SVecscreenSummary> match_list = m_Screener.GetList();

    // Acknowledge the query if the alignments section won't be printed (this
    // does the acknowledgement)
    if (kPrintAlignments == false) {
        CBioseq_Handle bhandle =
            m_Scope.GetBioseqHandle(*m_Screener.m_SeqLoc->GetId(),
                                    CScope::eGetBioseq_All);
        if( !bhandle  ){
            string message = "Failed to resolve SeqId: "+m_Screener.m_SeqLoc->GetId()->AsFastaString();
            ERR_POST(message);
            NCBI_THROW(CException, eUnknown, message);
        }
        CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
        CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, 
                                                CBlastFormat::kFormatLineLength,
                                                out, kBelieveQuery,
                                                m_HtmlOutput, kIsTabular);
    }
    if (m_HtmlOutput) {
        m_Screener.m_Vecscreen->VecscreenPrint(out);
        if (match_list.empty() && !kPrintAlignments) {
            out << "<b>***** No hits found *****</b><br>\n";
        }
    } else {
        if (match_list.empty() && !kPrintAlignments) {
            out << "No hits found\n";
        } else {
            typedef pair<string, string> TLabels;
            vector<TLabels> match_labels;
            match_labels.push_back(TLabels("Strong", "Strong match"));
            match_labels.push_back(TLabels("Moderate", "Moderate match"));
            match_labels.push_back(TLabels("Weak", "Weak match"));
            match_labels.push_back(TLabels("Suspect", "Suspect origin"));

            ITERATE(vector<TLabels>, label, match_labels) {
                list<SVecscreenSummary>::iterator boundary, itr;
                boundary = stable_partition(match_list.begin(), match_list.end(),
                                            SVecscreenMatchFinder(label->first));
                if (boundary != match_list.begin()) {
                    out << label->second << "\n";
                    for (itr = match_list.begin(); itr != boundary; ++itr) {
                        out << itr->range.GetFrom()+1 << "\t" 
                            << itr->range.GetTo()+1 << "\n";
                    }
                    match_list.erase(match_list.begin(), boundary);
                }
            }
        }
    }
Beispiel #20
0
void CGapAnalysis::AddBioseqGaps(
    const CBioseq_Handle & bioseq_h,
    TAddFlag add_flags,
    TFlag fFlags,
    size_t max_resolve_count)
{
    // get CSeq_id of CBioseq
    TSeqIdConstRef pSeqId = bioseq_h.GetSeqId();
    const TSeqPos bioseq_len = bioseq_h.GetBioseqLength();

    // fFlags control  what we look at
    CSeqMap::TFlags seq_map_flags = 0;
    if( add_flags & fAddFlag_IncludeSeqGaps ) {
        seq_map_flags |= CSeqMap::fFindGap;
    }
    if( add_flags & fAddFlag_IncludeUnknownBases ) {
        seq_map_flags |= CSeqMap::fFindData;
    }

    TSeqPos end_of_last_segment = 0;  // exclusive
    bool all_segments_and_in_order = true;

    SSeqMapSelector selector;
    selector.SetFlags(seq_map_flags).SetResolveCount(max_resolve_count);
    CSeqMap_CI seqmap_ci(bioseq_h, selector);
    for( ; seqmap_ci; ++seqmap_ci ) {
        if( seqmap_ci.GetPosition() != end_of_last_segment ) {
            all_segments_and_in_order = false;
        }
        end_of_last_segment = seqmap_ci.GetEndPosition();

        CSeqMap::ESegmentType seg_type = seqmap_ci.GetType();
        switch(seg_type) {
        case CSeqMap::eSeqGap:
            _ASSERT(add_flags & fAddFlag_IncludeSeqGaps);
            AddGap(
                eGapType_SeqGap, pSeqId,
                seqmap_ci.GetLength(),
                bioseq_len,
                seqmap_ci.GetPosition(), seqmap_ci.GetEndPosition(),
                fFlags);
            break;
        case CSeqMap::eSeqData:
            _ASSERT(add_flags & fAddFlag_IncludeUnknownBases);
            x_AddGapsFromBases(
                seqmap_ci, pSeqId,
                bioseq_len, fFlags);
            break;
        default:
            NCBI_USER_THROW_FMT(
                "This segment type is not supported at this time: " <<
                static_cast<int>(seg_type) );
        }
    }

    if( end_of_last_segment != bioseq_len ) {
        all_segments_and_in_order = false;
    }
    if( ! all_segments_and_in_order ) {
        ERR_POST(
            Warning << "Not all segments on bioseq '"
            << pSeqId->AsFastaString() << "' were in order "
            "or some positions appear to have been skipped.  "
            "One possible reason is that there were far references for "
            "which no attempt was made to resolve due to max resolve count "
            "being reached.");
    }
}
Beispiel #21
0
bool CBioseq_Handle::AddUsedBioseq(const CBioseq_Handle& bh) const
{
    return GetTSE_Handle().AddUsedTSE(bh.GetTSE_Handle());
}
/// Create protein products for the coding regions of a feature table.
///
/// Every Cdregion feature of the annotation that has no product yet is
/// translated; the translation is wrapped into a new raw amino-acid Bioseq
/// with a local id "tpNNNNN" (counter zero-padded to five digits), a title
/// and a peptide mol-type descriptor. The Bioseq is appended to the
/// returned set and the feature's product is pointed at it.
///
/// @param annot   annotation whose features are updated in place
/// @param handle  bioseq handle; only its scope is used, for translation
/// @return
///   set of the generated protein entries; empty when the annotation is
///   not a feature table or nothing needed translating.
CRef<CBioseq_set> CMakeCdrProds::MakeCdrProds(CRef<CSeq_annot> annot,
                                              CBioseq_Handle handle)
{
    CRef<CBioseq_set> products(new CBioseq_set);
    if (!annot->GetData().IsFtable()) {
        // Is this the right thing to do?
        // Could throw, or could return null CRef instead.
        return products;
    }

    list<CRef<CSeq_feat> >& features = annot->SetData().SetFtable();

    NON_CONST_ITERATE (list<CRef<CSeq_feat> >, feat_it, features) {
        CSeq_feat& feat = **feat_it;

        // Only coding regions that do not have a product yet are handled.
        if (!feat.GetData().IsCdregion()  ||  feat.IsSetProduct()) {
            continue;
        }

        // Translate and describe the protein as a raw IUPAC aa instance.
        string protein;
        CSeqTranslator::Translate(feat, handle.GetScope(), protein);
        CRef<CSeq_data> prot_data(new CSeq_data(protein,
                                                CSeq_data::e_Iupacaa));
        CRef<CSeq_inst> prot_inst(new CSeq_inst);
        prot_inst->SetSeq_data(*prot_data);
        prot_inst->SetRepr(CSeq_inst_Base::eRepr_raw);
        prot_inst->SetMol(CSeq_inst_Base::eMol_aa);
        prot_inst->SetLength(protein.size());

        CRef<CBioseq> prot_seq(new CBioseq);

        // Local id from the shared counter, padded to five digits.
        string serial = NStr::NumericToString(sm_Counter.Add(1));
        if (serial.size() < 5) {
            serial = string(5 - serial.size(), '0') + serial;
        }
        const string acc = "tp" + serial;
        CRef<CSeq_id> prot_id(new CSeq_id("lcl|" + acc));
        prot_seq->SetId().push_back(prot_id);

        // Descriptors: title, then mol-type.
        CRef<CSeqdesc> title_desc(new CSeqdesc);
        title_desc->SetTitle(string("Translation product ") + acc);
        prot_seq->SetDescr().Set().push_back(title_desc);

        CRef<CSeqdesc> mol_type_desc(new CSeqdesc);
        mol_type_desc->SetMol_type( eGIBB_mol_peptide);
        prot_seq->SetDescr().Set().push_back(mol_type_desc);

        prot_seq->SetInst(*prot_inst);

        // Wrap the Bioseq in an entry and add it to the result set.
        CRef<CSeq_entry> prot_entry(new CSeq_entry);
        prot_entry->SetSeq(*prot_seq);
        products->SetSeq_set().push_back(prot_entry);

        // Record the new sequence as the product of the feature.
        CRef<CSeq_loc> product_loc(new CSeq_loc);
        product_loc->SetWhole(*prot_id);
        feat.SetProduct(*product_loc);
    }

    return products;
}
Beispiel #23
0
/// Descend one level into the segment the iterator currently points at.
///
/// A sub-map segment is always entered. A reference segment is entered
/// only when resolveExternal is true and the referenced sequence can be
/// obtained, either from the selector's limit TSE or from the scope.
///
/// @param pos
///   Position passed on to the nested x_Push() overload.
/// @param resolveExternal
///   Whether reference segments may be resolved.
/// @return
///   true when a new level was pushed; false when the current segment is
///   out of range, is neither a sub-map nor a reference, may not or cannot
///   be resolved, is skipped by the feature fetch policy, or the nested
///   push left the stack depth unchanged.
/// @throw CSeqMapException
///   eNullPointer when a reference must be resolved without a scope;
///   eFail when it cannot be resolved and fIgnoreUnresolved is not set;
///   eSelfReference when the same seq-map is found again further down the
///   stack (checked whenever the stack depth is a multiple of 64).
bool CSeqMap_CI::x_Push(TSeqPos pos, bool resolveExternal)
{
    const TSegmentInfo& info = x_GetSegmentInfo();
    if ( !info.InRange() ) {
        return false;
    }
    const CSeqMap::CSegment& seg = info.x_GetSegment();
    CSeqMap::ESegmentType type = CSeqMap::ESegmentType(seg.m_SegType);

    switch ( type ) {
    case CSeqMap::eSeqSubMap:
    {{
        CConstRef<CSeqMap> push_map
            (static_cast<const CSeqMap*>(info.m_SeqMap->x_GetObject(seg)));
        // We have to copy the info.m_TSE into local variable push_tse because
        // of TSegmentInfo referenced by info can be moved inside x_Push() call.
        // The copy -- not info.m_TSE itself -- must be what is passed on.
        CTSE_Handle push_tse = info.m_TSE;
        x_Push(push_map, push_tse,
               GetRefPosition(), GetLength(), GetRefMinusStrand(), pos);
        break;
    }}
    case CSeqMap::eSeqRef:
    {{
        if ( !resolveExternal ) {
            return false;
        }
        const CSeq_id& seq_id =
            static_cast<const CSeq_id&>(*info.m_SeqMap->x_GetObject(seg));
        CBioseq_Handle bh;
        if ( m_Selector.x_HasLimitTSE() ) {
            // Check TSE limit
            bh = m_Selector.x_GetLimitTSE().GetBioseqHandle(seq_id);
            if ( !bh ) {
                return false;
            }
        }
        else {
            if ( !GetScope() ) {
                NCBI_THROW(CSeqMapException, eNullPointer,
                           "Cannot resolve "+
                           seq_id.AsFastaString()+": null scope pointer");
            }
            bh = GetScope()->GetBioseqHandle(seq_id);
            if ( !bh ) {
                if ( GetFlags() & CSeqMap::fIgnoreUnresolved ) {
                    return false;
                }
                NCBI_THROW(CSeqMapException, eFail,
                           "Cannot resolve "+
                           seq_id.AsFastaString()+": unknown");
            }
        }
        // With fByFeaturePolicy, sequences whose policy is "only near"
        // are not entered.
        if ( (GetFlags() & CSeqMap::fByFeaturePolicy) &&
            bh.GetFeatureFetchPolicy() == bh.eFeatureFetchPolicy_only_near ) {
            return false;
        }
        // Record the referenced TSE as used: on the current TSE when
        // possible, otherwise on the selector.
        if ( info.m_TSE ) {
            if ( !info.m_TSE.AddUsedTSE(bh.GetTSE_Handle()) ) {
                m_Selector.AddUsedTSE(bh.GetTSE_Handle());
            }
        }
        size_t depth = m_Stack.size();
        x_Push(ConstRef(&bh.GetSeqMap()), bh.GetTSE_Handle(),
               GetRefPosition(), GetLength(), GetRefMinusStrand(), pos);
        if (m_Stack.size() == depth) {
            // nothing was pushed
            return false;
        }
        m_Selector.PushResolve();
        if ( (m_Stack.size() & 63) == 0 ) {
            // check for self-recursion every 64'th stack frame
            const CSeqMap* top_seq_map = &m_Stack.back().x_GetSeqMap();
            for ( int i = m_Stack.size()-2; i >= 0; --i ) {
                if ( &m_Stack[i].x_GetSeqMap() == top_seq_map ) {
                    NCBI_THROW(CSeqMapException, eSelfReference,
                               "Self-reference in CSeqMap");
                }
            }
        }
        break;
    }}
    default:
        return false;
    }
    return true;
}
int CTestSeqMapSwitch::Run()
{
    const CArgs& args = GetArgs();
    
    CScope scope(*CObjectManager::GetInstance());

    if ( args["file"] ) {
        CRef<CSeq_entry> entry(new CSeq_entry);
        auto_ptr<CObjectIStream> in(CObjectIStream::Open(eSerial_AsnText,
                                                         args["file"].AsInputFile()));
        *in >> *entry;
        scope.AddTopLevelSeqEntry(*entry);
    }

    CSeq_id_Handle id;
    if ( args["id"] ) {
        CSeq_id seq_id(args["id"].AsString());
        id = CSeq_id_Handle::GetHandle(seq_id);
    }

    CBioseq_Handle bh = scope.GetBioseqHandle(id);
    if ( !bh ) {
        ERR_POST(Fatal << "no bioseq found");
    }
    
    
    TSeqMapSwitchPoints pp = GetAllSwitchPoints(bh);
    ITERATE ( TSeqMapSwitchPoints, it, pp ) {
        const CSeqMapSwitchPoint& p = **it;
        NcbiCout << "Switch @ " << p.m_MasterPos
                 << " " << p.m_LeftId.AsString()
                 << " -> " << p.m_RightId.AsString() << NcbiEndl;
        NcbiCout << "    range: " << p.m_MasterRange.GetFrom()
                 << ".." << p.m_MasterRange.GetTo() << NcbiEndl;
        NcbiCout << "    exact: " << p.m_ExactMasterRange.GetFrom()
                 << ".." << p.m_ExactMasterRange.GetTo() << NcbiEndl;
        TSeqPos pos, add;
        int diff;

        pos = p.m_MasterRange.GetFrom();

        add = p.GetInsert(pos);
        diff = p.GetLengthDifference(pos, add);
        NcbiCout << " if switched @ " << pos << " diff="<<diff << NcbiEndl;

        pos = p.m_MasterRange.GetTo();

        add = p.GetInsert(pos);
        diff = p.GetLengthDifference(pos, add);
        NcbiCout << " if switched @ " << pos << " diff="<<diff << NcbiEndl;
    }
    if ( args["pos"] ) {
        TSeqPos pos = args["pos"].AsInteger();
        NON_CONST_ITERATE ( TSeqMapSwitchPoints, it, pp ) {
            CSeqMapSwitchPoint& p = **it;
            if ( pos >= p.m_MasterRange.GetFrom() &&
                 pos <= p.m_MasterRange.GetTo() ) {
                NcbiCout << "Switching to " << pos << NcbiEndl;
                NcbiCout << "Before: " <<
                    MSerial_AsnText << *bh.GetCompleteObject() << NcbiEndl;
                p.ChangeSwitchPoint(pos, 0);
                NcbiCout << "After: " << 
                    MSerial_AsnText << *bh.GetCompleteObject() << NcbiEndl;
                break;
            }
        }
    }
Beispiel #25
0
//-------------------------------------------------------------------------
int CWinMaskApplication::Run (void)
{
    SetDiagPostLevel( eDiag_Warning );
    CWinMaskConfig aConfig( GetArgs() );

    // Branch away immediately if the converter is called.
    //
    // if( GetArgs()["convert"].AsBoolean() ) {
    if( aConfig.AppType() == CWinMaskConfig::eConvertCounts )
    {
        if( aConfig.Output() == "-" ) {
            CWinMaskCountsConverter converter( 
                    aConfig.Input(),
                    NcbiCout,
                    aConfig.SFormat(),
                    aConfig.GetMetaData() );
            return converter();
        }
        else {
            CWinMaskCountsConverter converter( 
                    aConfig.Input(),
                    aConfig.Output(),
                    aConfig.SFormat(),
                    aConfig.GetMetaData() );
            return converter();
        }
    }

    CRef<CObjectManager> om(CObjectManager::GetInstance());
    if(aConfig.InFmt() == "seqids")
        CGBDataLoader::RegisterInObjectManager(
            *om, 0, CObjectManager::eDefault );

    // Read and validate configuration values.
    if( aConfig.AppType() == CWinMaskConfig::eComputeCounts )
    {
        if( aConfig.Output() == "-" ) {
            CWinMaskCountsGenerator cg( aConfig.Input(),
                                        NcbiCout,
                                        aConfig.InFmt(),
                                        aConfig.SFormat(),
                                        aConfig.Th(),
                                        aConfig.Mem(),
                                        aConfig.UnitSize(),
                                        aConfig.GenomeSize(),
                                        aConfig.MinScore(),
                                        aConfig.MaxScore(),
                                        aConfig.CheckDup(),
                                        aConfig.FaList(),
                                        aConfig.Ids(),
                                        aConfig.ExcludeIds(),
                                        aConfig.UseBA(),
                                        aConfig.GetMetaData() );
            cg();
        }
        else {
            CWinMaskCountsGenerator cg( aConfig.Input(),
                                        aConfig.Output(),
                                        aConfig.InFmt(),
                                        aConfig.SFormat(),
                                        aConfig.Th(),
                                        aConfig.Mem(),
                                        aConfig.UnitSize(),
                                        aConfig.GenomeSize(),
                                        aConfig.MinScore(),
                                        aConfig.MaxScore(),
                                        aConfig.CheckDup(),
                                        aConfig.FaList(),
                                        aConfig.Ids(),
                                        aConfig.ExcludeIds(),
                                        aConfig.UseBA(),
                                        aConfig.GetMetaData() );
            cg();
        }

        return 0;
    }

    if(aConfig.InFmt() == "seqids"){
        LOG_POST(Error << "windowmasker with seqids input not implemented yet");
        return 1;
    }

    CMaskReader & theReader = aConfig.Reader();
    CMaskWriter & theWriter = aConfig.Writer();
    CSeqMasker theMasker( aConfig.LStatName(),
                          aConfig.WindowSize(),
                          aConfig.WindowStep(),
                          aConfig.UnitStep(),
                          aConfig.Textend(),
                          aConfig.CutoffScore(),
                          aConfig.MaxScore(),
                          aConfig.MinScore(),
                          aConfig.SetMaxScore(),
                          aConfig.SetMinScore(),
                          aConfig.MergePass(),
                          aConfig.MergeCutoffScore(),
                          aConfig.AbsMergeCutoffDist(),
                          aConfig.MeanMergeCutoffDist(),
                          aConfig.MergeUnitStep(),
                          aConfig.Trigger(),
                          aConfig.TMin_Count(),
                          aConfig.Discontig(),
                          aConfig.Pattern(),
                          aConfig.UseBA() );
    CRef< CSeq_entry > aSeqEntry( 0 );
    Uint4 total = 0, total_masked = 0;
    CSDustMasker * duster( 0 );
    const CWinMaskConfig::CIdSet * ids( aConfig.Ids() );
    const CWinMaskConfig::CIdSet * exclude_ids( aConfig.ExcludeIds() );

    if( aConfig.AppType() == CWinMaskConfig::eGenerateMasksWithDuster )
        duster = new CSDustMasker( aConfig.DustWindow(),
                                   aConfig.DustLevel(),
                                   aConfig.DustLinker() );

    while( (aSeqEntry = theReader.GetNextSequence()).NotEmpty() )
    {
        if( aSeqEntry->Which() == CSeq_entry::e_not_set ) continue;
        CScope scope(*om);
        CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(*aSeqEntry);
        Uint4 masked = 0;
        CBioseq_CI bs_iter(seh, CSeq_inst::eMol_na);
        for ( ;  bs_iter;  ++bs_iter) {
            CBioseq_Handle bsh = *bs_iter;
            if (bsh.GetBioseqLength() == 0) {
                continue;
            }

            if( CWinMaskUtil::consider( bsh, ids, exclude_ids ) )
            {
                TSeqPos len = bsh.GetBioseqLength();
                total += len;
                _TRACE( "Sequence length " << len );
                CSeqVector data =
                    bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
                auto_ptr< CSeqMasker::TMaskList > mask_info( theMasker( data ) );
                CSeqMasker::TMaskList dummy;

                if( duster != 0 ) // Dust and merge with mask_info
                {
                    auto_ptr< CSeqMasker::TMaskList > dust_info( 
                        (*duster)( data, *mask_info.get() ) );
                    CSeqMasker::MergeMaskInfo( mask_info.get(), dust_info.get() );
                }

                // theWriter.Print( bsh, *mask_info, aConfig.MatchId() );
                theWriter.Print( bsh, *mask_info, GetArgs()["parse_seqids"] );

                for( CSeqMasker::TMaskList::const_iterator i = mask_info->begin();
                     i != mask_info->end(); ++i )
                    masked += i->second - i->first + 1;

                total_masked += masked;
                _TRACE( "Number of positions masked: " << masked );
            }
        }
    }

    _TRACE( "Total number of positions: " << total );
    _TRACE( "Total number of positions masked: " << total_masked );
    return 0;
}