Exemplo n.º 1
0
// Sanity-checks a Bioseq using _ASSERT: it must carry at least one id, its
// IUPACna sequence string must have exactly the length recorded in the
// Seq-inst, and every residue character must lie in the range 'A'..'Z'.
// NOTE(review): the closing brace of this function is not visible in this
// excerpt.
void s_Check(const CBioseq& seq)
{
    // The id container must not be empty.
    _ASSERT(!seq.GetId().empty());
    const CSeq_inst& inst = seq.GetInst();
    // The data is read as IUPACna unconditionally; presumably the accessor
    // fails if another encoding is stored -- confirm against callers.
    const string& seqdata = inst.GetSeq_data().GetIupacna().Get();
    // Stored length must agree with the actual number of residues.
    _ASSERT(seqdata.size() == inst.GetLength());
    // Only upper-case ASCII letters are accepted as residues.
    ITERATE ( string, i, seqdata ) {
        _ASSERT(*i >= 'A' && *i <= 'Z');
    }
Exemplo n.º 2
0
// Constructs a sequence vector directly from a Bioseq object.
//
// A sequence map is created for 'bioseq' and stored; the total size is taken
// from that map (resolved against 'scope'), the molecule type from the
// Bioseq's Seq-inst.  The coding starts out as "not set" and is then
// switched to the requested 'coding' via SetCoding().
//
//   bioseq  - Bioseq the vector is built over
//   scope   - scope stored in the vector and used for the length query
//   coding  - requested residue coding
//   strand  - strand stored in the vector
CSeqVector::CSeqVector(const CBioseq& bioseq,
                       CScope* scope,
                       EVectorCoding coding,
                       ENa_strand strand)
    : m_Scope(scope),
      m_SeqMap(CSeqMap::CreateSeqMapForBioseq(bioseq)),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    // Size comes from the freshly created map, not from the Bioseq itself.
    m_Size = m_SeqMap->GetLength(scope);

    const CSeq_inst& seq_inst = bioseq.GetInst();
    m_Mol = seq_inst.GetMol();

    // Done last, after size and molecule type have been assigned.
    SetCoding(coding);
}
Exemplo n.º 3
0
bool
CBlastBioseqMaker::IsEmptyBioseq(const CBioseq& bioseq)
{
    if (bioseq.CanGetInst()) {
        const CSeq_inst& inst = bioseq.GetInst();
        return (inst.GetRepr() == CSeq_inst::eRepr_raw &&
                inst.CanGetMol() &&
                inst.CanGetLength() &&
                inst.CanGetSeq_data() == false);
    }
    return false;

}
// Strips non-alphabetic characters from the protein sequence stored in
// 'bioseq' and keeps the Seq-inst length in sync with the sequence string.
//
// Supported encodings are Ncbieaa, Iupacaa and Ncbistdaa (the latter is
// converted to an Ncbieaa string first).  When PurgeNonAlpha() reports that
// it changed the string, the sequence data is re-stored as Ncbieaa.  For a
// supported encoding the Seq-inst length is always set to the length of the
// (possibly purged) sequence string.
//
// A Bioseq without sequence data, or with data in any other encoding
// (e.g. a nucleotide encoding), is left completely untouched.  Previously
// such a Bioseq had its length overwritten with 0, because the working
// string was never filled in.
//
// Returns true if the stored sequence was modified, false otherwise.
bool CSeqAnnotFromFasta::PurgeNonAlphaFromSequence(CBioseq& bioseq) {

    if (!bioseq.GetInst().IsSetSeq_data()) {
        return false;
    }

    CSeq_data& seqData = bioseq.SetInst().SetSeq_data();

    //  Pull the residues out as a letter string, whatever the protein encoding.
    string originalSequence;
    if (seqData.IsNcbieaa()) {
        originalSequence = seqData.SetNcbieaa().Set();
    } else if (seqData.IsIupacaa()) {
        originalSequence = seqData.SetIupacaa().Set();
    } else if (seqData.IsNcbistdaa()) {
        std::vector < char >& vec = seqData.SetNcbistdaa().Set();
        NcbistdaaToNcbieaaString(vec, &originalSequence);
    } else {
        //  Unsupported encoding:  nothing was extracted, so do not touch
        //  the data or the recorded length.
        return false;
    }

    bool result = false;
    if (PurgeNonAlpha(originalSequence)) {
        //  The string was changed; store it back as Ncbieaa regardless of
        //  the encoding it came from.
        seqData.Select(CSeq_data::e_Ncbieaa);
        seqData.SetNcbieaa().Set(originalSequence);
        result = true;
    }

    //  Keep the recorded length consistent with the sequence string.
    const CSeq_inst::TLength newLength =
        static_cast<CSeq_inst::TLength>(originalSequence.length());
    bioseq.SetInst().SetLength(newLength);

    return result;
}
Exemplo n.º 5
0
// Builds a Sequence from a Bioseq.
//
// The object starts in the CAV_ERROR_SEQUENCES state and is switched to
// CAV_SUCCESS only if a sequence string could be extracted.  Along the way
// the constructor:
//   - takes the description from the first title descriptor, or from the
//     first PDB descriptor with a non-empty compound list, whichever comes
//     first in the descriptor list;
//   - looks through "ids" annotations for a general id in the "mmdb"
//     database with an integer tag and records it as the MMDB link;
//   - converts the raw sequence data (protein or nucleotide encodings) into
//     a plain string and checks it against the recorded length, if any.
// On any failure a Critical message is posted and the constructor returns
// with the error status still in place.
Sequence::Sequence(const CBioseq& bioseq) :
    status(CAV_ERROR_SEQUENCES), bioseqASN(&bioseq), seqIDs(bioseq.GetId()), mmdbLink(NOT_SET)
{
    // description: title, or first compound name of a PDB descriptor
    if (bioseq.IsSetDescr()) {
        const CSeq_descr::Tdata::const_iterator descEnd = bioseq.GetDescr().Get().end();
        CSeq_descr::Tdata::const_iterator desc = bioseq.GetDescr().Get().begin();
        for (; desc != descEnd; ++desc) {
            if ((*desc)->IsTitle()) {
                description = (*desc)->GetTitle();
                break;
            }
            if ((*desc)->IsPdb() && !(*desc)->GetPdb().GetCompound().empty()) {
                description = (*desc)->GetPdb().GetCompound().front();
                break;
            }
        }
    }

    // link to MMDB id - mainly for CDD's where Biostrucs have to be loaded separately
    if (bioseq.IsSetAnnot()) {
        bool linkFound = false;
        const CBioseq::TAnnot::const_iterator annotEnd = bioseq.GetAnnot().end();
        CBioseq::TAnnot::const_iterator annot = bioseq.GetAnnot().begin();
        for (; annot != annotEnd; ++annot) {
            if (!(*annot)->GetData().IsIds())
                continue;

            // take the first general "mmdb" id that carries an integer tag
            const CSeq_annot::C_Data::TIds& idList = (*annot)->GetData().GetIds();
            CSeq_annot::C_Data::TIds::const_iterator id = idList.begin();
            for (; id != idList.end(); ++id) {
                if ((*id)->IsGeneral() &&
                    (*id)->GetGeneral().GetDb() == "mmdb" &&
                    (*id)->GetGeneral().GetTag().IsId())
                {
                    mmdbLink = (*id)->GetGeneral().GetTag().GetId();
                    linkFound = true;
                    break;
                }
            }
            if (linkFound)
                break;
        }
    }
    if (mmdbLink != NOT_SET)
        ERR_POST_X(3, Info << "sequence " << GetTitle() << " is from MMDB id " << mmdbLink);

    // only raw representations that actually carry data can be converted
    const CSeq_inst& inst = bioseq.GetInst();
    if (inst.GetRepr() != CSeq_inst::eRepr_raw || !inst.IsSetSeq_data()) {
        ERR_POST_X(6, Critical << "Sequence::Sequence() - sequence " << GetTitle()
                      << ": confused by sequence representation");
        return;
    }

    const CSeq_data& data = inst.GetSeq_data();

    // protein encodings
    if (data.IsNcbieaa()) {
        sequenceString = data.GetNcbieaa().Get();
    }
    else if (data.IsIupacaa()) {
        sequenceString = data.GetIupacaa().Get();
    }
    else if (data.IsNcbistdaa()) {
        StringFromStdaa(data.GetNcbistdaa().Get(), &sequenceString);
    }
    // nucleotide encodings
    else if (data.IsIupacna()) {
        sequenceString = data.GetIupacna().Get();
    }
    else if (data.IsNcbi4na()) {
        StringFrom4na(data.GetNcbi4na().Get(), &sequenceString,
            (inst.GetMol() == CSeq_inst::eMol_dna));
    }
    else if (data.IsNcbi8na()) {
        // same repr. for non-X as 4na
        StringFrom4na(data.GetNcbi8na().Get(), &sequenceString,
            (inst.GetMol() == CSeq_inst::eMol_dna));
    }
    else if (data.IsNcbi2na()) {
        StringFrom2na(data.GetNcbi2na().Get(), &sequenceString,
            (inst.GetMol() == CSeq_inst::eMol_dna));
        // cut the decoded string back if it is longer than the recorded length
        if (inst.IsSetLength() && inst.GetLength() < sequenceString.length())
            sequenceString.resize(inst.GetLength());
    }
    else {
        ERR_POST_X(4, Critical << "Sequence::Sequence() - sequence " << GetTitle()
                      << ": confused by sequence string format");
        return;
    }

    // the recorded length, when present, must match what was decoded
    if (inst.IsSetLength() && inst.GetLength() != sequenceString.length()) {
        ERR_POST_X(5, Critical << "Sequence::Sequence() - sequence string length mismatch");
        return;
    }

    status = CAV_SUCCESS;
}