/// Sanity-checks a Bioseq that is expected to carry IUPAC nucleotide data.
///
/// Asserts (via _ASSERT) that:
///   - the Bioseq has at least one Seq-id,
///   - the IUPACna string is exactly as long as the declared Seq-inst length,
///   - every residue character lies in the range 'A'..'Z'.
///
/// @param seq  Bioseq to check. Its Seq-data is read through GetIupacna(),
///             so the caller is expected to pass IUPACna-encoded data.
void s_Check(const CBioseq& seq)
{
    _ASSERT(!seq.GetId().empty());

    const CSeq_inst& inst = seq.GetInst();
    const string& seqdata = inst.GetSeq_data().GetIupacna().Get();

    // Declared length and actual residue count must agree.
    _ASSERT(seqdata.size() == inst.GetLength());

    // Only upper-case letters are accepted as residues.
    ITERATE ( string, i, seqdata ) {
        _ASSERT(*i >= 'A' && *i <= 'Z');
    }
}
/// Constructs a sequence vector directly from a Bioseq object.
///
/// A sequence map is created for the Bioseq, the total length is taken from
/// that map (resolved against @p scope), and the molecule type is copied from
/// the Bioseq's Seq-inst. The coding member starts out as e_not_set and is
/// then established through SetCoding().
///
/// @param bioseq  Bioseq supplying the sequence map and molecule type.
/// @param scope   Scope stored in the vector and used to compute the length.
/// @param coding  Requested vector coding, forwarded to SetCoding().
/// @param strand  Strand stored in the vector.
CSeqVector::CSeqVector(const CBioseq& bioseq,
                       CScope*        scope,
                       EVectorCoding  coding,
                       ENa_strand     strand)
    : m_Scope(scope),
      m_SeqMap(CSeqMap::CreateSeqMapForBioseq(bioseq)),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    m_Size = m_SeqMap->GetLength(scope);
    m_Mol  = bioseq.GetInst().GetMol();

    // NOTE(review): SetCoding() is kept last; it presumably relies on the
    // members assigned above (e.g. m_Mol) -- confirm before reordering.
    SetCoding(coding);
}
bool CBlastBioseqMaker::IsEmptyBioseq(const CBioseq& bioseq) { if (bioseq.CanGetInst()) { const CSeq_inst& inst = bioseq.GetInst(); return (inst.GetRepr() == CSeq_inst::eRepr_raw && inst.CanGetMol() && inst.CanGetLength() && inst.CanGetSeq_data() == false); } return false; }
/// Removes non-alphabetic residues from a protein Bioseq's sequence data.
///
/// The sequence is read from NCBIeaa, IUPACaa or NCBIstdaa data (the latter
/// converted with NcbistdaaToNcbieaaString()) and handed to PurgeNonAlpha().
/// If PurgeNonAlpha() reports a change, the Seq-data is replaced by the
/// purged string stored as NCBIeaa. In either case the Seq-inst length is
/// set to the length of the resulting string.
///
/// A Bioseq without Seq-data, or whose Seq-data uses any other encoding, is
/// left completely untouched. (Previously the length of such a Bioseq was
/// overwritten with 0 because the working string stayed empty.)
///
/// @param bioseq  Bioseq to clean up in place.
/// @return true if PurgeNonAlpha() modified the sequence and the Seq-data
///         was rewritten; false otherwise.
bool CSeqAnnotFromFasta::PurgeNonAlphaFromSequence(CBioseq& bioseq)
{
    if (!bioseq.GetInst().IsSetSeq_data()) {
        return false;
    }

    CSeq_data& seqData = bioseq.SetInst().SetSeq_data();

    // Extract the residues as a character string, whatever the protein
    // encoding in use.
    string originalSequence;
    if (seqData.IsNcbieaa()) {
        originalSequence = seqData.SetNcbieaa().Set();
    } else if (seqData.IsIupacaa()) {
        originalSequence = seqData.SetIupacaa().Set();
    } else if (seqData.IsNcbistdaa()) {
        std::vector < char >& vec = seqData.SetNcbistdaa().Set();
        NcbistdaaToNcbieaaString(vec, &originalSequence);
    } else {
        // Unsupported encoding: do not touch the data or its length.
        return false;
    }

    bool result = false;

    // NOTE(review): PurgeNonAlpha() presumably edits the string in place and
    // returns true when something was removed -- confirm against its
    // definition.
    if (PurgeNonAlpha(originalSequence)) {
        seqData.Select(CSeq_data::e_Ncbieaa);
        seqData.SetNcbieaa().Set(originalSequence);
        result = true;
    }

    // Keep the declared length in sync with the (possibly shortened) data.
    const CSeq_inst::TLength newLength =
        static_cast<CSeq_inst::TLength>(originalSequence.length());
    bioseq.SetInst().SetLength(newLength);

    return result;
}
/// Builds a Sequence from a Bioseq.
///
/// Steps, in order:
///   1. description: the first title descriptor, or the first compound name
///      of the first PDB descriptor that has any compound names, whichever
///      is met first;
///   2. mmdbLink: the integer tag of the first "mmdb" general id found in an
///      ids-type annotation (logged at Info level when found);
///   3. sequenceString: decoded from the raw Seq-data (protein: NCBIeaa,
///      IUPACaa, NCBIstdaa; nucleotide: IUPACna, NCBI4na, NCBI8na, NCBI2na).
///
/// `status` is initialised to CAV_ERROR_SEQUENCES and becomes CAV_SUCCESS
/// only if the sequence string was obtained and agrees with the declared
/// length (when one is set). On any failure a Critical message is posted and
/// the constructor returns early, leaving the error status in place.
///
/// @param bioseq  Source Bioseq; its address is retained in bioseqASN.
Sequence::Sequence(const CBioseq& bioseq) :
    status(CAV_ERROR_SEQUENCES),
    bioseqASN(&bioseq),
    seqIDs(bioseq.GetId()),
    mmdbLink(NOT_SET)
{
    // try to get description from title or compound
    if (bioseq.IsSetDescr()) {
        CSeq_descr::Tdata::const_iterator d, de = bioseq.GetDescr().Get().end();
        for (d = bioseq.GetDescr().Get().begin(); d != de; ++d) {
            if (d->GetObject().IsTitle()) {
                description = d->GetObject().GetTitle();
                break;
            } else if (d->GetObject().IsPdb() && d->GetObject().GetPdb().GetCompound().size() > 0) {
                description = d->GetObject().GetPdb().GetCompound().front();
                break;
            }
        }
    }

    // get link to MMDB id - mainly for CDD's where Biostrucs have to be loaded separately
    if (bioseq.IsSetAnnot()) {
        CBioseq::TAnnot::const_iterator a, ae = bioseq.GetAnnot().end();
        for (a = bioseq.GetAnnot().begin(); a != ae; ++a) {
            if (a->GetObject().GetData().IsIds()) {
                CSeq_annot::C_Data::TIds::const_iterator
                    i, ie = a->GetObject().GetData().GetIds().end();
                for (i = a->GetObject().GetData().GetIds().begin(); i != ie; ++i) {
                    if (i->GetObject().IsGeneral() &&
                        i->GetObject().GetGeneral().GetDb() == "mmdb" &&
                        i->GetObject().GetGeneral().GetTag().IsId()) {
                        mmdbLink = i->GetObject().GetGeneral().GetTag().GetId();
                        break;
                    }
                }
                // an id was found in this annotation: stop scanning the rest
                if (i != ie) break;
            }
        }
    }
    if (mmdbLink != NOT_SET)
        ERR_POST_X(3, Info << "sequence " << GetTitle() << " is from MMDB id " << mmdbLink);

    // get sequence string
    const CSeq_inst& inst = bioseq.GetInst();
    if (inst.GetRepr() == CSeq_inst::eRepr_raw && inst.IsSetSeq_data()) {

        // protein formats
        if (inst.GetSeq_data().IsNcbieaa()) {
            sequenceString = inst.GetSeq_data().GetNcbieaa().Get();
        } else if (inst.GetSeq_data().IsIupacaa()) {
            sequenceString = inst.GetSeq_data().GetIupacaa().Get();
        } else if (inst.GetSeq_data().IsNcbistdaa()) {
            StringFromStdaa(inst.GetSeq_data().GetNcbistdaa().Get(), &sequenceString);
        }

        // nucleotide formats
        else if (inst.GetSeq_data().IsIupacna()) {
            sequenceString = inst.GetSeq_data().GetIupacna().Get();
        } else if (inst.GetSeq_data().IsNcbi4na()) {
            StringFrom4na(inst.GetSeq_data().GetNcbi4na().Get(), &sequenceString,
                (inst.GetMol() == CSeq_inst::eMol_dna));
            // NCBI4na packs two residues per byte and the decoder is given no
            // length, so an odd-length sequence may come back with a residue
            // decoded from the padding; trim to the declared length, as is
            // done for NCBI2na below.
            if (inst.IsSetLength() && inst.GetLength() < sequenceString.length())
                sequenceString.resize(inst.GetLength());
        } else if (inst.GetSeq_data().IsNcbi8na()) {  // same repr. for non-X as 4na
            StringFrom4na(inst.GetSeq_data().GetNcbi8na().Get(), &sequenceString,
                (inst.GetMol() == CSeq_inst::eMol_dna));
        } else if (inst.GetSeq_data().IsNcbi2na()) {
            StringFrom2na(inst.GetSeq_data().GetNcbi2na().Get(), &sequenceString,
                (inst.GetMol() == CSeq_inst::eMol_dna));
            // drop residues decoded from padding beyond the declared length
            if (inst.IsSetLength() && inst.GetLength() < sequenceString.length())
                sequenceString.resize(inst.GetLength());
        }

        else {
            ERR_POST_X(4, Critical << "Sequence::Sequence() - sequence " << GetTitle()
                << ": confused by sequence string format");
            return;
        }

        // the decoded string must agree with the declared length, if any
        if (inst.IsSetLength() && inst.GetLength() != sequenceString.length()) {
            ERR_POST_X(5, Critical << "Sequence::Sequence() - sequence string length mismatch");
            return;
        }
    } else {
        ERR_POST_X(6, Critical << "Sequence::Sequence() - sequence " << GetTitle()
            << ": confused by sequence representation");
        return;
    }

    status = CAV_SUCCESS;
}