// Sanity-checks a Bioseq whose sequence data is read as IUPACna.
//
// Asserts (via _ASSERT) that:
//   - the Bioseq carries at least one id,
//   - the IUPACna string is exactly as long as the length recorded in Seq-inst,
//   - every character of the string is an upper-case letter 'A'..'Z'.
//
// The data is fetched with GetIupacna() without first testing which Seq-data
// variant is selected, so callers are expected to pass IUPACna-encoded input.
void s_Check(const CBioseq& seq)
{
    _ASSERT(!seq.GetId().empty());

    const CSeq_inst& inst = seq.GetInst();
    const string& seqdata = inst.GetSeq_data().GetIupacna().Get();
    _ASSERT(seqdata.size() == inst.GetLength());

    ITERATE ( string, i, seqdata ) {
        _ASSERT(*i >= 'A' && *i <= 'Z');
    }
}
// Builds a Sequence from an ASN.1 Bioseq.
//
// Steps, in order:
//   1. pick a description: the first title descriptor, or the first compound
//      name of the first PDB descriptor that has one - whichever comes first;
//   2. look for an MMDB id among the "ids" annotations (general db tag "mmdb"
//      with an integer tag) and log it if found;
//   3. decode the residue data of a raw Bioseq into sequenceString.
//
// status starts as CAV_ERROR_SEQUENCES and only becomes CAV_SUCCESS when the
// sequence string was decoded and agrees with the declared length (if any).
// Every failure path posts a Critical message and returns with the error
// status left in place.
Sequence::Sequence(const CBioseq& bioseq) :
    status(CAV_ERROR_SEQUENCES),
    bioseqASN(&bioseq),
    seqIDs(bioseq.GetId()),
    mmdbLink(NOT_SET)
{
    // description: title wins, otherwise first PDB compound name
    if (bioseq.IsSetDescr()) {
        const CSeq_descr::Tdata& descrList = bioseq.GetDescr().Get();
        for (const auto& descrRef : descrList) {
            const auto& descr = descrRef.GetObject();
            if (descr.IsTitle()) {
                description = descr.GetTitle();
                break;
            }
            if (descr.IsPdb() && descr.GetPdb().GetCompound().size() > 0) {
                description = descr.GetPdb().GetCompound().front();
                break;
            }
        }
    }

    // link to MMDB id - mainly for CDD's where Biostrucs have to be loaded separately
    if (bioseq.IsSetAnnot()) {
        const CBioseq::TAnnot& annotList = bioseq.GetAnnot();
        bool linkFound = false;
        for (const auto& annotRef : annotList) {
            const auto& annotData = annotRef.GetObject().GetData();
            if (!annotData.IsIds())
                continue;

            const CSeq_annot::C_Data::TIds& idList = annotData.GetIds();
            for (const auto& idRef : idList) {
                const auto& seqId = idRef.GetObject();
                if (seqId.IsGeneral() &&
                    seqId.GetGeneral().GetDb() == "mmdb" &&
                    seqId.GetGeneral().GetTag().IsId())
                {
                    mmdbLink = seqId.GetGeneral().GetTag().GetId();
                    linkFound = true;
                    break;
                }
            }

            // stop at the first annotation that yielded a link
            if (linkFound)
                break;
        }
    }
    if (mmdbLink != NOT_SET)
        ERR_POST_X(3, Info << "sequence " << GetTitle() << " is from MMDB id " << mmdbLink);

    // only raw Bioseqs that actually carry Seq-data can be turned into a string
    const CSeq_inst& inst = bioseq.GetInst();
    if (inst.GetRepr() != CSeq_inst::eRepr_raw || !inst.IsSetSeq_data()) {
        ERR_POST_X(6, Critical << "Sequence::Sequence() - sequence " << GetTitle()
            << ": confused by sequence representation");
        return;
    }

    const auto& seqData = inst.GetSeq_data();

    // protein formats
    if (seqData.IsNcbieaa()) {
        sequenceString = seqData.GetNcbieaa().Get();
    }
    else if (seqData.IsIupacaa()) {
        sequenceString = seqData.GetIupacaa().Get();
    }
    else if (seqData.IsNcbistdaa()) {
        StringFromStdaa(seqData.GetNcbistdaa().Get(), &sequenceString);
    }

    // nucleotide formats (the molecule type is only queried in these branches)
    else if (seqData.IsIupacna()) {
        sequenceString = seqData.GetIupacna().Get();
    }
    else if (seqData.IsNcbi4na()) {
        StringFrom4na(seqData.GetNcbi4na().Get(), &sequenceString,
            (inst.GetMol() == CSeq_inst::eMol_dna));
    }
    else if (seqData.IsNcbi8na()) {
        // same repr. for non-X as 4na, hence the 4na decoder is reused
        StringFrom4na(seqData.GetNcbi8na().Get(), &sequenceString,
            (inst.GetMol() == CSeq_inst::eMol_dna));
    }
    else if (seqData.IsNcbi2na()) {
        StringFrom2na(seqData.GetNcbi2na().Get(), &sequenceString,
            (inst.GetMol() == CSeq_inst::eMol_dna));
        // the decoded string may be longer than the declared length
        // (presumably padding from the packed encoding); trim the excess
        if (inst.IsSetLength() && inst.GetLength() < sequenceString.length())
            sequenceString.resize(inst.GetLength());
    }

    else {
        ERR_POST_X(4, Critical << "Sequence::Sequence() - sequence " << GetTitle()
            << ": confused by sequence string format");
        return;
    }

    // a declared length, when present, must match what was decoded
    if (inst.IsSetLength() && inst.GetLength() != sequenceString.length()) {
        ERR_POST_X(5, Critical << "Sequence::Sequence() - sequence string length mismatch");
        return;
    }

    status = CAV_SUCCESS;
}