/// Build (once) and return a Seq-entry wrapping the alignment that was read.
///
/// The result is cached in m_Entry; if m_Entry is already set it is returned
/// unchanged.  Otherwise a Bioseq-set of class pop-set is assembled holding
/// one Seq-annot with the alignment from GetSeqAlign() and one raw Bioseq per
/// row (m_Dim rows, ids from m_Ids, residues from m_SeqVec).
///
/// @return the cached or newly built Seq-entry.
/// @throws CObjReaderParseException (eFormat) when no entry is cached and
///         m_ReadDone is false.
CRef<CSeq_entry> CAlnReader::GetSeqEntry()
{
    if (m_Entry) {
        return m_Entry;
    }
    if ( !m_ReadDone ) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                    "CAlnReader::GetSeqEntry(): "
                    "Seq_entry is not available until after Read()", 0);
    }

    // Assemble into a local object and publish it to m_Entry only once it is
    // complete.  Assigning m_Entry up front would leave a half-built entry
    // cached (and returned by every later call) if anything below throws.
    CRef<CSeq_entry> entry(new CSeq_entry);

    CRef<CSeq_annot> seq_annot(new CSeq_annot);
    seq_annot->SetData().SetAlign().push_back(GetSeqAlign());

    entry->SetSet().SetClass(CBioseq_set::eClass_pop_set);
    entry->SetSet().SetAnnot().push_back(seq_annot);

    CBioseq_set::TSeq_set& seq_set = entry->SetSet().SetSeq_set();

    typedef CDense_seg::TDim TNumrow;
    for (TNumrow row_i = 0;  row_i < m_Dim;  row_i++) {
        const string& seq_str     = m_SeqVec[row_i];
        const size_t  seq_str_len = seq_str.size();

        CRef<CSeq_entry> seq_entry(new CSeq_entry);

        // seq-id(s): parse the row label as FASTA-style ids; if that yields
        // nothing, fall back to a single local id made from the raw label.
        CBioseq::TId& ids = seq_entry->SetSeq().SetId();
        CSeq_id::ParseFastaIds(ids, m_Ids[row_i], true);
        if (ids.empty()) {
            ids.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Local,
                                                    m_Ids[row_i])));
        }

        // mol: trust the accession flags of the first id when they say
        // nucleotide or protein; otherwise guess from the residues.  The
        // value stays eMol_not_set when the guess is inconclusive.
        CSeq_inst::EMol mol = CSeq_inst::eMol_not_set;
        CSeq_id::EAccessionInfo ai = ids.front()->IdentifyAccession();
        if (ai & CSeq_id::fAcc_nuc) {
            mol = CSeq_inst::eMol_na;
        } else if (ai & CSeq_id::fAcc_prot) {
            mol = CSeq_inst::eMol_aa;
        } else {
            switch (CFormatGuess::SequenceType(seq_str.data(), seq_str_len)) {
            case CFormatGuess::eNucleotide:
                mol = CSeq_inst::eMol_na;
                break;
            case CFormatGuess::eProtein:
                mol = CSeq_inst::eMol_aa;
                break;
            default:
                break;
            }
        }

        // seq-inst
        CRef<CSeq_inst> seq_inst(new CSeq_inst);
        seq_entry->SetSeq().SetInst(*seq_inst);
        seq_set.push_back(seq_entry);

        // repr
        seq_inst->SetRepr(CSeq_inst::eRepr_raw);

        // mol
        seq_inst->SetMol(mol);

        // len
        _ASSERT(seq_str_len == m_SeqLen[row_i]);
        seq_inst->SetLength(seq_str_len);

        // data: only proteins are stored as IUPACaa; everything else
        // (including eMol_not_set) is stored as IUPACna and then packed.
        CSeq_data& data = seq_inst->SetSeq_data();
        if (mol == CSeq_inst::eMol_aa) {
            data.SetIupacaa().Set(seq_str);
        } else {
            data.SetIupacna().Set(seq_str);
            CSeqportUtil::Pack(&data);
        }
    }

    m_Entry = entry;
    return m_Entry;
}
/// Create protein Bioseqs for coding-region features that have no product.
///
/// Walks the feature table of @p annot.  Each Cdregion feature without a
/// product is translated via CSeqTranslator::Translate using the scope of
/// @p handle; the translation is stored as a raw IUPACaa Bioseq with a local
/// id of the form "tpNNNNN" (number taken from sm_Counter, zero-padded to at
/// least five digits), and the feature's product is set to that whole Bioseq.
///
/// @param annot   annotation to scan; qualifying features are modified in
///                place (their product location is set).
/// @param handle  supplies the scope handed to the translator.
/// @return a Bioseq-set containing one Seq-entry per product created; the set
///         is returned empty when @p annot does not hold a feature table.
CRef<CBioseq_set> CMakeCdrProds::MakeCdrProds(CRef<CSeq_annot> annot,
                                              CBioseq_Handle   handle)
{
    typedef list<CRef<CSeq_feat> > TFeatList;

    CRef<CBioseq_set> products(new CBioseq_set);

    if (!annot->GetData().IsFtable()) {
        // Is this the right thing to do?
        // Could throw, or could return null CRef instead.
        return products;
    }

    TFeatList& features = annot->SetData().SetFtable();
    for (TFeatList::iterator it = features.begin();
         it != features.end();  ++it) {
        CSeq_feat& cds = **it;

        // Skip anything that is not a Cdregion, and any Cdregion that
        // already has a product (we do not make a second one).
        if (!cds.GetData().IsCdregion()  ||  cds.IsSetProduct()) {
            continue;
        }

        // Translate the coding region into a protein string.
        string protein;
        CSeqTranslator::Translate(cds, handle.GetScope(), protein);

        // Describe the translation as a raw amino-acid sequence instance.
        CRef<CSeq_data> residues(new CSeq_data(protein,
                                               CSeq_data::e_Iupacaa));
        CRef<CSeq_inst> inst(new CSeq_inst);
        inst->SetSeq_data(*residues);
        inst->SetRepr(CSeq_inst::eRepr_raw);
        inst->SetMol(CSeq_inst::eMol_aa);
        inst->SetLength(protein.size());

        // Next serial number, left-padded with zeros to five digits.
        string serial = NStr::NumericToString(sm_Counter.Add(1));
        if (serial.size() < 5) {
            serial = string(5 - serial.size(), '0') + serial;
        }
        const string accession = "tp" + serial;

        CRef<CBioseq> protein_seq(new CBioseq);
        CRef<CSeq_id> product_id(new CSeq_id("lcl|" + accession));
        protein_seq->SetId().push_back(product_id);

        // Descriptors: a title first, then the molecule type.
        CRef<CSeqdesc> title_desc(new CSeqdesc);
        title_desc->SetTitle("Translation product " + accession);
        protein_seq->SetDescr().Set().push_back(title_desc);

        CRef<CSeqdesc> mol_desc(new CSeqdesc);
        mol_desc->SetMol_type(eGIBB_mol_peptide);
        protein_seq->SetDescr().Set().push_back(mol_desc);

        protein_seq->SetInst(*inst);

        // Wrap the Bioseq in a Seq-entry and add it to the returned set.
        CRef<CSeq_entry> wrapper(new CSeq_entry);
        wrapper->SetSeq(*protein_seq);
        products->SetSeq_set().push_back(wrapper);

        // Point the feature's product at the new Bioseq as a whole.
        CRef<CSeq_loc> product_loc(new CSeq_loc);
        product_loc->SetWhole(*product_id);
        cds.SetProduct(*product_loc);
    }

    return products;
}