/// Build (on first use) and return a Seq-entry wrapping the alignment.
///
/// The entry is a Bioseq-set of class pop-set that carries one annotation
/// holding the alignment returned by GetSeqAlign(), plus one raw Bioseq per
/// alignment row (m_Dim rows, taken from m_Ids / m_SeqVec).  The result is
/// cached in m_Entry, so subsequent calls return the same object.
///
/// @return the cached or freshly built Seq-entry.
/// @throws CObjReaderParseException (eFormat) when no entry is cached and
///         m_ReadDone is not set.
CRef<CSeq_entry> CAlnReader::GetSeqEntry()
{
    if (m_Entry) {
        return m_Entry;
    }
    if ( !m_ReadDone ) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                    "CAlnReader::GetSeqEntry(): "
                    "Seq_entry is not available until after Read()", 0);
    }

    // Assemble into a local object and publish it to m_Entry only at the
    // very end: if anything below throws, the cache must stay empty so that
    // a later call does not hand out a half-built entry.
    CRef<CSeq_entry> entry(new CSeq_entry);

    CRef<CSeq_annot> seq_annot(new CSeq_annot);
    seq_annot->SetData().SetAlign().push_back(GetSeqAlign());

    entry->SetSet().SetClass(CBioseq_set::eClass_pop_set);
    entry->SetSet().SetAnnot().push_back(seq_annot);

    CBioseq_set::TSeq_set& seq_set = entry->SetSet().SetSeq_set();

    typedef CDense_seg::TDim TNumrow;
    for (TNumrow row_i = 0;  row_i < m_Dim;  row_i++) {
        const string& seq_str     = m_SeqVec[row_i];
        const size_t  seq_str_len = seq_str.size();

        CRef<CSeq_entry> seq_entry(new CSeq_entry);

        // seq-id(s): parse the row label as FASTA-style ids; if that yields
        // nothing, fall back to a single local id built from the raw label.
        CBioseq::TId& ids = seq_entry->SetSeq().SetId();
        CSeq_id::ParseFastaIds(ids, m_Ids[row_i], true);
        if (ids.empty()) {
            ids.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Local,
                                                    m_Ids[row_i])));
        }

        // mol: trust the accession type of the first id when it is
        // conclusive, otherwise guess from the residues themselves.
        CSeq_inst::EMol mol = CSeq_inst::eMol_not_set;
        CSeq_id::EAccessionInfo ai = ids.front()->IdentifyAccession();
        if (ai & CSeq_id::fAcc_nuc) {
            mol = CSeq_inst::eMol_na;
        } else if (ai & CSeq_id::fAcc_prot) {
            mol = CSeq_inst::eMol_aa;
        } else {
            switch (CFormatGuess::SequenceType(seq_str.data(), seq_str_len)) {
            case CFormatGuess::eNucleotide:  mol = CSeq_inst::eMol_na;  break;
            case CFormatGuess::eProtein:     mol = CSeq_inst::eMol_aa;  break;
            default:                         break;
            }
        }

        // seq-inst
        CRef<CSeq_inst> seq_inst(new CSeq_inst);
        seq_entry->SetSeq().SetInst(*seq_inst);
        seq_set.push_back(seq_entry);

        // repr
        seq_inst->SetRepr(CSeq_inst::eRepr_raw);

        // mol
        seq_inst->SetMol(mol);

        // len
        _ASSERT(seq_str_len == m_SeqLen[row_i]);
        seq_inst->SetLength(seq_str_len);

        // data: proteins are stored as IUPACaa; everything else (including
        // an undetermined molecule type) is stored as IUPACna and then
        // handed to CSeqportUtil::Pack().
        CSeq_data& data = seq_inst->SetSeq_data();
        if (mol == CSeq_inst::eMol_aa) {
            data.SetIupacaa().Set(seq_str);
        } else {
            data.SetIupacna().Set(seq_str);
            CSeqportUtil::Pack(&data);
        }
    }

    // Construction succeeded; cache the finished entry.
    m_Entry = entry;
    return m_Entry;
}
int CLocalFinderApp::Run(void) { CArgs myargs = GetArgs(); int left = myargs["from"].AsInteger(); int right = myargs["to"].AsInteger(); bool repeats = myargs["rep"]; // // read our sequence data // CFastaReader fastareader(myargs["input"].AsString()); CRef<CSeq_loc> masked_regions; masked_regions = fastareader.SaveMask(); CRef<CSeq_entry> se = fastareader.ReadOneSeq(); if(masked_regions) { CBioseq& bioseq = se->SetSeq(); // assumes that reader gets only one sequence per fasta id (no [] in file) CRef<CSeq_annot> seq_annot(new CSeq_annot); seq_annot->SetNameDesc("NCBI-FASTA-Lowercase"); bioseq.SetAnnot().push_back(seq_annot); CSeq_annot::C_Data::TFtable* feature_table = &seq_annot->SetData().SetFtable(); for(CSeq_loc_CI i(*masked_regions); i; ++i) { CRef<CSeq_feat> repeat(new CSeq_feat); CRef<CSeq_id> id(new CSeq_id); id->Assign(i.GetSeq_id()); CRef<CSeq_loc> loc(new CSeq_loc(*id, i.GetRange().GetFrom(), i.GetRange().GetTo())); repeat->SetLocation(*loc); repeat->SetData().SetImp().SetKey("repeat_region"); feature_table->push_back(repeat); } } CRef<CObjectManager> objmgr = CObjectManager::GetInstance(); CScope scope(*objmgr); scope.AddTopLevelSeqEntry(*se); CRef<CSeq_id> cntg(new CSeq_id); cntg->Assign(*se->GetSeq().GetFirstId()); CSeq_loc loc; loc.SetWhole(*cntg); CSeqVector vec(loc, scope); vec.SetIupacCoding(); CResidueVec seq; ITERATE(CSeqVector,i,vec) seq.push_back(*i); // read the alignment information TGeneModelList alignments; if(myargs["align"]) { CNcbiIstream& alignmentfile = myargs["align"].AsInputFile(); string our_contig = cntg->GetSeqIdString(true); string cur_contig; CAlignModel algn; while(alignmentfile >> algn >> getcontig(cur_contig)) { if (cur_contig==our_contig) alignments.push_back(algn); } } // create engine CRef<CHMMParameters> hmm_params(new CHMMParameters(myargs["model"].AsInputFile())); CGnomonEngine gnomon(hmm_params, seq, TSignedSeqRange(left, right)); // run! 
gnomon.Run(alignments, repeats, true, true, false, false, 10.0); // dump the annotation CRef<CSeq_annot> annot = gnomon.GetAnnot(*cntg); auto_ptr<CObjectOStream> os(CObjectOStream::Open(eSerial_AsnText, cout)); *os << *annot; return 0; }