/// Reads sequences from the input source until the accumulated sequence
/// length reaches the batch size, the input reports End(), or the reader
/// signals end-of-file.
///
/// For an interval Seq-loc the length added is the one looked up for the
/// Seq-id of the interval; for a whole Seq-loc it is the one looked up for
/// the whole Seq-id. Any other Seq-loc type aborts the program.
///
/// @param scope Scope handed to the input source for every sequence read
/// @return The sequences read for this batch (may be empty)
/// @throws CObjReaderParseException for any parse error other than eEOF
TSeqLocVector
CBlastInput::GetNextSeqLocBatch(CScope& scope)
{
    TSeqLocVector batch;

    for (TSeqPos total_length = 0;
         total_length < GetBatchSize() && !End(); ) {

        try {
            batch.push_back(m_Source->GetNextSSeqLoc(scope));
        } catch (const CObjReaderParseException& e) {
            // Only end-of-file terminates the batch quietly
            if (e.GetErrCode() != CObjReaderParseException::eEOF) {
                throw;
            }
            break;
        }

        SSeqLoc& latest = batch.back();
        const bool is_interval = latest.seqloc->IsInt();

        if ( !is_interval && !latest.seqloc->IsWhole() ) {
            // programmer error, CBlastInputSource should only return
            // Seq-locs of type interval or whole
            abort();
        }

        if (is_interval) {
            total_length +=
                sequence::GetLength(latest.seqloc->GetInt().GetId(),
                                    latest.scope);
        } else {
            total_length +=
                sequence::GetLength(latest.seqloc->GetWhole(),
                                    latest.scope);
        }
    }

    return batch;
}
/// Runs a blastp search of two protein queries (GIs 129295 and 129296)
/// against the "ecoli.aa" protein database using the components supplied
/// by the given factory.
///
/// @param factory Provides the search object and the options handle
/// @param impl    Not referenced by this function
/// @return A copy of the result set produced by the search
static CSearchResultSet
RunMultipleProteinSearch(ISearchFactory& factory, const string& impl)
{
    // Pull the search object and its options out of the factory
    CRef<ISeqSearch> searcher = factory.GetSeqSearch();
    CRef<CBlastOptionsHandle> opts_handle = factory.GetOptions(eBlastp);

    CConstRef<CSearchDatabase> database
        (new CSearchDatabase("ecoli.aa",
                             CSearchDatabase::eBlastDbIsProtein));

    // Assemble the two queries
    TSeqLocVector query_locs;

    CSeq_id first_id(CSeq_id::e_Gi, 129295);
    auto_ptr<SSeqLoc> first_loc
        (CTestObjMgr::Instance().CreateSSeqLoc(first_id));
    query_locs.push_back(*first_loc);

    CSeq_id second_id(CSeq_id::e_Gi, 129296);
    auto_ptr<SSeqLoc> second_loc
        (CTestObjMgr::Instance().CreateSSeqLoc(second_id));
    query_locs.push_back(*second_loc);

    CRef<IQueryFactory> query_source(new CObjMgr_QueryFactory(query_locs));

    opts_handle->SetEvalueThreshold(1.0);
    opts_handle->SetHitlistSize(25);

    // Wire everything into the search object and execute it
    searcher->SetQueryFactory(query_source);
    searcher->SetSubject(database);
    searcher->SetOptions(opts_handle);

    return *searcher->Run();
}
/// Installs a single Bioseq as the subject.
///
/// A fresh scope replaces m_Scope, the Bioseq is added to it, and a
/// Seq-loc carrying the Seq-id reported by the resulting handle is wrapped
/// in a one-element TSeqLocVector from which m_Subject is rebuilt.
///
/// @param bioseq Sequence to use as the subject
void x_SetupSubject(CConstRef<CBioseq> bioseq)
{
    m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));

    CConstRef<CSeq_id> subject_id = m_Scope->AddBioseq(*bioseq).GetSeqId();

    CRef<CSeq_loc> subject_loc(new CSeq_loc());
    subject_loc->SetWhole();
    subject_loc->SetId(*subject_id);

    TSeqLocVector subject_locs;
    subject_locs.push_back(SSeqLoc(*subject_loc, *m_Scope));

    m_Subject.Reset(new CObjMgr_QueryFactory(subject_locs));
}
/// Parses FASTA text and returns the Seq-loc of one of the sequences in it.
///
/// The text is handed to BLASTGetSeqLocFromStream together with the object
/// manager and local-id counter held by this object.
///
/// @param fasta_str      NUL-terminated FASTA text; must not be null
/// @param s              Zero-based index of the sequence to return
/// @param strand         Strand passed through to BLASTGetSeqLocFromStream
/// @param from           Interval start passed through (narrowed to int)
/// @param to             Interval end passed through (narrowed to int)
/// @param get_lcase_mask Whether lower-case masking is requested
/// @return The Seq-loc at index @p s
/// @throws std::runtime_error if @p fasta_str is null or fewer than
///         s + 1 sequences were read
SSeqLoc
blast_sseq_loc_from_fasta::make(const char* fasta_str, std::size_t s,
                                ENa_strand strand, std::size_t from,
                                std::size_t to, bool get_lcase_mask)
{
    // Constructing a stream from a null pointer is undefined behaviour
    if ( !fasta_str ) {
        throw std::runtime_error("No fasta input was provided.");
    }

    std::istringstream is(fasta_str);

    // The callee takes int offsets; the narrowing the original performed
    // implicitly is spelled out here and deliberately not range-checked so
    // that existing callers keep seeing the same values.
    TSeqLocVector vec =
        BLASTGetSeqLocFromStream(is, *_objmngr, strand,
                                 static_cast<int>(from),
                                 static_cast<int>(to),
                                 &_counter, get_lcase_mask);

    // s is unsigned, so only the upper bound needs checking
    if (vec.size() <= s) {
        throw std::runtime_error("Could not read requested sequence from fasta file.");
    }

    return vec[s];
}
/// Converts a list of Bioseqs into a TSeqLocVector. All Bioseqs are added to /// the same CScope object /// @param subjects Bioseqs to convert static TSeqLocVector s_ConvertBioseqs2TSeqLocVector(const CBlast4_subject::TSequences& subjects) { TSeqLocVector retval; CRef<CScope> subj_scope(new CScope(*CObjectManager::GetInstance())); ITERATE(CBlast4_subject::TSequences, bioseq, subjects) { subj_scope->AddBioseq(**bioseq); CRef<CSeq_id> seqid = FindBestChoice((*bioseq)->GetId(), CSeq_id::BestRank); const TSeqPos length = (*bioseq)->GetInst().GetLength(); CRef<CSeq_loc> sl(new CSeq_loc(*seqid, 0, length-1)); retval.push_back(SSeqLoc(sl, subj_scope)); }
void setupQueryAndSubject(int query_gi, int subject_gi) { CRef<CSeq_loc> query_loc(new CSeq_loc()); query_loc->SetWhole().SetGi(query_gi); CScope* query_scope = new CScope(CTestObjMgr::Instance().GetObjMgr()); query_scope->AddDefaults(); m_vQuery.push_back(SSeqLoc(query_loc, query_scope)); CRef<CSeq_loc> subject_loc(new CSeq_loc()); subject_loc->SetWhole().SetGi(subject_gi); CScope* subject_scope = new CScope(CTestObjMgr::Instance().GetObjMgr()); subject_scope->AddDefaults(); m_vSubject.push_back(SSeqLoc(subject_loc, subject_scope)); }
/// Installs every Bioseq found in a Bioseq-set as a subject.
///
/// A fresh scope replaces m_Scope; each Bioseq reached by iterating the
/// set (with loop detection) is added to it and contributes one Seq-loc
/// carrying the Seq-id reported by its handle. m_Subject is then rebuilt
/// from the collected Seq-locs.
///
/// @param bioseq_set Set whose Bioseqs become the subjects
void x_SetupSubject(CConstRef<CBioseq_set> bioseq_set)
{
    m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));

    TSeqLocVector subject_locs;

    for (CTypeConstIterator<CBioseq> bioseq_it
             (ConstBegin(*bioseq_set, eDetectLoops));
         bioseq_it;
         ++bioseq_it) {

        CConstRef<CSeq_id> subject_id =
            m_Scope->AddBioseq(*bioseq_it).GetSeqId();

        CRef<CSeq_loc> subject_loc(new CSeq_loc());
        subject_loc->SetWhole();
        subject_loc->SetId(*subject_id);

        subject_locs.push_back(SSeqLoc(*subject_loc, *m_Scope));
    }

    m_Subject.Reset(new CObjMgr_QueryFactory(subject_locs));
}
/// Reads every remaining sequence from the input source.
///
/// Reading stops when End() reports true or the reader signals
/// end-of-file.
///
/// @param scope Scope handed to the input source for every sequence read
/// @return All sequences read (may be empty)
/// @throws CObjReaderParseException for any parse error other than eEOF
TSeqLocVector
CBlastInput::GetAllSeqLocs(CScope& scope)
{
    TSeqLocVector all_locs;

    for (;;) {
        if (End()) {
            break;
        }

        try {
            all_locs.push_back(m_Source->GetNextSSeqLoc(scope));
        } catch (const CObjReaderParseException& e) {
            // Only end-of-file ends the loop quietly
            if (e.GetErrCode() != CObjReaderParseException::eEOF) {
                throw;
            }
            break;
        }
    }

    return all_locs;
}
CQueryFactoryInfo::CQueryFactoryInfo(const TSeqLocVector& subj_seqs, EBlastProgramType program) : m_IsProt(Blast_SubjectIsProtein(program) ? true : false), m_MaxLength(0), m_MinLength(1), m_AvgLength(0), m_QuerySource(0), m_NumSeqs(subj_seqs.size()) { // Fix subject location for tblast[nx]. if (Blast_SubjectIsTranslated(program)) { TSeqLocVector temp_slv; vector<Int2> strand_v; ITERATE(TSeqLocVector, iter, subj_seqs) { strand_v.push_back((Int2) (*iter).seqloc->GetStrand()); CRef<CSeq_loc> sl(new CSeq_loc); sl->Assign(*((*iter).seqloc)); sl->SetStrand(eNa_strand_both); if ((*iter).mask) { CRef<CSeq_loc> mask_sl(new CSeq_loc); mask_sl->Assign(*((*iter).mask)); SSeqLoc sseq_loc(*sl, *((*iter).scope), *mask_sl); temp_slv.push_back(sseq_loc); } else { SSeqLoc sseq_loc(*sl, *((*iter).scope)); temp_slv.push_back(sseq_loc); } } SetupSubjects(temp_slv, program, &m_SeqBlkVector, &m_MaxLength); int index=0; ITERATE(vector<Int2>, s_iter, strand_v) { m_SeqBlkVector[index++]->subject_strand = *s_iter; } }
/// Empties the query and subject vectors when the fixture is torn down,
/// query first.
~BlastEngineTestFixture()
{
    m_vQuery.clear();
    m_vSubject.clear();
}
/** Reads FASTA input and creates a TSeqLocVector for the sequence(s) read.
 * Restricts every sequence to the same interval, if one is requested.
 *
 * Both offsets are decremented by one before use and values that end up
 * at or below zero mean "not specified". A start offset is honoured only
 * when it lies strictly before the effective end of the interval;
 * otherwise the interval starts at 0. An end offset is honoured only when
 * it lies before the last position of the sequence; otherwise the interval
 * runs to the last position.
 *
 * @param in Input stream [in]
 * @param objmgr Object manager reference [in]
 * @param strand What strand to use if it is a nucleotide sequence [in]
 * @param from Starting offset of an interval [in]
 * @param to Ending offset of an interval (end of sequence if 0) [in]
 * @param counter What index to start assigning local ids from? First unused
 *                index on exit. [in] [out]
 * @param get_lcase_mask Should lower case be masked? [in]
 * @return Vector of sequence location structures.
 * @throws std::runtime_error if no Seq-entry could be read, or if lower
 *         case masks were requested and fewer masks than sequences exist
 */
TSeqLocVector
BLASTGetSeqLocFromStream(std::istream& in, CObjectManager& objmgr,
                         ENa_strand strand, int from, int to,
                         int *counter, bool get_lcase_mask)
{
    TSeqLocVector retval;
    CRef<CSeq_entry> seq_entry;
    vector<CConstRef<CSeq_loc> > lcase_mask;
    CRef<CScope> scope(new CScope(objmgr));
    scope->AddDefaults();

    if (get_lcase_mask) {
        seq_entry = ReadFasta(in, fReadFasta_AllSeqIds, counter, &lcase_mask);
    } else {
        seq_entry = ReadFasta(in, fReadFasta_AllSeqIds, counter);
    }
    if ( !seq_entry ) {
        throw std::runtime_error("Could not retrieve seq entry");
    }

    scope->AddTopLevelSeqEntry(*seq_entry);

    // Shift the requested offsets down by one; anything that lands at or
    // below zero is treated as "not specified". Both values are
    // non-negative afterwards, so the unsigned conversion is lossless.
    const TSeqPos req_from = static_cast<TSeqPos>(std::max(from - 1, 0));
    const TSeqPos req_to   = static_cast<TSeqPos>(std::max(to - 1, 0));

    vector<CConstRef<CSeq_loc> >::size_type mask_index = 0;

    for (CTypeConstIterator<CBioseq> itr(ConstBegin(*seq_entry)); itr; ++itr) {

        const CSeq_id& first_id = *itr->GetId().front();

        // Last valid position of this sequence
        // NOTE(review): wraps around if the length lookup returns 0.
        const TSeqPos last_pos =
            ncbi::objects::sequence::GetLength(first_id, scope) - 1;

        // Effective end: the requested one if usable, else the last position
        const TSeqPos stop =
            (req_to > 0 && req_to < last_pos) ? req_to : last_pos;

        // Compare the start against the effective end, not the raw request:
        // with the raw value a start offset was dropped whenever the end
        // was left at 0 ("end of sequence"). stop never exceeds last_pos,
        // so this also keeps the start inside the sequence.
        const TSeqPos start =
            (req_from > 0 && req_from < stop) ? req_from : 0;

        CRef<CSeq_loc> seqloc(new CSeq_loc());
        seqloc->SetInt().SetTo(stop);
        seqloc->SetInt().SetFrom(start);
        seqloc->SetInt().SetStrand(strand);
        seqloc->SetInt().SetId().Assign(first_id);

        SSeqLoc sl(seqloc, scope);

        if (get_lcase_mask) {
            if (mask_index >= lcase_mask.size()) {
                throw std::runtime_error(
                    "Lower case mask is missing for a sequence");
            }
            // The masks are stored as const references while the value is
            // handed to the mask member as a non-const pointer, hence the
            // cast.
            CSeq_loc* mask = const_cast<CSeq_loc*>(
                lcase_mask[mask_index++].GetPointer());
            sl.mask.Reset(mask);
        }

        retval.push_back(sl);
    }

    return retval;
}