/// Runs a blastp search with two protein queries (GIs 129295 and 129296)
/// against the "ecoli.aa" protein database, using the search object and the
/// options handle obtained from the supplied factory.
/// @param factory Provides the ISeqSearch object and the blastp options [in]
/// @param impl Not referenced anywhere in this function body [in]
/// @return Copy of the result set produced by ISeqSearch::Run()
static CSearchResultSet
    RunMultipleProteinSearch(ISearchFactory& factory, const string& impl) 
    {
        // Components supplied by the factory: searcher first, then options
        CRef<ISeqSearch> searcher = factory.GetSeqSearch();
        CRef<CBlastOptionsHandle> opts_handle = factory.GetOptions(eBlastp);

        // Protein database to search against
        CConstRef<CSearchDatabase> target_db
            (new CSearchDatabase("ecoli.aa", 
                                 CSearchDatabase::eBlastDbIsProtein));

        // Two queries, each identified by GI; the ids and the SSeqLoc
        // holders stay alive until the function returns
        TSeqLocVector query_locs;

        CSeq_id first_id(CSeq_id::e_Gi, 129295);
        auto_ptr<SSeqLoc> first_loc
            (CTestObjMgr::Instance().CreateSSeqLoc(first_id));
        query_locs.push_back(*first_loc);

        CSeq_id second_id(CSeq_id::e_Gi, 129296);
        auto_ptr<SSeqLoc> second_loc
            (CTestObjMgr::Instance().CreateSSeqLoc(second_id));
        query_locs.push_back(*second_loc);

        CRef<IQueryFactory> query_source
            (new CObjMgr_QueryFactory(query_locs));

        // Search parameters
        opts_handle->SetEvalueThreshold(1.0);
        opts_handle->SetHitlistSize(25);

        // Hand everything to the search object and execute it
        searcher->SetQueryFactory(query_source);
        searcher->SetSubject(target_db);
        searcher->SetOptions(opts_handle);
        return *searcher->Run();
    }
 /// Builds one whole-sequence location for the query GI and one for the
 /// subject GI and appends them to m_vQuery and m_vSubject respectively.
 ///
 /// Each location is paired with its own CScope, created from the test
 /// object manager and initialised with AddDefaults().
 /// @param query_gi GI used for the query location [in]
 /// @param subject_gi GI used for the subject location [in]
 void setupQueryAndSubject(int query_gi, int subject_gi) 
 {
     CRef<CSeq_loc> query_loc(new CSeq_loc());
     query_loc->SetWhole().SetGi(query_gi);
     // The scope is held in a CRef from the moment it is allocated so that
     // it is released if AddDefaults() or the push_back below throws.
     CRef<CScope> query_scope
         (new CScope(CTestObjMgr::Instance().GetObjMgr()));
     query_scope->AddDefaults();
     m_vQuery.push_back(SSeqLoc(query_loc, query_scope));
     
     CRef<CSeq_loc> subject_loc(new CSeq_loc());
     subject_loc->SetWhole().SetGi(subject_gi);
     // Same ownership rule as for the query scope above.
     CRef<CScope> subject_scope
         (new CScope(CTestObjMgr::Instance().GetObjMgr()));
     subject_scope->AddDefaults();
     m_vSubject.push_back(SSeqLoc(subject_loc, subject_scope));
 }
// Example #3
// 0
/// Reads sequence locations from the input source until the accumulated
/// sequence length reaches the batch size or the input is exhausted.
///
/// A CObjReaderParseException with error code eEOF ends the batch quietly;
/// any other exception from the source is rethrown. A location that is
/// neither an interval nor a whole Seq-loc aborts the program.
/// @param scope Scope passed through to the input source [in]
/// @return Locations read for this batch
TSeqLocVector
CBlastInput::GetNextSeqLocBatch(CScope& scope)
{
    TSeqLocVector batch;
    TSeqPos total_length = 0;

    while (total_length < GetBatchSize() && !End()) {

        try {
            batch.push_back(m_Source->GetNextSSeqLoc(scope));
        }
        catch (const CObjReaderParseException& e) {
            if (e.GetErrCode() != CObjReaderParseException::eEOF) {
                throw;
            }
            break;
        }

        SSeqLoc& latest = batch.back();
        const bool is_interval = latest.seqloc->IsInt();

        if (!is_interval && !latest.seqloc->IsWhole()) {
            // programmer error, CBlastInputSource should only return Seq-locs
            // of type interval or whole
            abort();
        }

        // In both cases the length counted is that of the sequence the
        // location's Seq-id refers to.
        const CSeq_id& id = is_interval ? latest.seqloc->GetInt().GetId()
                                        : latest.seqloc->GetWhole();
        total_length += sequence::GetLength(id, latest.scope);
    }

    return batch;
}
 /// Installs a single Bioseq as the subject: replaces m_Scope with a fresh
 /// scope containing the Bioseq and points m_Subject at a query factory
 /// built from one whole-sequence location for it.
 /// @param bioseq Bioseq to use as the subject [in]
 void x_SetupSubject(CConstRef<CBioseq> bioseq) {
     // Fresh scope holding just this Bioseq
     m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));
     CConstRef<CSeq_id> subject_id = m_Scope->AddBioseq(*bioseq).GetSeqId();

     // Whole-sequence location keyed by the id the scope reported
     CRef<CSeq_loc> whole_loc(new CSeq_loc());
     whole_loc->SetWhole();
     whole_loc->SetId(*subject_id);

     TSeqLocVector subject_locs;
     subject_locs.push_back(SSeqLoc(*whole_loc, *m_Scope));
     m_Subject.Reset(new CObjMgr_QueryFactory(subject_locs));
 }
/// Converts a list of Bioseqs into a TSeqLocVector. All Bioseqs are added to
/// the same CScope object
/// @param subjects Bioseqs to convert
/// @return One interval location per Bioseq, spanning offsets 0 through
/// length-1 and identified by the Seq-id FindBestChoice selects with
/// CSeq_id::BestRank
static TSeqLocVector
s_ConvertBioseqs2TSeqLocVector(const CBlast4_subject::TSequences& subjects)
{
    TSeqLocVector retval;
    CRef<CScope> subj_scope(new CScope(*CObjectManager::GetInstance()));
    ITERATE(CBlast4_subject::TSequences, bioseq, subjects) {
        subj_scope->AddBioseq(**bioseq);
        CRef<CSeq_id> seqid = FindBestChoice((*bioseq)->GetId(),
                                             CSeq_id::BestRank);
        const TSeqPos length = (*bioseq)->GetInst().GetLength();
        CRef<CSeq_loc> sl(new CSeq_loc(*seqid, 0, length-1));
        retval.push_back(SSeqLoc(sl, subj_scope));
    }
    // The definition previously stopped after the loop: no value was returned
    // and the function body was never closed.
    return retval;
}
 /// Installs every Bioseq found in a Bioseq-set as a subject: replaces
 /// m_Scope with a fresh scope, adds each Bioseq to it, and points m_Subject
 /// at a query factory built from one whole-sequence location per Bioseq.
 /// @param bioseq_set Set whose Bioseqs become the subjects [in]
 void x_SetupSubject(CConstRef<CBioseq_set> bioseq_set) {
     m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));

     TSeqLocVector subject_locs;
     for (CTypeConstIterator<CBioseq>
              bs(ConstBegin(*bioseq_set, eDetectLoops));
          bs;
          ++bs) {
         // Register the Bioseq and keep the id the scope reports for it
         CConstRef<CSeq_id> subject_id = m_Scope->AddBioseq(*bs).GetSeqId();

         CRef<CSeq_loc> whole_loc(new CSeq_loc());
         whole_loc->SetWhole();
         whole_loc->SetId(*subject_id);

         subject_locs.push_back(SSeqLoc(*whole_loc, *m_Scope));
     }

     m_Subject.Reset(new CObjMgr_QueryFactory(subject_locs));
 }
// Example #7
// 0
/// Reads every remaining sequence location from the input source.
///
/// Reading stops when End() reports true or when the source throws a
/// CObjReaderParseException with error code eEOF; any other exception from
/// the source is rethrown.
/// @param scope Scope passed through to the input source [in]
/// @return All locations read
TSeqLocVector
CBlastInput::GetAllSeqLocs(CScope& scope)
{
    TSeqLocVector all_locs;

    for (;;) {
        if (End()) {
            break;
        }

        try {
            all_locs.push_back(m_Source->GetNextSSeqLoc(scope));
        }
        catch (const CObjReaderParseException& e) {
            if (e.GetErrCode() != CObjReaderParseException::eEOF) {
                throw;
            }
            break;
        }
    }

    return all_locs;
}
/** Reads FASTA file and creates a TSeqLocVector type for the read sequence(s). 
 * Restricts sequences to an interval, if requested. 
 *
 * The offsets are decremented by one before use. A value that is 0 after
 * the decrement (i.e. an argument of 0 or 1) means "not specified": the
 * interval then starts at offset 0 and/or ends at the last residue. A start
 * is honoured only if it lies strictly before the end offset actually used
 * for the sequence.
 *
 * @param in Input file stream [in]
 * @param objmgr Object manager reference [in]
 * @param strand What strand to use if it is a nucleotide sequence [in]
 * @param from Starting offset of an interval [in]
 * @param to Ending offset of an interval (end of sequence if 0) [in]
 * @param counter What index to start assigning local ids from? First unused 
 *                index on exit. [in] [out]
 * @param get_lcase_mask Should lower case be masked? [in]
 * @return Vector of sequence location structures.
 * @throws std::runtime_error if ReadFasta returns no Seq-entry
 */
TSeqLocVector
BLASTGetSeqLocFromStream(std::istream& in, CObjectManager& objmgr, 
                         ENa_strand strand, int from, int to, 
                         int *counter, bool get_lcase_mask)
{
  TSeqLocVector retval;
  CRef<CSeq_entry> seq_entry;

  vector<CConstRef<CSeq_loc> > lcase_mask;

  CRef<CScope> scope(new CScope(objmgr));
  scope->AddDefaults();

  // Lower-case masks are collected only when the caller asked for them.
  if (get_lcase_mask) {
    seq_entry = ReadFasta(in, fReadFasta_AllSeqIds, counter, &lcase_mask);
  } else {
    seq_entry = ReadFasta(in, fReadFasta_AllSeqIds, counter);
  }
  if ( !seq_entry )
    throw std::runtime_error("Could not retrieve seq entry");

  int index = 0;
  scope->AddTopLevelSeqEntry(*seq_entry);

  // Shift both offsets down by one; 0 afterwards means "not specified".
  from = std::max(from - 1, 0);
  to = std::max(to - 1, 0);

  for (CTypeConstIterator<CBioseq> itr(ConstBegin(*seq_entry)); itr; ++itr) {

    CRef<CSeq_loc> seqloc(new CSeq_loc());
    // Offset of the last residue of this sequence.
    const TSeqPos last_pos =
      ncbi::objects::sequence::GetLength(*itr->GetId().front(), scope) - 1;

    // End of the interval: the requested end if it falls inside the
    // sequence, otherwise the last residue.
    TSeqPos stop = last_pos;
    if (to > 0 && static_cast<TSeqPos>(to) < last_pos)
      stop = static_cast<TSeqPos>(to);
    seqloc->SetInt().SetTo(stop);

    // Start of the interval: validated against the end actually used, so a
    // requested start is still honoured when no end was given (to == 0).
    // Comparing against the raw 'to' would discard it in that case.
    TSeqPos start = 0;
    if (from > 0 && static_cast<TSeqPos>(from) < stop)
      start = static_cast<TSeqPos>(from);
    seqloc->SetInt().SetFrom(start);

    seqloc->SetInt().SetStrand(strand);
    seqloc->SetInt().SetId().Assign(*itr->GetId().front());

    SSeqLoc sl(seqloc, scope);

    if (get_lcase_mask) {
      // The masks are held through CConstRef, while sl.mask is reset from a
      // non-const pointer, hence the const_cast.
      CSeq_loc* cs = const_cast<CSeq_loc*>(lcase_mask[index++].GetPointer());
      sl.mask.Reset(cs);
    }
    retval.push_back(sl);
  }

  return retval;
}