/// Read the next sequence from the input reader and describe it as a
/// Seq-loc interval.
///
/// The sequence is read via m_InputReader->ReadOneSeq(), added to @p scope
/// as a top-level Seq-entry, and a new interval Seq-loc is built for it using
/// the strand and range stored in m_Config and the best-ranked Seq-id of the
/// Bioseq that was read.
///
/// @param lcase_mask
///   When m_Config.GetLowercaseMask() is true, this is overwritten with the
///   object returned by m_InputReader->SaveMask(); if that mask is set, its
///   strand is forced to plus. Otherwise it is only inspected, never assigned.
/// @param scope
///   Scope that receives the newly read Seq-entry (and, when the Bioseq has
///   no raw sequence data, the data loaders of the reader's query scope
///   source).
/// @return
///   Newly allocated interval Seq-loc for the sequence that was read.
/// @throws CInputException
///   eSequenceMismatch if the molecule type read does not match
///   m_ReadProteins; eInvalidStrand if a nucleotide strand is configured for
///   protein input; eInvalidRange if the configured range is inverted or
///   starts beyond the sequence length.
CRef<CSeq_loc>
CBlastFastaInputSource::x_FastaToSeqLoc(CRef<objects::CSeq_loc>& lcase_mask,
                                        CScope& scope)
{
    static const TSeqRange kEmptyRange(TSeqRange::GetEmpty());

    // The mask object is requested before the sequence is read.
    // NOTE(review): presumably the reader fills it in during ReadOneSeq() --
    // confirm against the reader's documentation.
    if (m_Config.GetLowercaseMask()) {
        lcase_mask = m_InputReader->SaveMask();
    }

    CRef<CSeq_entry> seq_entry(m_InputReader->ReadOneSeq());

    if (lcase_mask) {
        if (lcase_mask->Which() != CSeq_loc::e_not_set) {
            lcase_mask->SetStrand(eNa_strand_plus);
        }
        _ASSERT(lcase_mask->GetStrand() == eNa_strand_plus ||
                lcase_mask->GetStrand() == eNa_strand_unknown);
    }

    _ASSERT(seq_entry.NotEmpty());
    scope.AddTopLevelSeqEntry(*seq_entry);

    CTypeConstIterator<CBioseq> itr(ConstBegin(*seq_entry));
    CRef<CSeq_loc> retval(new CSeq_loc());

    // A Bioseq without raw sequence data needs the reader's data loaders
    // registered with the scope.
    if ( !blast::HasRawSequenceData(*itr) ) {
        CBlastInputReader* blast_reader =
            dynamic_cast<CBlastInputReader*>(m_InputReader.get());
        // NOTE(review): the cast result is only checked by _ASSERT; confirm
        // that this branch is reachable only when the reader really is a
        // CBlastInputReader.
        _ASSERT(blast_reader);
        CRef<CBlastScopeSource> scope_source =
            blast_reader->GetQueryScopeSource();
        scope_source->AddDataLoaders(CRef<CScope>(&scope));
    }

    // The molecule type that was read must agree with what was requested.
    if (m_ReadProteins && itr->IsNa()) {
        NCBI_THROW(CInputException, eSequenceMismatch,
                   "Nucleotide FASTA provided for protein sequence");
    } else if ( !m_ReadProteins && itr->IsAa() ) {
        NCBI_THROW(CInputException, eSequenceMismatch,
                   "Protein FASTA provided for nucleotide sequence");
    }

    // set strand
    if (m_Config.GetStrand() == eNa_strand_other ||
        m_Config.GetStrand() == eNa_strand_unknown) {
        // No explicit strand configured: use the default for the molecule type.
        retval->SetInt().SetStrand(m_ReadProteins ? eNa_strand_unknown
                                                  : eNa_strand_both);
    } else {
        if (m_ReadProteins) {
            NCBI_THROW(CInputException, eInvalidStrand,
                       "Cannot assign nucleotide strand to protein sequence");
        }
        retval->SetInt().SetStrand(m_Config.GetStrand());
    }

    // sanity checks for the range; a bound equal to the empty range's
    // corresponding bound is treated as 0
    const TSeqRange& range = m_Config.GetRange();
    const TSeqPos from =
        (range.GetFrom() == kEmptyRange.GetFrom()) ? 0 : range.GetFrom();
    const TSeqPos to =
        (range.GetTo() == kEmptyRange.GetTo()) ? 0 : range.GetTo();

    // Get the sequence length
    const TSeqPos seqlen = seq_entry->GetSeq().GetInst().GetLength();
    // An empty sequence is deliberately not rejected here; the check below
    // is kept disabled:
    //if (seqlen == 0) {
    //    NCBI_THROW(CInputException, eEmptyUserInput,
    //               "Query contains no sequence data");
    //}
    _ASSERT(seqlen != numeric_limits<TSeqPos>::max());

    if (to > 0 && to < from) {
        NCBI_THROW(CInputException, eInvalidRange,
                   "Invalid sequence range");
    }
    if (from > seqlen) {
        NCBI_THROW(CInputException, eInvalidRange,
                   "Invalid from coordinate (greater than sequence length)");
    }
    // N.B.: if the to coordinate is greater than or equal to the sequence
    // length, we fix that silently
    // NOTE(review): when seqlen is 0, seqlen-1 wraps if TSeqPos is unsigned --
    // confirm that callers cope with the resulting interval.
    const TSeqPos stop = (to > 0 && to < seqlen) ? to : (seqlen - 1);

    // set sequence range
    retval->SetInt().SetFrom(from);
    retval->SetInt().SetTo(stop);

    // set ID
    retval->SetInt().SetId().Assign(*FindBestChoice(itr->GetId(),
                                                    CSeq_id::BestRank));

    return retval;
}