Esempio n. 1
0
    /// Performs sanity checks to make sure that the sequence requested is of
    /// the expected type. If the tests fail, an exception is thrown.
    /// @param id Sequence id for this sequence [in]
    void x_ValidateMoleculeType(CConstRef<CSeq_id> id) 
    {
        _ASSERT(m_BioseqMaker.NotEmpty());

        if (id.Empty())
        {
            NCBI_THROW(CInputException, eInvalidInput,
                       "Empty SeqID passed to the molecule type validation");
        }

        bool isProtein = m_BioseqMaker->IsProtein(id);
        if (!isProtein && m_ReadProteins)
        {
            NCBI_THROW(CInputException, eSequenceMismatch,
               "GI/accession/sequence mismatch: protein input required but nucleotide provided");
        }
        if (isProtein && !m_ReadProteins)
        {
            NCBI_THROW(CInputException, eSequenceMismatch,
               "GI/accession/sequence mismatch: nucleotide input required but protein provided");
        }

        if (!isProtein)  // Never seen a virtual protein sequence.
        {
             if (m_BioseqMaker->HasSequence(id) == false)
             {
                  string message = "No sequence available for " + id->AsFastaString();
                  NCBI_THROW(CInputException, eInvalidInput, message);
             }
        }
    }
pair<double, bool> CScoreUniqSeqCoverage::MakeScore(CBioseq_Handle const& query_handle, vector<CSeq_align const*>::const_iterator begin, vector<CSeq_align const*>::const_iterator end)
{
    CConstRef<CBioseq> bioseq = query_handle.GetCompleteBioseq();

    unsigned int qlen = 0;
    if ( !bioseq.Empty() && bioseq->IsSetLength()) {
        qlen = bioseq->GetLength();
    }

    if ( !qlen ) {
        return make_pair(0, false);
    }

    bool isDenDiag = ( (*begin)->GetSegs().Which() == CSeq_align::C_Segs::e_Dendiag) ?
                              true : false;

    CRangeCollection<TSeqPos> subj_rng_coll((*begin)->GetSeqRange(1));
    CRange<TSeqPos> q_rng((*begin)->GetSeqRange(0));
    
    CRangeCollection<TSeqPos> query_rng_coll(s_FixMinusStrandRange(q_rng));
    
    for( ++begin; begin != end; ++begin ) {
        const CRange<TSeqPos> align_subj_rng((*begin)->GetSeqRange(1));
        // subject range should always be on the positive strand
        assert(align_subj_rng.GetTo() > align_subj_rng.GetFrom());
        CRangeCollection<TSeqPos> coll(align_subj_rng);
        coll.Subtract(subj_rng_coll);

        if ( coll.empty() ) {
            continue;
        }

        if(coll[0] == align_subj_rng) {
            CRange<TSeqPos> query_rng ((*begin)->GetSeqRange(0));
            query_rng_coll += s_FixMinusStrandRange(query_rng);
            subj_rng_coll += align_subj_rng;
        }
        else {
            ITERATE (CRangeCollection<TSeqPos>, uItr, coll) {
                CRange<TSeqPos> query_rng;
                const CRange<TSeqPos> & subj_rng = (*uItr);
                CRef<CSeq_align> densegAln;
                if ( isDenDiag) {
                    densegAln = CreateDensegFromDendiag(**begin);
                }

                CAlnMap map( (isDenDiag) ? densegAln->GetSegs().GetDenseg() : (*begin)->GetSegs().GetDenseg());
                TSignedSeqPos subj_aln_start =  map.GetAlnPosFromSeqPos(1,subj_rng.GetFrom());
                TSignedSeqPos subj_aln_end =  map.GetAlnPosFromSeqPos(1,subj_rng.GetTo());
                query_rng.SetFrom(map.GetSeqPosFromAlnPos(0,subj_aln_start));
                query_rng.SetTo(map.GetSeqPosFromAlnPos(0,subj_aln_end));

                query_rng_coll += s_FixMinusStrandRange(query_rng);
                subj_rng_coll += subj_rng;
            }
        }
    }
Esempio n. 3
0
bool CLocation_constraint :: Match(const CSeq_feat& feat, CConstRef <CSeq_feat> feat_to, CConstRef <CBioseq> feat_bioseq) const
{
  if (x_IsLocationConstraintEmpty()) {
     return true;
  }
 
  const CSeq_loc& feat_loc = feat.GetLocation();
  if (GetStrand() != eStrand_constraint_any) {
    if (feat_bioseq.Empty()) {
       return false;
    }
    else if (feat_bioseq->IsAa()) {
      if (feat_to.Empty()) {  // when feat is product, feat_to points to cds
         return false;
      }
      else if (!x_DoesStrandMatchConstraint (feat_to->GetLocation())) {
        return false;
      }
    }
    else if (!x_DoesStrandMatchConstraint (feat_loc)) {
        return false;
    }
  }

  if (!x_DoesBioseqMatchSequenceType(feat_bioseq, GetSeq_type())) {
     return false;
  }

  if (!x_DoesLocationMatchPartialnessConstraint (feat_loc)) {
     return false;
  }

  if (!x_DoesLocationMatchTypeConstraint (feat_loc)) {
     return false;
  }

  if (!x_DoesLocationMatchDistanceConstraint(feat_bioseq, feat_loc)) {
     return false;
  }

  return true;
};
Esempio n. 4
0
bool CLocation_constraint :: x_DoesLocationMatchDistanceConstraint(CConstRef <CBioseq> bioseq, const CSeq_loc& loc) const
{
  if (!CanGetEnd5() && !CanGetEnd3()) {
      return true;
  }

  unsigned pos = loc.GetStop(eExtreme_Positional);
  int pos2;
  if (bioseq.NotEmpty()) {
     pos2 = (bioseq->IsSetLength() ?  bioseq->GetLength() : 0) - pos - 1;
  }

  if (loc.GetStrand() == eNa_strand_minus) {
    if (CanGetEnd5()) {
      if (bioseq.Empty()) {
          return false;
      }
      else {
        if (!GetEnd5().Match(pos2)) {
           return false;
        }
      }
    }
    if (CanGetEnd3()) {
        return GetEnd3().Match(pos);
    }
  }
  else
  {
    if (CanGetEnd5() && !GetEnd5().Match(pos)) {
        return false;
    }
    if (CanGetEnd3()) {
      if (bioseq.Empty()) {
         return false;
      }
      return GetEnd3().Match(pos2);
    }
  }
  return true;
};
Esempio n. 5
0
void
CCmdLineBlastXML2ReportData::x_InitCommon(
	const CSearchResults & results,
    CConstRef<CBlastOptions> opts)
{
	if(opts.Empty()) {
		NCBI_THROW(CException, eUnknown, "blastxml2: Empty blast options");
	}

	if(m_Scope.Empty()) {
		NCBI_THROW(CException, eUnknown, "blastxml2: Empty scope");
	}

	x_FillScoreMatrix(m_Options->GetMatrixName());

	string resolved = SeqDB_ResolveDbPath("taxdb.bti");
	if(!resolved.empty()) {
		m_TaxDBFound = true;
	}

  	m_isIterative  = opts->IsIterativeSearch();
}
pair<double, bool> CScoreSeqCoverage::MakeScore(CBioseq_Handle const& query_handle, vector<CSeq_align const*>::const_iterator begin, vector<CSeq_align const*>::const_iterator end) 
{
    CConstRef<CBioseq> bioseq = query_handle.GetCompleteBioseq();

    unsigned int qlen = 0;
    if ( !bioseq.Empty() && bioseq->IsSetLength()) {
        qlen = bioseq->GetLength();
    }

    if ( !qlen ) {
        return make_pair(0, false);
    }

    // Subject coverage score
    CRangeCollection<TSeqPos> range_coll;

    for ( ; begin != end; ++begin ) {
        CRange<TSeqPos> range = (*begin)->GetSeqRange(0);
        s_FixMinusStrandRange(range);
        range_coll += range;            
    } 
    double score = ( 100.0 * range_coll.GetCoveredLength() ) / qlen;
    return make_pair(score, true);
}