//------------------------------------------------------------------------------ void CheckDuplicates( const vector< string > & input, const string & infmt, const CWinMaskUtil::CIdSet * ids, const CWinMaskUtil::CIdSet * exclude_ids ) { typedef vector< string >::const_iterator input_iterator; dup_lookup_table table; CRef<CObjectManager> om(CObjectManager::GetInstance()); for( input_iterator i( input.begin() ); i != input.end(); ++i ) { Uint4 seqnum( 0 ); for(CWinMaskUtil::CInputBioseq_CI bs_iter(*i, infmt); bs_iter; ++bs_iter) { CBioseq_Handle bsh = *bs_iter; if( CWinMaskUtil::consider( bsh, ids, exclude_ids ) ) { TSeqPos data_len = bsh.GetBioseqLength(); if( data_len < MIN_SEQ_LENGTH ) continue; string id; sequence::GetId(bsh, sequence::eGetId_Best) .GetSeqId()->GetLabel(&id); data_len -= SAMPLE_SKIP; tracker track( table, id ); string index; CSeqVector data = bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac); for( TSeqPos i = 0; i < data_len; ++i ) { index.erase(); data.GetSeqData(i, i + SAMPLE_LENGTH, index); const dup_lookup_table::sample * sample( table[index] ); if( sample != 0 ) track( index, seqnum, i, sample->begin(), sample->end() ); } table.add_seq_info( id, data ); ++seqnum; } } } }
//------------------------------------------------------------------------- void CMaskWriterFasta::Print( objects::CBioseq_Handle& bsh, const TMaskList & mask, bool parsed_id ) { PrintId( bsh, parsed_id ); os << endl; CSeqVector data = bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac); /// FIXME: this can be implemented as a call to CFastaOstream, which /// understands masking via a seq-loc // if( dest->GetIupacna().CanGet() ) if( true ) { string accumulator; TMaskList::const_iterator imask = mask.begin(); for( TSeqPos i = 0; i < data.size(); ++i ) { char letter = data[i]; if( imask != mask.end() && i >= imask->first ) { if( i <= imask->second ) letter = tolower((unsigned char) letter); else { ++imask; if( imask != mask.end() && i >= imask->first && i <= imask->second ) letter = tolower((unsigned char) letter); } } accumulator.append( 1, letter ); if( !((i + 1)%60) ) { os << accumulator << "\n"; accumulator = ""; } } if( accumulator.length() ) os << accumulator << "\n"; } }
/// Count the occurrences of every n-mer in a sequence.
///
/// @param seqvec  sequence to scan; its full content is fetched with
///                GetSeqData()
/// @param n       n-mer length
/// @param table   output; set to NumberOfNmers(n) zeroed entries, then
///                table[k] is incremented for every window whose Nmer2Int()
///                code is k.  Windows with a negative code (ambiguity
///                characters) are not counted.
///
/// When the sequence is shorter than @a n there is no window to count and
/// @a table is returned all zero.
void CNucProp::CountNmers(CSeqVector& seqvec, int n, vector<int>& table)
{
    const TSeqPos len = seqvec.size();

    // Start from an all-zero table of the right size.
    const int num_nmers = NumberOfNmers(n);
    table.assign(static_cast<vector<int>::size_type>(num_nmers), 0);

    // `len - n` below is unsigned arithmetic: for a sequence shorter than n
    // it would wrap around to a huge bound and the loop would read far past
    // the end of the sequence buffer.
    if (n > 0  &&  len < static_cast<TSeqPos>(n)) {
        return;
    }

    string seq_string;
    seqvec.GetSeqData(0, len, seq_string);
    const char* seq = seq_string.data();

    const TSeqPos last_start = len - n;
    for (TSeqPos i = 0;  i <= last_start;  ++i) {
        const int nmerint = Nmer2Int(seq + i, n);
        if (nmerint >= 0) {  // if no ambiguity chars
            table[nmerint]++;
        }
    }
}
/// Attach this iterator to @a seq_vector.
///
/// If the iterator already has a sequence map, its segment iterator, cache
/// and backup cache are reset first.  The scope, sequence map, TSE, strand,
/// coding and randomizer are then copied from @a seq_vector, the cache
/// position is set to the vector's size, and the scanned range is zeroed.
void CSeqVector_CI::x_SetVector(CSeqVector& seq_vector)
{
    if ( m_SeqMap ) {
        // Previously attached: discard state that belongs to the old vector.
        m_Seg = CSeqMap_CI();
        x_ResetCache();
        x_ResetBackup();
    }

    // Take over the description of the new vector.
    m_Scope      = seq_vector.m_Scope;
    m_SeqMap     = seq_vector.m_SeqMap;
    m_TSE        = seq_vector.m_TSE;
    m_Strand     = seq_vector.m_Strand;
    m_Coding     = seq_vector.m_Coding;

    // NOTE(review): the cache position is put at the end of the sequence,
    // presumably to mark "nothing cached yet" -- confirm against the cache
    // lookup code.
    m_CachePos   = seq_vector.size();
    m_Randomizer = seq_vector.m_Randomizer;

    // Nothing has been scanned yet.
    m_ScannedEnd   = 0;
    m_ScannedStart = m_ScannedEnd;
}
int CNucProp::GetPercentGC(const CSeqVector& seqvec) { TSeqPos gc_count = 0; TSeqPos len = seqvec.size(); for (TSeqPos i = 0; i < len; ++i) { switch (seqvec[i]) { case 'C': case 'G': case 'S': ++gc_count; break; default: break; } } return (int) ((gc_count * 100.0) / len + 0.5); }
/// Fetch a whole sequence, identified by Seq-id, as IUPAC characters.
///
/// @param result  receives the sequence data for positions [0, size)
/// @param id      Seq-id, forwarded to GetSeqVector()
/// @param strand  strand, forwarded to GetSeqVector()
void CSimpleOM::GetIupac(string& result, const CSeq_id& id, ENa_strand strand)
{
    CSeqVector seq_vec(GetSeqVector(id, strand));

    // Switch the vector to IUPAC coding before reading any data from it.
    seq_vec.SetIupacCoding();
    seq_vec.GetSeqData(0, seq_vec.size(), result);
}
/// Fetch a whole sequence, identified by GI, as IUPAC characters.
///
/// @param result  receives the sequence data for positions [0, size)
/// @param gi      GI, forwarded to GetSeqVector()
/// @param strand  strand, forwarded to GetSeqVector()
void CSimpleOM::GetIupac(string& result, TGi gi, ENa_strand strand)
{
    CSeqVector seq_vec(GetSeqVector(gi, strand));

    // Switch the vector to IUPAC coding before reading any data from it.
    seq_vec.SetIupacCoding();
    seq_vec.GetSeqData(0, seq_vec.size(), result);
}