//------------------------------------------------------------------------------
void CheckDuplicates( const vector< string > & input,
                      const string & infmt,
                      const CWinMaskUtil::CIdSet * ids,
                      const CWinMaskUtil::CIdSet * exclude_ids )
{
    typedef vector< string >::const_iterator input_iterator;

    dup_lookup_table table;
    CRef<CObjectManager> om(CObjectManager::GetInstance());

    for( input_iterator i( input.begin() ); i != input.end(); ++i )
    {
        Uint4 seqnum( 0 );

        for(CWinMaskUtil::CInputBioseq_CI bs_iter(*i, infmt); bs_iter; ++bs_iter)
        {
            CBioseq_Handle bsh = *bs_iter;

            if( CWinMaskUtil::consider( bsh, ids, exclude_ids ) )
            {
                TSeqPos data_len = bsh.GetBioseqLength();
                if( data_len < MIN_SEQ_LENGTH )
                    continue;

                string id;
                sequence::GetId(bsh, sequence::eGetId_Best)
                    .GetSeqId()->GetLabel(&id);
                data_len -= SAMPLE_SKIP;
                tracker track( table, id );

                string index;
                CSeqVector data =
                    bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
                for( TSeqPos i = 0;  i < data_len;  ++i )
                {
                    index.erase();
                    data.GetSeqData(i, i + SAMPLE_LENGTH, index);
                    const dup_lookup_table::sample * sample( table[index] );

                    if( sample != 0 )
                        track( index, seqnum, i, sample->begin(), sample->end() );
                }

                table.add_seq_info( id, data );
                ++seqnum;
            }
        }
    }
}
// Example #2
// 0
/// Count the occurrences of every n-mer in a sequence.
///
/// @param seqvec  sequence to scan; its full contents are fetched with
///                GetSeqData(0, size, ...)
/// @param n       n-mer length
/// @param table   output; resized to NumberOfNmers(n) entries, with
///                table[k] set to the number of windows whose Nmer2Int()
///                code is k. Windows for which Nmer2Int() returns a negative
///                value (ambiguity characters) are not counted.
///
/// If the sequence is shorter than n, the table is returned with all
/// counts equal to zero.
void CNucProp::CountNmers(CSeqVector& seqvec, int n, vector<int>& table)
{
    const TSeqPos len = seqvec.size();

    // Size the table and zero every slot in a single step.
    const vector<int>::size_type num_nmers = NumberOfNmers(n);
    table.assign(num_nmers, 0);

    // TSeqPos is unsigned, so `len - n` would wrap around to a huge value
    // when the sequence is shorter than n, and the scan below would read far
    // past the end of the buffer. A negative n converts to a very large
    // TSeqPos here and is therefore rejected by the same test.
    if (len < static_cast<TSeqPos>(n)) {
        return;
    }

    string seq_string;
    seqvec.GetSeqData(0, len, seq_string);
    const char* seq = seq_string.data();

    // Last position at which a complete n-mer still fits.
    const TSeqPos last_start = len - static_cast<TSeqPos>(n);

    for (TSeqPos i = 0;  i <= last_start;  ++i) {
        const int nmerint = Nmer2Int(seq + i, n);
        if (nmerint >= 0) {   // negative means the window has ambiguity chars
            ++table[nmerint];
        }
    }
}
// Example #3
// 0
/// Fetch the whole sequence identified by a Seq-id in IUPAC coding.
///
/// @param result  string handed to CSeqVector::GetSeqData() to receive the
///                residues
/// @param id      Seq-id of the sequence to fetch
/// @param strand  strand passed on to GetSeqVector()
void CSimpleOM::GetIupac(string& result, const CSeq_id& id, ENa_strand strand)
{
    CSeqVector iupac_vec(GetSeqVector(id, strand));

    // Switch the coding first so both the size and the data refer to the
    // IUPAC representation.
    iupac_vec.SetIupacCoding();
    iupac_vec.GetSeqData(0, iupac_vec.size(), result);
}
// Example #4
// 0
/// Fetch the whole sequence identified by a GI in IUPAC coding.
///
/// @param result  string handed to CSeqVector::GetSeqData() to receive the
///                residues
/// @param gi      GI of the sequence to fetch
/// @param strand  strand passed on to GetSeqVector()
void CSimpleOM::GetIupac(string& result, TGi gi, ENa_strand strand)
{
    CSeqVector iupac_vec(GetSeqVector(gi, strand));

    // Switch the coding first so both the size and the data refer to the
    // IUPAC representation.
    iupac_vec.SetIupacCoding();
    iupac_vec.GetSeqData(0, iupac_vec.size(), result);
}