//------------------------------------------------------------------------------
void CheckDuplicates( const vector< string > & input,
                      const string & infmt,
                      const CWinMaskUtil::CIdSet * ids,
                      const CWinMaskUtil::CIdSet * exclude_ids )
{
    typedef vector< string >::const_iterator input_iterator;

    dup_lookup_table table;
    CRef<CObjectManager> om(CObjectManager::GetInstance());

    for( input_iterator i( input.begin() ); i != input.end(); ++i )
    {
        Uint4 seqnum( 0 );

        for(CWinMaskUtil::CInputBioseq_CI bs_iter(*i, infmt); bs_iter; ++bs_iter)
        {
            CBioseq_Handle bsh = *bs_iter;

            if( CWinMaskUtil::consider( bsh, ids, exclude_ids ) )
            {
                TSeqPos data_len = bsh.GetBioseqLength();
                if( data_len < MIN_SEQ_LENGTH )
                    continue;

                string id;
                sequence::GetId(bsh, sequence::eGetId_Best)
                    .GetSeqId()->GetLabel(&id);
                data_len -= SAMPLE_SKIP;
                tracker track( table, id );

                string index;
                CSeqVector data =
                    bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
                for( TSeqPos i = 0;  i < data_len;  ++i )
                {
                    index.erase();
                    data.GetSeqData(i, i + SAMPLE_LENGTH, index);
                    const dup_lookup_table::sample * sample( table[index] );

                    if( sample != 0 )
                        track( index, seqnum, i, sample->begin(), sample->end() );
                }

                table.add_seq_info( id, data );
                ++seqnum;
            }
        }
    }
}
//-------------------------------------------------------------------------
void CMaskWriterFasta::Print( objects::CBioseq_Handle& bsh,
                              const TMaskList & mask,
                              bool parsed_id )
{
    PrintId( bsh, parsed_id );
    os << endl;
    CSeqVector data = bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac);

    /// FIXME: this can be implemented as a call to CFastaOstream, which
    /// understands masking via a seq-loc

    // if( dest->GetIupacna().CanGet() )
    if( true )
    {
        string accumulator;
        TMaskList::const_iterator imask = mask.begin();

        for( TSeqPos i = 0; i < data.size(); ++i )
        {
            char letter = data[i];

            if( imask != mask.end() && i >= imask->first ) {
                if( i <= imask->second ) 
                    letter = tolower((unsigned char) letter);
                else
                {
                    ++imask;

                    if(    imask != mask.end() 
                        && i >= imask->first && i <= imask->second )
                        letter = tolower((unsigned char) letter);
                }
            }

            accumulator.append( 1, letter );

            if( !((i + 1)%60) )
            {
                os << accumulator << "\n";
                accumulator = "";
            }
        }

        if( accumulator.length() ) os << accumulator << "\n";
    }
}
Beispiel #3
0
void CNucProp::CountNmers(CSeqVector& seqvec, int n, vector<int>& table)
{
    TSeqPos len = seqvec.size();

    table.resize(NumberOfNmers(n));

    // clear table
    for (int i = 0;  i < NumberOfNmers(n);  i++) {
        table[i] = 0;
    }

    string seq_string;
    seqvec.GetSeqData(0, len, seq_string);
    const char *seq;
    seq = seq_string.data();

    for (TSeqPos i = 0;  i <= len-n;  ++i) {
        int nmerint = Nmer2Int(seq+i, n);
        if (nmerint >= 0) {   // if no ambiguity chars
            table[nmerint]++;
        }
    }
}
Beispiel #4
0
void CSeqVector_CI::x_SetVector(CSeqVector& seq_vector)
{
    if ( m_SeqMap ) {
        // reset old values
        m_Seg = CSeqMap_CI();
        x_ResetCache();
        x_ResetBackup();
    }

    m_Scope  = seq_vector.m_Scope;
    m_SeqMap = seq_vector.m_SeqMap;
    m_TSE = seq_vector.m_TSE;
    m_Strand = seq_vector.m_Strand;
    m_Coding = seq_vector.m_Coding;
    m_CachePos = seq_vector.size();
    m_Randomizer = seq_vector.m_Randomizer;
    m_ScannedStart = m_ScannedEnd = 0;
}
Beispiel #5
0
int CNucProp::GetPercentGC(const CSeqVector& seqvec)
{
    TSeqPos gc_count = 0;
    TSeqPos len = seqvec.size();

    for (TSeqPos i = 0;  i < len;  ++i) {
        switch (seqvec[i]) {
        case 'C':
        case 'G':
        case 'S':
            ++gc_count;
            break;
        default:
            break;
        }
    }

    return (int) ((gc_count * 100.0) / len + 0.5);
}
Beispiel #6
0
void CSimpleOM::GetIupac(string& result, const CSeq_id& id, ENa_strand strand)
{
    CSeqVector vec = GetSeqVector(id, strand);
    vec.SetIupacCoding();
    vec.GetSeqData(0, vec.size(), result);
}
Beispiel #7
0
void CSimpleOM::GetIupac(string& result, TGi gi, ENa_strand strand)
{
    CSeqVector vec = GetSeqVector(gi, strand);
    vec.SetIupacCoding();
    vec.GetSeqData(0, vec.size(), result);
}