Ejemplo n.º 1
0
void CSeq_data::DoConstruct(const string& value, E_Choice index)
{

    switch (index) {
    case e_Iupacna:
        SetIupacna() = CIUPACna(value);
        break;
    case e_Iupacaa:
        SetIupacaa() = CIUPACaa(value);
        break;
    case e_Ncbieaa:
        SetNcbieaa() = CNCBIeaa(value);
        break;
    default:
        // throw an error
        NCBI_THROW (CException, eUnknown,
            "CSeq_data::DoConstruct: Invalid E_Choice index");
    }
}
Ejemplo n.º 2
0
//
// CreateConsensus()
//
// compute a consensus sequence given a particular alignment
// the rules for a consensus are:
//   - a segment is consensus gap if > 50% of the sequences are gap at this
//     segment.  50% exactly is counted as sequence
//   - for a segment counted as sequence, for each position, the most
//     frequently occurring base is counted as consensus.  in the case of
//     a tie, the consensus is considered muddied, and the consensus is
//     so marked
//
CRef<CDense_seg>
CAlnVec::CreateConsensus(int& consensus_row, CBioseq& consensus_seq,
                         const CSeq_id& consensus_id) const
{
    consensus_seq.Reset();
    if ( !m_DS || m_NumRows < 1) {
        return CRef<CDense_seg>();
    }

    bool isNucleotide = GetBioseqHandle(0).IsNucleotide();

    size_t i;
    size_t j;

    // temporary storage for our consensus
    vector<string> consens(m_NumSegs);

    CreateConsensus(consens);

    //
    // now, create a new CDense_seg
    // we create a new CBioseq for our data and
    // copy the contents of the CDense_seg
    //
    string data;
    TSignedSeqPos total_bases = 0;

    CRef<CDense_seg> new_ds(new CDense_seg());
    new_ds->SetDim(m_NumRows + 1);
    new_ds->SetNumseg(m_NumSegs);
    new_ds->SetLens() = m_Lens;
    new_ds->SetStarts().reserve(m_Starts.size() + m_NumSegs);
    if ( !m_Strands.empty() ) {
        new_ds->SetStrands().reserve(m_Strands.size() +
                                     m_NumSegs);
    }

    for (i = 0;  i < consens.size();  ++i) {
        // copy the old entries
        for (j = 0;  j < (size_t)m_NumRows;  ++j) {
            int idx = i * m_NumRows + j;
            new_ds->SetStarts().push_back(m_Starts[idx]);
            if ( !m_Strands.empty() ) {
                new_ds->SetStrands().push_back(m_Strands[idx]);
            }
        }

        // add our new entry
        // this places the consensus as the last sequence
        // it should preferably be the first, but this would mean adjusting
        // the bioseq handle and seqvector caches, and all row numbers would
        // shift
        if (consens[i].length() != 0) {
            new_ds->SetStarts().push_back(total_bases);
        } else {
            new_ds->SetStarts().push_back(-1);
        }
        
        if ( !m_Strands.empty() ) {
            new_ds->SetStrands().push_back(eNa_strand_unknown);
        }

        total_bases += consens[i].length();
        data += consens[i];
    }

    // copy our IDs
    for (i = 0;  i < m_Ids.size();  ++i) {
        new_ds->SetIds().push_back(m_Ids[i]);
    }

    // now, we construct a new Bioseq
    {{

         // sequence ID
         CRef<CSeq_id> id(new CSeq_id());
         id->Assign(consensus_id);
         consensus_seq.SetId().push_back(id);

         new_ds->SetIds().push_back(id);

         // add a description for this sequence
         CSeq_descr& desc = consensus_seq.SetDescr();
         CRef<CSeqdesc> d(new CSeqdesc);
         desc.Set().push_back(d);
         d->SetComment("This is a generated consensus sequence");

         // the main one: Seq-inst
         CSeq_inst& inst = consensus_seq.SetInst();
         inst.SetRepr(CSeq_inst::eRepr_raw);
         inst.SetMol(isNucleotide ? CSeq_inst::eMol_na : CSeq_inst::eMol_aa);
         inst.SetLength(data.length());

         CSeq_data& seq_data = inst.SetSeq_data();
         if (isNucleotide) {
             CIUPACna& na = seq_data.SetIupacna();
             na = CIUPACna(data);
         } else {
             CIUPACaa& aa = seq_data.SetIupacaa();
             aa = CIUPACaa(data);
         }
    }}

    consensus_row = new_ds->GetIds().size() - 1;
    return new_ds;
}