void CSeq_data::DoConstruct(const string& value, E_Choice index) { switch (index) { case e_Iupacna: SetIupacna() = CIUPACna(value); break; case e_Iupacaa: SetIupacaa() = CIUPACaa(value); break; case e_Ncbieaa: SetNcbieaa() = CNCBIeaa(value); break; default: // throw an error NCBI_THROW (CException, eUnknown, "CSeq_data::DoConstruct: Invalid E_Choice index"); } }
// // CreateConsensus() // // compute a consensus sequence given a particular alignment // the rules for a consensus are: // - a segment is consensus gap if > 50% of the sequences are gap at this // segment. 50% exactly is counted as sequence // - for a segment counted as sequence, for each position, the most // frequently occurring base is counted as consensus. in the case of // a tie, the consensus is considered muddied, and the consensus is // so marked // CRef<CDense_seg> CAlnVec::CreateConsensus(int& consensus_row, CBioseq& consensus_seq, const CSeq_id& consensus_id) const { consensus_seq.Reset(); if ( !m_DS || m_NumRows < 1) { return CRef<CDense_seg>(); } bool isNucleotide = GetBioseqHandle(0).IsNucleotide(); size_t i; size_t j; // temporary storage for our consensus vector<string> consens(m_NumSegs); CreateConsensus(consens); // // now, create a new CDense_seg // we create a new CBioseq for our data and // copy the contents of the CDense_seg // string data; TSignedSeqPos total_bases = 0; CRef<CDense_seg> new_ds(new CDense_seg()); new_ds->SetDim(m_NumRows + 1); new_ds->SetNumseg(m_NumSegs); new_ds->SetLens() = m_Lens; new_ds->SetStarts().reserve(m_Starts.size() + m_NumSegs); if ( !m_Strands.empty() ) { new_ds->SetStrands().reserve(m_Strands.size() + m_NumSegs); } for (i = 0; i < consens.size(); ++i) { // copy the old entries for (j = 0; j < (size_t)m_NumRows; ++j) { int idx = i * m_NumRows + j; new_ds->SetStarts().push_back(m_Starts[idx]); if ( !m_Strands.empty() ) { new_ds->SetStrands().push_back(m_Strands[idx]); } } // add our new entry // this places the consensus as the last sequence // it should preferably be the first, but this would mean adjusting // the bioseq handle and seqvector caches, and all row numbers would // shift if (consens[i].length() != 0) { new_ds->SetStarts().push_back(total_bases); } else { new_ds->SetStarts().push_back(-1); } if ( !m_Strands.empty() ) { new_ds->SetStrands().push_back(eNa_strand_unknown); } total_bases += consens[i].length(); data += consens[i]; } // copy our IDs for (i = 0; i < m_Ids.size(); ++i) { new_ds->SetIds().push_back(m_Ids[i]); } // now, we construct a new Bioseq {{ // sequence ID CRef<CSeq_id> id(new CSeq_id()); id->Assign(consensus_id); consensus_seq.SetId().push_back(id); new_ds->SetIds().push_back(id); // add a description for this sequence CSeq_descr& desc = consensus_seq.SetDescr(); CRef<CSeqdesc> d(new CSeqdesc); desc.Set().push_back(d); d->SetComment("This is a generated consensus sequence"); // the main one: Seq-inst CSeq_inst& inst = consensus_seq.SetInst(); inst.SetRepr(CSeq_inst::eRepr_raw); inst.SetMol(isNucleotide ? CSeq_inst::eMol_na : CSeq_inst::eMol_aa); inst.SetLength(data.length()); CSeq_data& seq_data = inst.SetSeq_data(); if (isNucleotide) { CIUPACna& na = seq_data.SetIupacna(); na = CIUPACna(data); } else { CIUPACaa& aa = seq_data.SetIupacaa(); aa = CIUPACaa(data); } }} consensus_row = new_ds->GetIds().size() - 1; return new_ds; }