Example #1
0
/// Build (on first call) and return a Seq-entry describing the alignment.
///
/// The entry is a Bioseq-set of class pop-set that contains:
///   - one Seq-annot holding the alignment returned by GetSeqAlign(), and
///   - one raw Bioseq per alignment row (m_Dim rows), whose ids come from
///     m_Ids[row] and whose residues come from m_SeqVec[row].
///
/// The result is cached in m_Entry; subsequent calls return the cached
/// object without rebuilding it.
///
/// @return
///   The cached or newly built Seq-entry.
/// @throws CObjReaderParseException
///   (eFormat) when no entry is cached yet and m_ReadDone is not set,
///   i.e. the reader has not finished Read().  Exceptions raised by the
///   helpers used during construction propagate unchanged.
CRef<CSeq_entry> CAlnReader::GetSeqEntry()
{
    if (m_Entry) {
        return m_Entry;
    } else if ( !m_ReadDone ) {
        NCBI_THROW2(CObjReaderParseException, eFormat,
                   "CAlnReader::GetSeqEntry(): "
                   "Seq_entry is not available until after Read()", 0);
    }

    // Assemble into a local object and publish it to m_Entry only once it
    // is complete.  Otherwise an exception thrown half-way through would
    // leave a partially filled entry cached, and every later call would
    // hand that incomplete entry back as if it were valid.
    CRef<CSeq_entry> entry (new CSeq_entry);

    CRef<CSeq_annot> seq_annot (new CSeq_annot);
    seq_annot->SetData().SetAlign().push_back(GetSeqAlign());

    entry->SetSet().SetClass(CBioseq_set::eClass_pop_set);
    entry->SetSet().SetAnnot().push_back(seq_annot);

    CBioseq_set::TSeq_set& seq_set = entry->SetSet().SetSeq_set();

    typedef CDense_seg::TDim TNumrow;
    for (TNumrow row_i = 0; row_i < m_Dim; row_i++) {
        const string& seq_str     = m_SeqVec[row_i];
        const size_t  seq_str_len = seq_str.size();

        CRef<CSeq_entry> seq_entry (new CSeq_entry);

        // seq-id(s): parse the row's id string; if that yields nothing,
        // fall back to a single local id made from the raw string.
        CBioseq::TId& ids = seq_entry->SetSeq().SetId();
        CSeq_id::ParseFastaIds(ids, m_Ids[row_i], true);
        if (ids.empty()) {
            ids.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Local,
                                                    m_Ids[row_i])));
        }

        // mol: trust the accession type of the first id when it says
        // nucleotide or protein; otherwise guess from the residues.
        // If neither decides, mol stays eMol_not_set.
        CSeq_inst::EMol mol   = CSeq_inst::eMol_not_set;
        CSeq_id::EAccessionInfo ai = ids.front()->IdentifyAccession();
        if (ai & CSeq_id::fAcc_nuc) {
            mol = CSeq_inst::eMol_na;
        } else if (ai & CSeq_id::fAcc_prot) {
            mol = CSeq_inst::eMol_aa;
        } else {
            switch (CFormatGuess::SequenceType(seq_str.data(), seq_str_len)) {
            case CFormatGuess::eNucleotide:  mol = CSeq_inst::eMol_na;  break;
            case CFormatGuess::eProtein:     mol = CSeq_inst::eMol_aa;  break;
            default:                         break;
            }
        }

        // seq-inst
        CRef<CSeq_inst> seq_inst (new CSeq_inst);
        seq_entry->SetSeq().SetInst(*seq_inst);
        seq_set.push_back(seq_entry);

        // repr
        seq_inst->SetRepr(CSeq_inst::eRepr_raw);

        // mol
        seq_inst->SetMol(mol);

        // len
        _ASSERT(seq_str_len == m_SeqLen[row_i]);
        seq_inst->SetLength(seq_str_len);

        // data: protein is stored as IUPACaa; everything else (including
        // an undetermined molecule type) is stored as IUPACna and packed.
        CSeq_data& data = seq_inst->SetSeq_data();
        if (mol == CSeq_inst::eMol_aa) {
            data.SetIupacaa().Set(seq_str);
        } else {
            data.SetIupacna().Set(seq_str);
            CSeqportUtil::Pack(&data);
        }

    }

    // Construction succeeded: cache the result for subsequent calls.
    m_Entry = entry;
    return m_Entry;
}
/// Create translation products for the coding regions of a feature table.
///
/// For every Cdregion feature in @a annot that does not already have a
/// product, the feature is translated (using the scope of @a handle), the
/// protein is wrapped in a new raw amino-acid Bioseq with a generated local
/// id ("lcl|tpNNNNN", numbered from sm_Counter), and the feature's product
/// is set to the whole of that new sequence.
///
/// @param annot
///   Annotation to process; its features are modified in place.
/// @param handle
///   Supplies the scope passed to the translator.
/// @return
///   A Bioseq-set holding one Seq-entry per product created.  The set is
///   empty when @a annot is not a feature table.
CRef<CBioseq_set> CMakeCdrProds::MakeCdrProds(CRef<CSeq_annot> annot,
                                              CBioseq_Handle handle)
{
    CRef<CBioseq_set> products(new CBioseq_set);

    // Anything other than a feature table yields an empty set
    // (rather than an exception or a null reference).
    if (annot->GetData().IsFtable()) {
        typedef list<CRef<CSeq_feat> > TFeatList;
        TFeatList& feats = annot->SetData().SetFtable();

        for (TFeatList::iterator it = feats.begin(), it_end = feats.end();
             it != it_end;  ++it) {
            CSeq_feat& cds = **it;

            // Skip features that are not coding regions, and coding
            // regions that already point at a product.
            if (!cds.GetData().IsCdregion()  ||  cds.IsSetProduct()) {
                continue;
            }

            // Translate the coding region into protein residues.
            string residues;
            CSeqTranslator::Translate(cds, handle.GetScope(), residues);

            // Raw amino-acid instance carrying the translation.
            CRef<CSeq_data> prot_data(new CSeq_data(residues,
                                                    CSeq_data::e_Iupacaa));
            CRef<CSeq_inst> prot_inst(new CSeq_inst);
            prot_inst->SetSeq_data(*prot_data);
            prot_inst->SetRepr(CSeq_inst_Base::eRepr_raw);
            prot_inst->SetMol(CSeq_inst_Base::eMol_aa);
            prot_inst->SetLength(residues.size());

            // Generate the next accession: "tp" followed by the counter
            // value, left-padded with zeros to at least five digits.
            string serial = NStr::NumericToString(sm_Counter.Add(1));
            if (serial.size() < 5) {
                serial.insert(SIZE_TYPE(0), 5 - serial.size(), '0');
            }
            const string accession = "tp" + serial;
            CRef<CSeq_id> prot_id(new CSeq_id("lcl|" + accession));

            // Assemble the Bioseq: id, title, mol-type, instance.
            CRef<CBioseq> protein(new CBioseq);
            protein->SetId().push_back(prot_id);

            CRef<CSeqdesc> title_desc(new CSeqdesc);
            title_desc->SetTitle("Translation product " + accession);
            protein->SetDescr().Set().push_back(title_desc);

            CRef<CSeqdesc> moltype_desc(new CSeqdesc);
            moltype_desc->SetMol_type( eGIBB_mol_peptide);
            protein->SetDescr().Set().push_back(moltype_desc);

            protein->SetInst(*prot_inst);

            // Wrap the Bioseq in a Seq-entry and add it to the result.
            CRef<CSeq_entry> prot_entry(new CSeq_entry);
            prot_entry->SetSeq(*protein);
            products->SetSeq_set().push_back(prot_entry);

            // Point the feature's product at the whole new sequence.
            CRef<CSeq_loc> whole_loc(new CSeq_loc);
            whole_loc->SetWhole(*prot_id);
            cds.SetProduct(*whole_loc);
        }
    }

    return products;
}