// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeRegulatory( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, string, CompareNoCase> mapTypeToQual = { {"DNAsel_hypersensitive_site", "DNase_I_hypersensitive_site"}, {"GC_rich_promoter_region", "GC_signal"}, {"boundary_element", "insulator"}, {"regulatory_region", "other"}, {"ribosome_entry_site", "ribosome_binding_site"}, }; feature.SetData().SetImp().SetKey("regulatory"); CRef<CGb_qual> regulatory_class(new CGb_qual); regulatory_class->SetQual("regulatory_class"); auto cit = mapTypeToQual.find(so_type); if (cit == mapTypeToQual.end()) { regulatory_class->SetVal(so_type); } else { regulatory_class->SetVal(cit->second); } feature.SetQual().push_back(regulatory_class); return true; }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignPhase( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strPhase = "."; if ( ! feature.CanGetData() ) { return true; } const CSeq_feat::TData& data = feature.GetData(); if ( data.GetSubtype() != CSeq_feat::TData::eSubtype_cdregion ) { return true; } const CCdregion& cdr = data.GetCdregion(); CCdregion::TFrame frame = cdr.GetFrame(); switch ( frame ) { default: break; case CCdregion::eFrame_one: m_strPhase = "0"; break; case CCdregion::eFrame_two: m_strPhase = "1"; break; case CCdregion::eFrame_three: m_strPhase = "2"; break; } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeRegion( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { feature.SetData().SetRegion(); CRef<CGb_qual> qual(new CGb_qual("SO_type", so_type)); feature.SetQual().push_back(qual); return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeGene( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { feature.SetData().SetGene(); if (so_type == "pseudogene") { feature.SetPseudo(true); } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeMiscRna( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { feature.SetData().SetImp().SetKey("misc_RNA"); if (so_type=="pseudogenic_transcript") { feature.SetPseudo(true); } return true; }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignStop( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { if ( feature.CanGetLocation() ) { const CSeq_loc& location = feature.GetLocation(); unsigned int uEnd = location.GetStop( eExtreme_Positional ) + 1; m_strEnd = NStr::UIntToString( uEnd ); } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeCds( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { feature.SetData().SetCdregion(); if (so_type=="pseudogenic_CDS") { feature.SetPseudo(true); } return true; }
// Convenience overload: compares two features using each feature's own
// location, delegating to the overload that takes explicit locations.
CAnnotCompare::TCompareFlags
CAnnotCompare::CompareFeats(const CSeq_feat& feat1,
                            CScope& scope1,
                            const CSeq_feat& feat2,
                            CScope& scope2,
                            vector<ECompareFlags>* complex_flags,
                            list<string>* comments)
{
    const CSeq_loc& loc1 = feat1.GetLocation();
    const CSeq_loc& loc2 = feat2.GetLocation();

    return CompareFeats(feat1, loc1, scope1,
                        feat2, loc2, scope2,
                        complex_flags, comments);
}
// ---------------------------------------------------------------------------- bool CGffRecord::AssignType( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strType = "region"; if ( feature.CanGetQual() ) { const vector< CRef< CGb_qual > >& quals = feature.GetQual(); vector< CRef< CGb_qual > >::const_iterator it = quals.begin(); while ( it != quals.end() ) { if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) { if ( (*it)->GetQual() == "standard_name" ) { m_strType = (*it)->GetVal(); return true; } } ++it; } } if ( ! feature.CanGetData() ) { return true; } switch ( feature.GetData().GetSubtype() ) { default: m_strType = feature.GetData().GetKey(); break; case CSeq_feat::TData::eSubtype_gene: m_strType = "gene"; break; case CSeq_feat::TData::eSubtype_cdregion: m_strType = "CDS"; break; case CSeq_feat::TData::eSubtype_mRNA: m_strType = "mRNA"; break; case CSeq_feat::TData::eSubtype_scRNA: m_strType = "scRNA"; break; case CSeq_feat::TData::eSubtype_exon: m_strType = "exon"; break; } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xMapRegulatory( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { map<string, string> mapRegulatoryClassToSoType = { {"DNase_I_hypersensitive_site", "DNaseI_hypersensitive_site"}, {"GC_signal", "GC_rich_promoter_region"}, {"enhancer_blocking_element", "regulatory_region"}, {"imprinting_control_region", "regulatory_region"}, {"matrix_attachment_region", "matrix_attachment_site"}, {"other", "regulatory_region"}, {"response_element", "regulatory_region"}, {"ribosome_binding_site", "ribosome_entry_site"}, }; string regulatory_class = feature.GetNamedQual("regulatory_class"); if (regulatory_class.empty()) { return false; } auto cit = mapRegulatoryClassToSoType.find(regulatory_class); if (cit == mapRegulatoryClassToSoType.end()) { so_type = regulatory_class; return true; } so_type = cit->second; return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xMapMiscRecomb( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { map<string, string> mapRecombClassToSoType = { {"meiotic", "meiotic_recombination_region"}, {"mitotic", "mitotic_recombination_region"}, {"non_allelic_homologous", "non_allelic_homologous_recombination_region"}, {"meiotic_recombination", "meiotic_recombination_region"}, {"mitotic_recombination", "mitotic_recombination_region"}, {"non_allelic_homologous_recombination", "non_allelic_homologous_recombination_region"}, {"other", "recombination_region"}, }; string recomb_class = feature.GetNamedQual("recombination_class"); if (recomb_class.empty()) { return false; } auto cit = mapRecombClassToSoType.find(recomb_class); if (cit == mapRecombClassToSoType.end()) { so_type = recomb_class; return true; } so_type = cit->second; return true; }
// ---------------------------------------------------------------------------- CConstRef<CSeq_feat> CFeatTableEdit::xGetMrnaParent( const CSeq_feat& feat) // ---------------------------------------------------------------------------- { CConstRef<CSeq_feat> pMrna; CSeq_feat_Handle sfh = mpScope->GetSeq_featHandle(feat); CSeq_annot_Handle sah = sfh.GetAnnot(); if (!sah) { return pMrna; } size_t bestLength(0); CFeat_CI findGene(sah, CSeqFeatData::eSubtype_mRNA); for ( ; findGene; ++findGene) { Int8 compare = sequence::TestForOverlap64( findGene->GetLocation(), feat.GetLocation(), sequence::eOverlap_Contained); if (compare == -1) { continue; } size_t currentLength = sequence::GetLength(findGene->GetLocation(), mpScope); if (!bestLength || currentLength > bestLength) { pMrna.Reset(&(findGene->GetOriginalFeature())); bestLength = currentLength; } } return pMrna; }
// ---------------------------------------------------------------------------- bool CSoMap::xMapRepeatRegion( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { map<string, string> mapSatelliteToSoType = { {"satellite", "satellite_DNA"}, {"microsatellite", "microsatellite"}, {"minisatellite", "minisatellite"}, }; string satellite = feature.GetNamedQual("satellite"); if (!satellite.empty()) { auto cit = mapSatelliteToSoType.find(satellite); if (cit == mapSatelliteToSoType.end()) { return false; } so_type = cit->second; return true; } map<string, string> mapRptTypeToSoType = { {"tandem", "tandem_repeat"}, {"inverted", "inverted_repeat"}, {"flanking", "repeat_region"}, {"terminal", "repeat_region"}, {"direct", "direct_repeat"}, {"dispersed", "dispersed_repeat"}, {"nested", "nested_repeat"}, {"non_ltr_retrotransposon_polymeric_tract", "non_LTR_retrotransposon_polymeric_tract"}, {"x_element_combinatorical_repeat", "X_element_combinatorical_repeat"}, {"y_prime_element", "Y_prime_element"}, {"other", "repeat_region"}, }; string rpt_type = feature.GetNamedQual("rpt_type"); if (rpt_type.empty()) { so_type = "repeat_region"; } auto cit = mapRptTypeToSoType.find(rpt_type); if (cit == mapRptTypeToSoType.end()) { so_type = rpt_type; return true; } so_type = cit->second; return true; }
// ---------------------------------------------------------------------------- bool CBedGraphWriter::xWriteSingleFeature( const CBedTrackRecord& trackdata, const CSeq_feat& feature) // ---------------------------------------------------------------------------- { CBedGraphRecord bedRecord; const CSeq_loc& location = feature.GetLocation(); if (!location.IsInt()) { NCBI_THROW( CObjWriterException, eInterrupted, "BedGraph writer does not support feature locations that are not intervals."); } const CSeq_interval& interval = location.GetInt(); const string& scoreStr = feature.GetNamedQual("score"); if (scoreStr.empty()) { NCBI_THROW( CObjWriterException, eInterrupted, "BedGraph writer only supports features with a \"score\" qualifier."); } double score = 0; try { score = NStr::StringToDouble(scoreStr); } catch(CException&) { NCBI_THROW( CObjWriterException, eInterrupted, "BedGraph writer encountered feature with bad \"score\" qualifier."); } const CSeq_id& id = interval.GetId(); string recordId; id.GetLabel(&recordId); bedRecord.SetChromId(recordId); bedRecord.SetChromStart(interval.GetFrom()); bedRecord.SetChromEnd(interval.GetTo()-1); bedRecord.SetChromValue(score); bedRecord.Write(m_Os); return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xMapNcRna( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { map<string, string> mapNcRnaClassToSoType = { {"lncRNA", "lnc_RNA"}, {"other", "ncRNA"}, }; string ncrna_class = feature.GetNamedQual("ncRNA_class"); if (ncrna_class.empty()) { if (feature.IsSetData() && feature.GetData().IsRna() && feature.GetData().GetRna().IsSetExt() && feature.GetData().GetRna().GetExt().IsGen() && feature.GetData().GetRna().GetExt().GetGen().IsSetClass()) { ncrna_class = feature.GetData().GetRna().GetExt().GetGen().GetClass(); if (ncrna_class == "classRNA") { ncrna_class = "ncRNA"; } } } if (ncrna_class.empty()) { return false; } auto cit = mapNcRnaClassToSoType.find(ncrna_class); if (cit == mapNcRnaClassToSoType.end()) { so_type = ncrna_class; return true; } so_type = cit->second; return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xMapGene( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { if (feature.IsSetPseudo() && feature.GetPseudo()) { so_type = "pseudogene"; return true; } for (auto qual: feature.GetQual()) { if (qual->GetQual() == "pseudo" || qual->GetQual() == "pseudogene") { so_type = "pseudogene"; return true; } } so_type = "gene"; return true; }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignSeqId( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strSeqId = "<unknown>"; if ( feature.CanGetLocation() ) { const CSeq_loc& location = feature.GetLocation(); const CSeq_id* pId = location.GetId(); switch ( pId->Which() ) { case CSeq_id::e_Local: if ( pId->GetLocal().IsId() ) { m_strSeqId = NStr::UIntToString( pId->GetLocal().GetId() ); } else { m_strSeqId = pId->GetLocal().GetStr(); } break; case CSeq_id::e_Gi: m_strSeqId = NStr::IntToString( pId->GetGi() ); break; case CSeq_id::e_Other: if ( pId->GetOther().CanGetAccession() ) { m_strSeqId = pId->GetOther().GetAccession(); if ( pId->GetOther().CanGetVersion() ) { m_strSeqId += "."; m_strSeqId += NStr::UIntToString( pId->GetOther().GetVersion() ); } } break; default: break; } } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeRna( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, CRNA_ref::EType, CompareNoCase> mTypeToRna = { {"mRNA", CRNA_ref::eType_mRNA}, {"rRNA", CRNA_ref::eType_rRNA}, {"pseudogenic_rRNA", CRNA_ref::eType_rRNA}, {"tRNA", CRNA_ref::eType_tRNA}, {"pseudogenic_tRNA", CRNA_ref::eType_tRNA}, {"tmRNA", CRNA_ref::eType_tmRNA}, }; auto it = mTypeToRna.find(so_type); feature.SetData().SetRna().SetType(it->second); if(NStr::StartsWith(so_type, "pseudogenic_")) { feature.SetPseudo(true); } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeRepeatRegion( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, string, CompareNoCase> mapTypeToSatellite = { {"microsatellite", "microsatellite"}, {"minisatellite", "minisatellite"}, {"satellite_DNA", "satellite"}, }; static const map<string, string, CompareNoCase> mapTypeToRptType = { {"tandem_repeat", "tandem"}, {"inverted_repeat", "inverted"}, {"direct_repeat", "direct"}, {"nested_repeat", "nested"}, {"non_LTR_retrotransposon_polymeric_tract", "non_ltr_retrotransposon_polymeric_tract"}, {"X_element_combinatorial_repeat", "x_element_combinatorial_repeat"}, {"Y_prime_element", "y_prime_element"}, {"repeat_region", "other"}, }; feature.SetData().SetImp().SetKey("repeat_region"); CRef<CGb_qual> qual(new CGb_qual); auto cit = mapTypeToSatellite.find(so_type); if (cit != mapTypeToSatellite.end()) { qual->SetQual("satellite"); qual->SetVal(cit->second); } else { qual->SetQual("rpt_type"); cit = mapTypeToRptType.find(so_type); if (cit == mapTypeToRptType.end()) { qual->SetVal(so_type); } else { qual->SetVal(cit->second); } } feature.SetQual().push_back(qual); return true; }
// Returns how far the feature's total range extends beyond the [from, to]
// window on the side selected by the strand: for the minus strand, the amount
// by which the range's end exceeds `to`; for any other strand, the amount by
// which the range's start precedes `from`. Returns 0 when the range does not
// stick out on that side.
TSeqPos CFeatTrim::x_GetStartOffset(const CSeq_feat& feat,
        TSeqPos from, TSeqPos to)
{
    const auto strand = feat.GetLocation().GetStrand();
    const CRange<TSeqPos> totalRange = feat.GetLocation().GetTotalRange();

    if (strand == eNa_strand_minus) {
        const TSeqPos rangeTo = totalRange.GetTo();
        if (rangeTo > to) {
            return rangeTo - to;
        }
        return 0;
    }

    const TSeqPos rangeFrom = totalRange.GetFrom();
    if (rangeFrom < from) {
        return from - rangeFrom;
    }
    return 0;
}
// ---------------------------------------------------------------------------- bool CGffRecord::AssignScore( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strScore = "."; if ( feature.CanGetQual() ) { const vector< CRef< CGb_qual > >& quals = feature.GetQual(); vector< CRef< CGb_qual > >::const_iterator it = quals.begin(); while ( it != quals.end() ) { if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) { if ( (*it)->GetQual() == "score" ) { m_strScore = (*it)->GetVal(); return true; } } ++it; } } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::FeatureToSoType( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { auto subtype = feature.GetData().GetSubtype(); TYPEFUNCENTRY cit = mMapTypeFunc.find(subtype); if (cit == mMapTypeFunc.end()) { return false; } return (cit->second)(feature, so_type); }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignStrand( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strStrand = "."; if ( feature.CanGetLocation() ) { const CSeq_loc& location = feature.GetLocation(); ENa_strand strand = location.GetStrand(); switch( strand ) { default: break; case eNa_strand_plus: m_strStrand = "+"; break; case eNa_strand_minus: m_strStrand = "-"; break; } } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeNcRna( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, string, CompareNoCase> mTypeToClass = { {"ncRNA", "other"}, }; feature.SetData().SetRna().SetType(CRNA_ref::eType_ncRNA); CRef<CGb_qual> qual(new CGb_qual); qual->SetQual("ncRNA_class"); auto it = mTypeToClass.find(so_type); if (it == mTypeToClass.end()) { qual->SetVal(so_type); } else { qual->SetVal(it->second); } feature.SetQual().push_back(qual); return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeImp( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, string, CompareNoCase> mapTypeToKey = { {"C_gene_segment", "C_region"}, {"D_gene_segment", "D_segment"}, {"D_loop", "D-loop"}, {"J_gene_segment", "J_segment"}, {"V_gene_segment", "V_segment"}, {"binding_site", "misc_binding"}, {"five_prime_UTR", "5\'UTR"}, {"long_terminal_repeat", "LTR"}, {"mature_protein_region", "mat_peptide"}, {"mobile_genetic_element", "mobile_element"}, {"modified_DNA_base", "modified_base"}, {"origin_of_replication", "rep_origin"}, {"primary_transcript", "prim_transcript"}, {"primer_binding_site", "primer_bind"}, {"protein_binding_site", "protein_bind"}, {"region", "source"}, {"sequence_alteration", "variation"}, {"sequence_difference", "misc_difference"}, {"sequence_secondary_structure", "misc_structure"}, {"sequence_uncertainty", "unsure"}, {"signal_peptide", "sig_peptide"}, {"three_prime_UTR", "3\'UTR"}, }; auto cit = mapTypeToKey.find(so_type); if (cit == mapTypeToKey.end()) { feature.SetData().SetImp().SetKey(so_type); } else { feature.SetData().SetImp().SetKey(cit->second); } return true; }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignAttributesExtended( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { if ( feature.CanGetDbxref() ) { const CSeq_feat::TDbxref& dbxrefs = feature.GetDbxref(); if ( dbxrefs.size() > 0 ) { string value; dbxrefs[0]->GetLabel( &value ); for ( size_t i=1; i < dbxrefs.size(); ++i ) { string label; dbxrefs[i]->GetLabel( &label ); value += ","; value += label; } AddAttribute( "Dbxref", value ); } } if ( feature.CanGetComment() ) { AddAttribute( "comment", feature.GetComment() ); } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeMiscRecomb( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, string, CompareNoCase> mapTypeToQual = { {"meiotic_recombination_region", "meiotic"}, {"mitotic_recombination_region", "mitotic"}, {"non_allelic_homologous_recombination", "non_allelic_homologous"}, {"recombination_feature", "other"}, }; feature.SetData().SetImp().SetKey("misc_recomb"); CRef<CGb_qual> recombination_class(new CGb_qual); recombination_class->SetQual("recombination_class"); auto cit = mapTypeToQual.find(so_type); if (cit == mapTypeToQual.end()) { recombination_class->SetVal(so_type); } else { recombination_class->SetVal(cit->second); } feature.SetQual().push_back(recombination_class); return true; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeMiscFeature( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, string, CompareNoCase> mapTypeToQual = { {"TSS", "transcription_start_site"}, }; feature.SetData().SetImp().SetKey("misc_feature"); if (so_type == "sequence_feature") { return true; } CRef<CGb_qual> feat_class(new CGb_qual); feat_class->SetQual("feat_class"); auto cit = mapTypeToQual.find(so_type); if (cit == mapTypeToQual.end()) { feat_class->SetVal(so_type); } else { feat_class->SetVal(cit->second); } feature.SetQual().push_back(feat_class); return true; }
// ---------------------------------------------------------------------------- CRef<CSeq_feat> CFeatTableEdit::xMakeGeneForMrna( const CSeq_feat& rna) // ---------------------------------------------------------------------------- { CRef<CSeq_feat> pGene; CSeq_feat_Handle sfh = mpScope->GetSeq_featHandle(rna); CSeq_annot_Handle sah = sfh.GetAnnot(); if (!sah) { return pGene; } CConstRef<CSeq_feat> pExistingGene = xGetGeneParent(rna); if (pExistingGene) { return pGene; } pGene.Reset(new CSeq_feat); pGene->SetLocation().SetInt(); pGene->SetLocation().SetId(*rna.GetLocation().GetId()); pGene->SetLocation().SetInt().SetFrom(rna.GetLocation().GetStart( eExtreme_Positional)); pGene->SetLocation().SetInt().SetTo(rna.GetLocation().GetStop( eExtreme_Positional)); pGene->SetData().SetGene(); return pGene; }
// ---------------------------------------------------------------------------- bool CSoMap::xFeatureMakeProt( const string& so_type, CSeq_feat& feature) // ---------------------------------------------------------------------------- { static const map<string, CProt_ref::EProcessed, CompareNoCase> mTypeToProcessed = { {"mature_protein_region", CProt_ref::eProcessed_mature}, {"propeptide", CProt_ref::eProcessed_propeptide}, }; auto cit = mTypeToProcessed.find(so_type); if (cit == mTypeToProcessed.end()) { return false; } feature.SetData().SetProt().SetProcessed(cit->second); return true; }