// ---------------------------------------------------------------------------- bool CSoMap::xMapNcRna( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { map<string, string> mapNcRnaClassToSoType = { {"lncRNA", "lnc_RNA"}, {"other", "ncRNA"}, }; string ncrna_class = feature.GetNamedQual("ncRNA_class"); if (ncrna_class.empty()) { if (feature.IsSetData() && feature.GetData().IsRna() && feature.GetData().GetRna().IsSetExt() && feature.GetData().GetRna().GetExt().IsGen() && feature.GetData().GetRna().GetExt().GetGen().IsSetClass()) { ncrna_class = feature.GetData().GetRna().GetExt().GetGen().GetClass(); if (ncrna_class == "classRNA") { ncrna_class = "ncRNA"; } } } if (ncrna_class.empty()) { return false; } auto cit = mapNcRnaClassToSoType.find(ncrna_class); if (cit == mapNcRnaClassToSoType.end()) { so_type = ncrna_class; return true; } so_type = cit->second; return true; }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignType( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strType = "region"; if ( feature.CanGetQual() ) { const vector< CRef< CGb_qual > >& quals = feature.GetQual(); vector< CRef< CGb_qual > >::const_iterator it = quals.begin(); while ( it != quals.end() ) { if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) { if ( (*it)->GetQual() == "standard_name" ) { m_strType = (*it)->GetVal(); return true; } } ++it; } } if ( ! feature.CanGetData() ) { return true; } switch ( feature.GetData().GetSubtype() ) { default: m_strType = feature.GetData().GetKey(); break; case CSeq_feat::TData::eSubtype_gene: m_strType = "gene"; break; case CSeq_feat::TData::eSubtype_cdregion: m_strType = "CDS"; break; case CSeq_feat::TData::eSubtype_mRNA: m_strType = "mRNA"; break; case CSeq_feat::TData::eSubtype_scRNA: m_strType = "scRNA"; break; case CSeq_feat::TData::eSubtype_exon: m_strType = "exon"; break; } return true; }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignPhase( const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strPhase = "."; if ( ! feature.CanGetData() ) { return true; } const CSeq_feat::TData& data = feature.GetData(); if ( data.GetSubtype() != CSeq_feat::TData::eSubtype_cdregion ) { return true; } const CCdregion& cdr = data.GetCdregion(); CCdregion::TFrame frame = cdr.GetFrame(); switch ( frame ) { default: break; case CCdregion::eFrame_one: m_strPhase = "0"; break; case CCdregion::eFrame_two: m_strPhase = "1"; break; case CCdregion::eFrame_three: m_strPhase = "2"; break; } return true; }
// ---------------------------------------------------------------------------- bool CSoMap::FeatureToSoType( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { auto subtype = feature.GetData().GetSubtype(); TYPEFUNCENTRY cit = mMapTypeFunc.find(subtype); if (cit == mMapTypeFunc.end()) { return false; } return (cit->second)(feature, so_type); }
// ---------------------------------------------------------------------------- bool CSoMap::xMapRna( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { static const map<CSeqFeatData::ESubtype, string> mapSubtypeStraight = { {CSeqFeatData::eSubtype_misc_RNA, "transcript"}, {CSeqFeatData::eSubtype_rRNA, "rRNA"}, {CSeqFeatData::eSubtype_tRNA, "tRNA"}, }; static const map<CSeqFeatData::ESubtype, string> mapSubtypePseudo = { {CSeqFeatData::eSubtype_misc_RNA, "pseudogenic_transcript"}, {CSeqFeatData::eSubtype_rRNA, "pseudogenic_rRNA"}, {CSeqFeatData::eSubtype_tRNA, "pseudogenic_tRNA"}, }; auto subtype = feature.GetData().GetSubtype(); if (feature.IsSetPseudo() && feature.GetPseudo()) { auto cit = mapSubtypePseudo.find(subtype); if (cit == mapSubtypePseudo.end()) { return false; } so_type = cit->second; return true; } if (feature.IsSetPseudo() && !feature.GetPseudo()) { auto cit = mapSubtypeStraight.find(subtype); if (cit == mapSubtypeStraight.end()) { return false; } so_type = cit->second; return true; } for (auto qual: feature.GetQual()) { if (qual->GetQual() == "pseudo" || qual->GetQual() == "pseudogene") { auto cit = mapSubtypePseudo.find(subtype); if (cit == mapSubtypePseudo.end()) { return false; } so_type = cit->second; return true; } } auto cit = mapSubtypeStraight.find(subtype); if (cit == mapSubtypeStraight.end()) { return false; } so_type = cit->second; return true; }
// Corresponds to SortFeatItemListByPos from the C toolkit int CSeq_feat::CompareNonLocation(const CSeq_feat& f2, const CSeq_loc& loc1, const CSeq_loc& loc2) const { const CSeqFeatData& data1 = GetData(); const CSeqFeatData& data2 = f2.GetData(); CSeqFeatData::E_Choice type1 = data1.Which(); CSeqFeatData::E_Choice type2 = data2.Which(); // operon first if ( int diff = s_IsOperon(data2) - s_IsOperon(data1) ) { return diff; } if ( type1 != type2 ) { // order by feature type int order1 = GetTypeSortingOrder(type1); int order2 = GetTypeSortingOrder(type2); int diff = order1 - order2; if ( diff != 0 ) return diff; } // minus strand last ENa_strand strand1 = loc1.GetStrand(); ENa_strand strand2 = loc2.GetStrand(); if ( int diff = IsReverse(strand1) - IsReverse(strand2) ) { return diff; } if ( int diff = loc1.CompareSubLoc(loc2, strand1) ) { return diff; } {{ // compare subtypes CSeqFeatData::ESubtype subtype1 = data1.GetSubtype(); CSeqFeatData::ESubtype subtype2 = data2.GetSubtype(); int diff = subtype1 - subtype2; if ( diff != 0 ) return diff; }} // subtypes are equal, types must be equal too _ASSERT(type1 == type2); // type dependent comparison if ( type1 == CSeqFeatData::e_Cdregion ) { // compare frames of identical CDS ranges if ( int diff = s_GetCdregionOrder(data1)-s_GetCdregionOrder(data2) ) { return diff; } } else if ( type1 == CSeqFeatData::e_Imp ) { // compare labels of imp features int diff = NStr::CompareNocase(data1.GetImp().GetKey(), data2.GetImp().GetKey()); if ( diff != 0 ) return diff; } // XXX - should compare parent seq-annots // XXX 1. parent Seq-annot idx.itemID // XXX 2. features itemID return 0; // unknown }
// ---------------------------------------------------------------------------- bool CSoMap::xMapGeneric( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { static const map<CSeqFeatData::ESubtype, string> mapSubtypeToSoType = { {CSeqFeatData::eSubtype_3UTR, "three_prime_UTR"}, {CSeqFeatData::eSubtype_5UTR, "five_prime_UTR"}, {CSeqFeatData::eSubtype_assembly_gap, "assemply_gap"}, {CSeqFeatData::eSubtype_C_region, "C_gene_segment"}, {CSeqFeatData::eSubtype_centromere, "centromere"}, {CSeqFeatData::eSubtype_D_loop, "D_loop"}, {CSeqFeatData::eSubtype_D_segment, "D_gene_segment"}, {CSeqFeatData::eSubtype_exon, "exon"}, {CSeqFeatData::eSubtype_enhancer, "enhancer"}, {CSeqFeatData::eSubtype_gap, "gap"}, {CSeqFeatData::eSubtype_iDNA, "iDNA"}, {CSeqFeatData::eSubtype_intron, "intron"}, {CSeqFeatData::eSubtype_J_segment, "J_gene_segment"}, {CSeqFeatData::eSubtype_LTR, "long_terminal_repeat"}, {CSeqFeatData::eSubtype_mat_peptide, "mature_protein_region"}, {CSeqFeatData::eSubtype_misc_binding, "binding_site"}, {CSeqFeatData::eSubtype_misc_difference, "sequence_difference"}, {CSeqFeatData::eSubtype_misc_structure, "sequence_secondary_structure"}, {CSeqFeatData::eSubtype_mobile_element, "mobile_genetic_element"}, {CSeqFeatData::eSubtype_modified_base, "modified_DNA_base"}, {CSeqFeatData::eSubtype_mRNA, "mRNA"}, {CSeqFeatData::eSubtype_N_region, "N_region"}, {CSeqFeatData::eSubtype_operon, "operon"}, {CSeqFeatData::eSubtype_oriT, "oriT"}, {CSeqFeatData::eSubtype_otherRNA, "transcript"}, {CSeqFeatData::eSubtype_polyA_site, "polyA_site"}, {CSeqFeatData::eSubtype_precursor_RNA, "primary_transcript"}, {CSeqFeatData::eSubtype_preRNA, "primary_transcript"}, {CSeqFeatData::eSubtype_prim_transcript, "primary_transcript"}, {CSeqFeatData::eSubtype_primer_bind, "primer_binding_site"}, {CSeqFeatData::eSubtype_promoter, "promoter"}, {CSeqFeatData::eSubtype_propeptide, "propeptide"}, {CSeqFeatData::eSubtype_protein_bind, "protein_binding_site"}, {CSeqFeatData::eSubtype_rep_origin, "origin_of_replication"}, {CSeqFeatData::eSubtype_S_region, "S_region"}, {CSeqFeatData::eSubtype_sig_peptide, "signal_peptide"}, {CSeqFeatData::eSubtype_source, "region"}, {CSeqFeatData::eSubtype_stem_loop, "stem_loop"}, {CSeqFeatData::eSubtype_STS, "STS"}, {CSeqFeatData::eSubtype_telomere, "telomere"}, {CSeqFeatData::eSubtype_terminator, "terminator"}, {CSeqFeatData::eSubtype_tmRNA, "tmRNA"}, {CSeqFeatData::eSubtype_transit_peptide, "transit_peptide"}, {CSeqFeatData::eSubtype_unsure, "sequence_uncertainty"}, {CSeqFeatData::eSubtype_V_region, "V_region"}, {CSeqFeatData::eSubtype_V_segment, "V_gene_segment"}, {CSeqFeatData::eSubtype_variation, "sequence_alteration"}, //{CSeqFeatData::eSubtype_attenuator, "attenuator"}, }; auto subtype = feature.GetData().GetSubtype(); auto cit = mapSubtypeToSoType.find(subtype); if (cit != mapSubtypeToSoType.end()) { so_type = cit->second; return true; } return false; }
// ---------------------------------------------------------------------------- bool CGffRecord::AssignAttributesCore( const CSeq_annot& annot, const CSeq_feat& feature ) // ---------------------------------------------------------------------------- { m_strAttributes = ""; // If feature ids are present then they are likely used to show parent/child // relationships, via corresponding xrefs. Thus, any feature ids override // gb ID tags (feature ids and ID tags should agree in the first place, but // if not, feature ids must trump ID tags). // bool bIdAssigned = false; if ( feature.CanGetId() ) { const CSeq_feat::TId& id = feature.GetId(); string value = CGffRecord::FeatIdString( id ); AddAttribute( "ID", value ); bIdAssigned = true; } if ( feature.CanGetXref() ) { const CSeq_feat::TXref& xref = feature.GetXref(); string value; for ( size_t i=0; i < xref.size(); ++i ) { // const CSeqFeatXref& ref = *xref[i]; if ( xref[i]->CanGetId() && xref[i]->CanGetData() ) { const CSeqFeatXref::TId& id = xref[i]->GetId(); CSeq_feat::TData::ESubtype other_type = GetSubtypeOf( annot, id ); if ( ! IsParentOf( other_type, feature.GetData().GetSubtype() ) ) { continue; } if ( ! value.empty() ) { value += ","; } value += CGffRecord::FeatIdString( id ); } } if ( ! value.empty() ) { AddAttribute( "Parent", value ); } } if ( feature.CanGetQual() ) { const vector< CRef< CGb_qual > >& quals = feature.GetQual(); vector< CRef< CGb_qual > >::const_iterator it = quals.begin(); while ( it != quals.end() ) { if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) { if ( (*it)->GetQual() == "ID" ) { if ( ! bIdAssigned ) { AddAttribute( "ID", (*it)->GetVal() ); } } if ( (*it)->GetQual() == "Name" ) { AddAttribute( "Name", (*it)->GetVal() ); } if ( (*it)->GetQual() == "Var_type" ) { AddAttribute( "Var_type", (*it)->GetVal() ); } } ++it; } } return true; }