예제 #1
0
//  ----------------------------------------------------------------------------
bool CSoMap::xMapNcRna(
    const CSeq_feat& feature,
    string& so_type)
//  ----------------------------------------------------------------------------
{
    map<string, string> mapNcRnaClassToSoType = {
        {"lncRNA", "lnc_RNA"},
        {"other", "ncRNA"},
    };
    string ncrna_class = feature.GetNamedQual("ncRNA_class");
    if (ncrna_class.empty()) {
        if (feature.IsSetData()  &&
                feature.GetData().IsRna()  &&
                feature.GetData().GetRna().IsSetExt()  &&
                feature.GetData().GetRna().GetExt().IsGen()  &&
                feature.GetData().GetRna().GetExt().GetGen().IsSetClass()) {
            ncrna_class = feature.GetData().GetRna().GetExt().GetGen().GetClass();
            if (ncrna_class == "classRNA") {
                ncrna_class = "ncRNA";
            }
        }
    }
    if (ncrna_class.empty()) {
        return false;
    }
    auto cit = mapNcRnaClassToSoType.find(ncrna_class);
    if (cit == mapNcRnaClassToSoType.end()) {
        so_type = ncrna_class;
        return true;
    }
    so_type = cit->second;
    return true;
}
예제 #2
0
//  ----------------------------------------------------------------------------
bool CGffRecord::AssignType(
    const CSeq_feat& feature )
//  ----------------------------------------------------------------------------
{
    m_strType = "region";

    if ( feature.CanGetQual() ) {
        const vector< CRef< CGb_qual > >& quals = feature.GetQual();
        vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
        while ( it != quals.end() ) {
            if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) {
                if ( (*it)->GetQual() == "standard_name" ) {
                    m_strType = (*it)->GetVal();
                    return true;
                }
            }
            ++it;
        }
    }

    if ( ! feature.CanGetData() ) {
        return true;
    }

    switch ( feature.GetData().GetSubtype() ) {
    default:
        m_strType = feature.GetData().GetKey();
        break;

    case CSeq_feat::TData::eSubtype_gene:
        m_strType = "gene";
        break;

    case CSeq_feat::TData::eSubtype_cdregion:
        m_strType = "CDS";
        break;

    case CSeq_feat::TData::eSubtype_mRNA:
        m_strType = "mRNA";
        break;

    case CSeq_feat::TData::eSubtype_scRNA:
        m_strType = "scRNA";
        break;

    case CSeq_feat::TData::eSubtype_exon:
        m_strType = "exon";
        break;
    }
    return true;
}
예제 #3
0
//  ----------------------------------------------------------------------------
bool CGffRecord::AssignPhase(
    const CSeq_feat& feature )
//  ----------------------------------------------------------------------------
{
    m_strPhase = ".";

    if ( ! feature.CanGetData() ) {
        return true;
    }
    const CSeq_feat::TData& data = feature.GetData();
    if ( data.GetSubtype() != CSeq_feat::TData::eSubtype_cdregion ) {
        return true;
    }

    const CCdregion& cdr = data.GetCdregion();
    CCdregion::TFrame frame = cdr.GetFrame();
    switch ( frame ) {
    default:
        break;
    case CCdregion::eFrame_one:
        m_strPhase = "0";
        break;
    case CCdregion::eFrame_two:
        m_strPhase = "1";
        break;
    case CCdregion::eFrame_three:
        m_strPhase = "2";
        break;
    }

    return true;
}
예제 #4
0
//  ----------------------------------------------------------------------------
//  Resolves the SO type of a feature by dispatching on its data subtype.
//
//  The subtype is looked up in mMapTypeFunc and the registered handler is
//  invoked with the same arguments.
//
//  feature:  feature to classify.
//  so_type:  passed through to the handler.
//  Returns:  false if no handler is registered for the subtype, otherwise
//            whatever the handler returns.
bool CSoMap::FeatureToSoType(
    const CSeq_feat& feature,
    string& so_type)
//  ----------------------------------------------------------------------------
{
    const auto handler = mMapTypeFunc.find(feature.GetData().GetSubtype());
    if (handler != mMapTypeFunc.end()) {
        return (handler->second)(feature, so_type);
    }
    return false;
}
예제 #5
0
//  ----------------------------------------------------------------------------
bool CSoMap::xMapRna(
    const CSeq_feat& feature,
    string& so_type)
//  ----------------------------------------------------------------------------
{
    static const map<CSeqFeatData::ESubtype, string> mapSubtypeStraight = {
        {CSeqFeatData::eSubtype_misc_RNA, "transcript"},
        {CSeqFeatData::eSubtype_rRNA, "rRNA"},
        {CSeqFeatData::eSubtype_tRNA, "tRNA"},
    };
    static const map<CSeqFeatData::ESubtype, string> mapSubtypePseudo = {
        {CSeqFeatData::eSubtype_misc_RNA, "pseudogenic_transcript"},
        {CSeqFeatData::eSubtype_rRNA, "pseudogenic_rRNA"},
        {CSeqFeatData::eSubtype_tRNA, "pseudogenic_tRNA"},
    };

    auto subtype = feature.GetData().GetSubtype();
    if (feature.IsSetPseudo()  &&  feature.GetPseudo()) {
        auto cit = mapSubtypePseudo.find(subtype);
        if (cit == mapSubtypePseudo.end()) {
            return false;
        }
        so_type = cit->second;
        return true;
    }
    if (feature.IsSetPseudo()  &&  !feature.GetPseudo()) {
        auto cit = mapSubtypeStraight.find(subtype);
        if (cit == mapSubtypeStraight.end()) {
            return false;
        }
        so_type = cit->second;
        return true;
    }

    for (auto qual: feature.GetQual()) {
        if (qual->GetQual() == "pseudo"  ||  qual->GetQual() == "pseudogene") {
            auto cit = mapSubtypePseudo.find(subtype);
            if (cit == mapSubtypePseudo.end()) {
                return false;
            }
            so_type = cit->second;
            return true;
        }
    }
    auto cit = mapSubtypeStraight.find(subtype);
    if (cit == mapSubtypeStraight.end()) {
        return false;
    }
    so_type = cit->second;
    return true;
}
예제 #6
0
// Corresponds to SortFeatItemListByPos from the C toolkit
int CSeq_feat::CompareNonLocation(const CSeq_feat& f2,
                                  const CSeq_loc& loc1,
                                  const CSeq_loc& loc2) const
{
    const CSeqFeatData& data1 = GetData();
    const CSeqFeatData& data2 = f2.GetData();
    CSeqFeatData::E_Choice type1 = data1.Which();
    CSeqFeatData::E_Choice type2 = data2.Which();

    // operon first
    if ( int diff = s_IsOperon(data2) - s_IsOperon(data1) ) {
        return diff;
    }
    if ( type1 != type2 ) {
        // order by feature type
        int order1 = GetTypeSortingOrder(type1);
        int order2 = GetTypeSortingOrder(type2);
        int diff = order1 - order2;
        if ( diff != 0 )
            return diff;
    }

    // minus strand last
    ENa_strand strand1 = loc1.GetStrand();
    ENa_strand strand2 = loc2.GetStrand();
    if ( int diff = IsReverse(strand1) - IsReverse(strand2) ) {
        return diff;
    }

    if ( int diff = loc1.CompareSubLoc(loc2, strand1) ) {
        return diff;
    }

    {{ // compare subtypes
        CSeqFeatData::ESubtype subtype1 = data1.GetSubtype();
        CSeqFeatData::ESubtype subtype2 = data2.GetSubtype();
        int diff = subtype1 - subtype2;
        if ( diff != 0 )
            return diff;
    }}

    // subtypes are equal, types must be equal too
    _ASSERT(type1 == type2);

    // type dependent comparison
    if ( type1 == CSeqFeatData::e_Cdregion ) {
        // compare frames of identical CDS ranges
        if ( int diff = s_GetCdregionOrder(data1)-s_GetCdregionOrder(data2) ) {
            return diff;
        }
    }
    else if ( type1 == CSeqFeatData::e_Imp ) {
        // compare labels of imp features
        int diff = NStr::CompareNocase(data1.GetImp().GetKey(),
                                       data2.GetImp().GetKey());
        if ( diff != 0 )
            return diff;
    }

    // XXX - should compare parent seq-annots
    // XXX 1. parent Seq-annot idx.itemID
    // XXX 2. features itemID

    return 0; // unknown
}
예제 #7
0
//  ----------------------------------------------------------------------------
//  Maps a feature to an SO type purely by its data subtype, for all subtypes
//  whose SO type does not depend on anything else in the feature.
//
//  feature:  feature to classify.
//  so_type:  receives the resolved type; not modified when false is returned.
//  Returns:  true if the subtype has an entry in the table, false otherwise.
bool CSoMap::xMapGeneric(
    const CSeq_feat& feature,
    string& so_type)
//  ----------------------------------------------------------------------------
{
    static const map<CSeqFeatData::ESubtype, string> mapSubtypeToSoType = {
        {CSeqFeatData::eSubtype_3UTR, "three_prime_UTR"},
        {CSeqFeatData::eSubtype_5UTR, "five_prime_UTR"},
        // NOTE(review): spelled "assembly_gap"; any reverse (SO type to
        // subtype) table elsewhere must use the same spelling.
        {CSeqFeatData::eSubtype_assembly_gap, "assembly_gap"},
        {CSeqFeatData::eSubtype_C_region, "C_gene_segment"},
        {CSeqFeatData::eSubtype_centromere, "centromere"},
        {CSeqFeatData::eSubtype_D_loop, "D_loop"},
        {CSeqFeatData::eSubtype_D_segment, "D_gene_segment"},
        {CSeqFeatData::eSubtype_exon, "exon"},
        {CSeqFeatData::eSubtype_enhancer, "enhancer"},
        {CSeqFeatData::eSubtype_gap, "gap"},
        {CSeqFeatData::eSubtype_iDNA, "iDNA"},
        {CSeqFeatData::eSubtype_intron, "intron"},
        {CSeqFeatData::eSubtype_J_segment, "J_gene_segment"},
        {CSeqFeatData::eSubtype_LTR, "long_terminal_repeat"},
        {CSeqFeatData::eSubtype_mat_peptide, "mature_protein_region"},
        {CSeqFeatData::eSubtype_misc_binding, "binding_site"},
        {CSeqFeatData::eSubtype_misc_difference, "sequence_difference"},
        {CSeqFeatData::eSubtype_misc_structure, "sequence_secondary_structure"},
        {CSeqFeatData::eSubtype_mobile_element, "mobile_genetic_element"},
        {CSeqFeatData::eSubtype_modified_base, "modified_DNA_base"},
        {CSeqFeatData::eSubtype_mRNA, "mRNA"},
        {CSeqFeatData::eSubtype_N_region, "N_region"},
        {CSeqFeatData::eSubtype_operon, "operon"},
        {CSeqFeatData::eSubtype_oriT, "oriT"},
        {CSeqFeatData::eSubtype_otherRNA, "transcript"},
        {CSeqFeatData::eSubtype_polyA_site, "polyA_site"},
        {CSeqFeatData::eSubtype_precursor_RNA, "primary_transcript"},
        {CSeqFeatData::eSubtype_preRNA, "primary_transcript"},
        {CSeqFeatData::eSubtype_prim_transcript, "primary_transcript"},
        {CSeqFeatData::eSubtype_primer_bind, "primer_binding_site"},
        {CSeqFeatData::eSubtype_promoter, "promoter"},
        {CSeqFeatData::eSubtype_propeptide, "propeptide"},
        {CSeqFeatData::eSubtype_protein_bind, "protein_binding_site"},
        {CSeqFeatData::eSubtype_rep_origin, "origin_of_replication"},
        {CSeqFeatData::eSubtype_S_region, "S_region"},
        {CSeqFeatData::eSubtype_sig_peptide, "signal_peptide"},
        {CSeqFeatData::eSubtype_source, "region"},
        {CSeqFeatData::eSubtype_stem_loop, "stem_loop"},
        {CSeqFeatData::eSubtype_STS, "STS"},
        {CSeqFeatData::eSubtype_telomere, "telomere"},
        {CSeqFeatData::eSubtype_terminator, "terminator"},
        {CSeqFeatData::eSubtype_tmRNA, "tmRNA"},
        {CSeqFeatData::eSubtype_transit_peptide, "transit_peptide"},
        {CSeqFeatData::eSubtype_unsure, "sequence_uncertainty"},
        {CSeqFeatData::eSubtype_V_region, "V_region"},
        {CSeqFeatData::eSubtype_V_segment, "V_gene_segment"},
        {CSeqFeatData::eSubtype_variation, "sequence_alteration"},
        //{CSeqFeatData::eSubtype_attenuator, "attenuator"},
    };

    const auto cit = mapSubtypeToSoType.find(feature.GetData().GetSubtype());
    if (cit == mapSubtypeToSoType.end()) {
        return false;
    }
    so_type = cit->second;
    return true;
}
예제 #8
0
//  ----------------------------------------------------------------------------
bool CGffRecord::AssignAttributesCore(
    const CSeq_annot& annot,
    const CSeq_feat& feature )
//  ----------------------------------------------------------------------------
{
    m_strAttributes = "";

    // If feature ids are present then they are likely used to show parent/child
    // relationships, via corresponding xrefs. Thus, any feature ids override
    // gb ID tags (feature ids and ID tags should agree in the first place, but
    // if not, feature ids must trump ID tags).
    //
    bool bIdAssigned = false;

    if ( feature.CanGetId() ) {
        const CSeq_feat::TId& id = feature.GetId();
        string value = CGffRecord::FeatIdString( id );
        AddAttribute( "ID", value );
        bIdAssigned = true;
    }

    if ( feature.CanGetXref() ) {
        const CSeq_feat::TXref& xref = feature.GetXref();
        string value;
        for ( size_t i=0; i < xref.size(); ++i ) {
//            const CSeqFeatXref& ref = *xref[i];
            if ( xref[i]->CanGetId() && xref[i]->CanGetData() ) {
                const CSeqFeatXref::TId& id = xref[i]->GetId();
                CSeq_feat::TData::ESubtype other_type = GetSubtypeOf( annot, id );
                if ( ! IsParentOf( other_type, feature.GetData().GetSubtype() ) ) {
                    continue;
                }
                if ( ! value.empty() ) {
                    value += ",";
                }
                value += CGffRecord::FeatIdString( id );
            }
        }
        if ( ! value.empty() ) {
            AddAttribute( "Parent", value );
        }
    }

    if ( feature.CanGetQual() ) {
        const vector< CRef< CGb_qual > >& quals = feature.GetQual();
        vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
        while ( it != quals.end() ) {
            if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) {
                if ( (*it)->GetQual() == "ID" ) {
                    if ( ! bIdAssigned ) {
                        AddAttribute( "ID", (*it)->GetVal() );
                    }
                }
                if ( (*it)->GetQual() == "Name" ) {
                    AddAttribute( "Name", (*it)->GetVal() );
                }
                if ( (*it)->GetQual() == "Var_type" ) {
                    AddAttribute( "Var_type", (*it)->GetVal() );
                }
            }
            ++it;
        }
    }

    return true;
}