Example #1
0
//  ----------------------------------------------------------------------------
bool CGff3Reader::x_UpdateAnnot(
    const CGff2Record& record,
    CRef< CSeq_annot > pAnnot )
//  ----------------------------------------------------------------------------
{
    string gbkey;
    record.GetAttribute("gbkey", gbkey);
    CRef< CSeq_feat > pFeature(new CSeq_feat);

    //  Round trip info:
    CRef< CGb_qual > pQual( new CGb_qual );
    pQual->SetQual( "gff_source" );
    pQual->SetVal( record.Source() );
    pFeature->SetQual().push_back( pQual );

    pQual.Reset( new CGb_qual );
    pQual->SetQual( "gff_type" );
    pQual->SetVal( record.Type() );
    pFeature->SetQual().push_back( pQual );

    if ( record.IsSetScore() ) {
        pQual.Reset( new CGb_qual );
        pQual->SetQual( "gff_score" );
        pQual->SetVal( NStr::DoubleToString( record.Score() ) );
        pFeature->SetQual().push_back( pQual );
    }

    //  Special case: exon feature belonging to an RNA we have already seen
    if (record.Type() == "exon") {
        string parent;
        if (record.GetAttribute("Parent", parent)) {
            IdToFeatureMap::iterator it = m_MapIdToFeature.find(parent);
            if (it != m_MapIdToFeature.end()) {
                return record.UpdateFeature(m_iFlags, it->second);
            }
        }
    }

    //  Special case: Piece of another feature we have already seen
    string id;
    if (record.GetAttribute("ID", id)) {
        IdToFeatureMap::iterator it = m_MapIdToFeature.find(id);
        if (it != m_MapIdToFeature.end()) {
            return record.UpdateFeature(m_iFlags, it->second);
        }
    }

    //  General case: brand new regular feature
    if (!record.InitializeFeature(m_iFlags, pFeature)) {
        return false;
    }

    string strId;
    if ( record.GetAttribute( "ID", strId ) ) {
        m_MapIdToFeature[ strId ] = pFeature;
    }
    return x_AddFeatureToAnnot( pFeature, pAnnot );
}
Example #2
0
//  ----------------------------------------------------------------------------
bool CGtfReader::x_CreateFeatureLocation(
    const CGff2Record& record,
    CRef< CSeq_feat > pFeature )
//  ----------------------------------------------------------------------------
{
    CRef<CSeq_id> pId = CReadUtil::AsSeqId(
        record.Id(), m_iFlags & fAllIdsAsLocal);

    CSeq_interval& location = pFeature->SetLocation().SetInt();
    location.SetId( *pId );
    location.SetFrom( record.SeqStart() );
    if (record.Type() != "mRNA") {
        location.SetTo(record.SeqStop());
    }
    else {
        // place holder
        //  actual location will be computed from the exons and CDSs living on 
        //  this feature.
        location.SetTo(record.SeqStart());
    }
    if ( record.IsSetStrand() ) {
        location.SetStrand( record.Strand() );
    }

    return true;
}
Example #3
0
//  ----------------------------------------------------------------------------
bool CGtfReader::x_MergeFeatureLocationSingleInterval(
    const CGff2Record& record,
    CRef< CSeq_feat > pFeature )
//  ----------------------------------------------------------------------------
{
    const CSeq_interval& gene_int = pFeature->GetLocation().GetInt();
    if ( gene_int.GetFrom() > record.SeqStart() -1 ) {
        pFeature->SetLocation().SetInt().SetFrom( record.SeqStart() );
    }
    if ( gene_int.GetTo() < record.SeqStop() - 1 ) {
        pFeature->SetLocation().SetInt().SetTo( record.SeqStop() );
    }
    if (record.Type() == "CDS"  &&  pFeature->GetData().IsCdregion()) {
        return x_FeatureTrimQualifiers(record, pFeature);
    }
    return true;
}
Example #4
0
//  ----------------------------------------------------------------------------
string s_FeatureKey(
    const CGff2Record& gff )
//  ----------------------------------------------------------------------------
{
    string strGeneId = s_GeneKey( gff );
    if ( gff.Type() == "gene" ) {
        return strGeneId;
    }

    string strTranscriptId;
    if ( ! gff.GetAttribute( "transcript_id", strTranscriptId ) ) {
        cerr << "Unexpected: GTF feature without a transcript_id." << endl;
        strTranscriptId = "transcript_id";
    }

    return strGeneId + "|" + strTranscriptId;
}
Example #5
0
//  -----------------------------------------------------------------------------
bool CGtfReader::x_CreateParentCds(
    const CGff2Record& gff,
    CRef< CSeq_annot > pAnnot )
//  -----------------------------------------------------------------------------
{
    //
    // Create a single cds feature.
	// This creation may either be triggered by an actual CDS feature found in the
	//	gtf, or by a feature that would imply a CDS feature (such as a start codon 
	//	or a stop codon). The latter is necessary because nothing the the gtf 
	//	standard stipulates that gtf features have to be arranged in any particular
	//	order.
    //
    CRef< CSeq_feat > pFeature( new CSeq_feat );

    string strType = gff.Type();
    if ( strType != "CDS"  &&  strType != "start_codon"  &&  strType != "stop_codon" ) {
        return false;
    }

    if ( ! x_FeatureSetDataCDS( gff, pFeature ) ) {
        return false;
    }
    if ( ! x_CreateFeatureLocation( gff, pFeature ) ) {
        return false;
    }
    if ( ! x_CreateFeatureId( gff, "cds", pFeature ) ) {
        return false;
    }
    if ( ! x_CreateGeneXrefs( gff, pFeature ) ) {
        return false;
    }
    if ( ! x_CreateMrnaXrefs( gff, pFeature ) ) {
        return false;
    }
    if ( ! x_FeatureSetQualifiers( gff, pFeature ) ) {
        return false;
    }

    m_CdsMap[ s_FeatureKey( gff ) ] = pFeature;

    return xAddFeatureToAnnot( pFeature, pAnnot );
}
Example #6
0
//  ----------------------------------------------------------------------------
bool CGtfReader::x_UpdateAnnotCds(
    const CGff2Record& gff,
    CRef< CSeq_annot > pAnnot )
//  ----------------------------------------------------------------------------
{
    //
    // If there is no gene feature to go with this CDS then make one. Otherwise,
    //  make sure the existing gene feature includes the location of the CDS.
    //
    CRef< CSeq_feat > pGene;
    if ( ! x_FindParentGene( gff, pGene ) ) {
        if ( ! x_CreateParentGene( gff, pAnnot ) ) {
            return false;
        }
    }
    else {
        if ( ! x_MergeParentGene( gff, pGene ) ) {
            return false;
        }
    }
    
    //
    // If there is no CDS feature with this gene_id|transcript_id then make one.
    //  Otherwise, fix up the location of the existing one.
    //
    CRef< CSeq_feat > pCds;
    if ( ! x_FindParentCds( gff, pCds ) ) {
        //
        // Create a brand new CDS feature:
        //
        if ( ! x_CreateParentCds( gff, pAnnot ) ) {
            return false;
        }
        x_FindParentCds( gff, pCds );
    }
    else {
        //
        // Update an already existing CDS features:
        //
        if ( ! x_MergeFeatureLocationMultiInterval( gff, pCds ) ) {
            return false;
        }
        if (!x_FeatureTrimQualifiers(gff, pCds)) {
            return false;
        }
    }

    if ( x_CdsIsPartial( gff ) ) {
        CRef<CSeq_feat> pParent;
        if ( x_FindParentMrna( gff, pParent ) ) {
            CSeq_loc& loc = pCds->SetLocation();
            size_t uCdsStart = gff.SeqStart();
            size_t uMrnaStart = pParent->GetLocation().GetStart( eExtreme_Positional );
            if ( uCdsStart == uMrnaStart ) {
                loc.SetPartialStart( true, eExtreme_Positional );
//                cerr << "fuzzed down: " << gff.SeqStart() << "  " << gff.SeqStop() << " vs. " << uMrnaStart << endl;
            }

            size_t uCdsStop =  gff.SeqStop();
            size_t uMrnaStop = pParent->GetLocation().GetStop( eExtreme_Positional );
            if ( uCdsStop == uMrnaStop  && gff.Type() != "stop_codon" ) {
                loc.SetPartialStop( true, eExtreme_Positional );
//                cerr << "fuzzed up  : " << gff.SeqStart() << "  " << gff.SeqStop() << " vs. " << uMrnaStop << endl;
            }
        }
    }
    return true;
}
Example #7
0
//  ----------------------------------------------------------------------------
bool CGtfReader::x_UpdateAnnotFeature(
    const CGff2Record& gff,
    CRef< CSeq_annot > pAnnot,
    ILineErrorListener* pEC)
//  ----------------------------------------------------------------------------
{
    CRef< CSeq_feat > pFeature( new CSeq_feat );

    //
    // Handle officially recognized GTF types:
    //
    string strType = gff.Type();
    if ( strType == "CDS" ) {
        //
        // Observations:
        // Location does not include the stop codon hence must be fixed up once
        //  the stop codon is seen.
        //
        return x_UpdateAnnotCds( gff, pAnnot );
    }
    if ( strType == "start_codon" ) {
        //
        // Observation:
        // Comes in up to three pieces (depending on splicing).
        // Location _is_ included in CDS.
        //
        return x_UpdateAnnotStartCodon( gff, pAnnot );
    }
    if ( strType == "stop_codon" ) {
        //
        // Observation:
        // Comes in up to three pieces (depending on splicing).
        // Location not included in CDS hence must be used to fix up location of
        //  the coding region.
        //
        return x_UpdateAnnotStopCodon( gff, pAnnot );
    }
    if ( strType == "5UTR" ) {
        return x_UpdateAnnot5utr( gff, pAnnot );
    }
    if ( strType == "3UTR" ) {
        return x_UpdateAnnot3utr( gff, pAnnot );
    }
    if ( strType == "inter" ) {
        return x_UpdateAnnotInter( gff, pAnnot );
    }
    if ( strType == "inter_CNS" ) {
        return x_UpdateAnnotInterCns( gff, pAnnot );
    }
    if ( strType == "intron_CNS" ) {
        return x_UpdateAnnotIntronCns( gff, pAnnot );
    }
    if ( strType == "exon"  ||
         strType == "initial"  ||
         strType == "internal"  ||
         strType == "terminal"  ||
         strType == "single") {
        return x_UpdateAnnotExon( gff, pAnnot );
    }

    //
    //  Every other type is not officially sanctioned GTF, and per spec we are
    //  supposed to ignore it. In the spirit of being lenient on input we may
    //  try to salvage some of it anyway.
    //
    if ( strType == "gene" ) {
        //
        // Not an official GTF feature type but seen frequently. Hence we give
        //  it some recognition.
        //
        if ( ! x_CreateParentGene( gff, pAnnot ) ) {
            return false;
        }
    }
    if (strType == "mRNA") {
        if ( ! x_CreateParentMrna(gff, pAnnot) ) {
            return false;
        }
    }
    return x_UpdateAnnotMiscFeature( gff, pAnnot );
}