Exemplo n.º 1
0
// Builds the feature iterator (feat_ci) used for the gene search.
//
// Visible behaviour when m_BioseqHandle is valid:
//  - If circular_length is valid and the range wraps around the origin
//    (from > to), the search location is rebuilt as two intervals and a
//    CFeat_CI is created over that rebuilt location.
//  - Otherwise, parts of `loc` whose Seq-id is not a synonym of
//    m_BioseqHandle are dropped (only if at least one such part exists)
//    and a CFeat_CI is created over the remaining location.
//
// `sel` and `scope` are passed straight through to CFeat_CI.
// NOTE(review): ignored_bioseq_handle and strand are not referenced in the
// visible part of this function, and the branch for an invalid
// m_BioseqHandle lies beyond the end of this excerpt.
void CGeneFinder::CGeneSearchPlugin::setUpFeatureIterator ( 
    CBioseq_Handle &ignored_bioseq_handle,
    auto_ptr<CFeat_CI> &feat_ci,
    TSeqPos circular_length,
    CRange<TSeqPos> &range,
    const CSeq_loc& loc,
    SAnnotSelector &sel,
    CScope &scope,
    ENa_strand &strand )
{
    if ( m_BioseqHandle ) {
        // if we're circular, we may need to split our range into two pieces
        if( ( circular_length != kInvalidSeqPos ) &&
            ( range.GetFrom() > range.GetTo() )) 
        {
            // For circular locations, the "from" is greater than the "to", which
            // would not work properly if given to CFeat_CI.
            // So, as a work around, we transform the range
            // into a mix location of the form "join(0..to, from..MAXINT)"

            // First half: 0..to
            CRef<CSeq_loc> new_loc( new CSeq_loc );
            new_loc->SetInt().SetFrom( 0 );
            new_loc->SetInt().SetTo( range.GetTo() );

            // Second half: from..kMax_Int
            CRef<CSeq_loc> otherHalfOfRange( new CSeq_loc );
            otherHalfOfRange->SetInt().SetFrom( range.GetFrom() );
            otherHalfOfRange->SetInt().SetTo( kMax_Int );

            new_loc->Add( *otherHalfOfRange );

            // Strand and id are applied to the combined location, taken
            // from the caller-supplied loc.
            new_loc->SetStrand( loc.GetStrand() );
            new_loc->SetId( *loc.GetId() );

            feat_ci.reset( new CFeat_CI(scope, *new_loc, sel) );
        } else {            
            // remove far parts, if necessary
            // First pass: detect whether any part refers to a Seq-id that is
            // not a synonym of our bioseq.
            bool loc_change_needed = false;
            ITERATE( CSeq_loc, loc_iter, loc ) {
                if( ! m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) {
                    loc_change_needed = true;
                    break;
                }
            }
            if( loc_change_needed ) {
                // Second pass: rebuild the location from the parts that do
                // refer to our bioseq.
                CRef<CSeq_loc> new_loc( new CSeq_loc );
                ITERATE( CSeq_loc, loc_iter, loc ) {
                    if( m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) {
                        new_loc->Add( *loc_iter.GetRangeAsSeq_loc() );
                    }
                }
                feat_ci.reset( new CFeat_CI(scope, *new_loc, sel) );
            } else {
                // Nothing to strip: search the original location as-is.
                feat_ci.reset( new CFeat_CI(scope, loc, sel) );
            }
        }
    } else {
Exemplo n.º 2
0
void CSeq_loc_equiv::Add(const CSeq_loc& loc)
{
    if ( loc.IsEquiv() ) {
        copy(loc.GetEquiv().Get().begin(), loc.GetEquiv().Get().end(), back_inserter(Set()));
    } else {
        CRef<CSeq_loc> loc2(new CSeq_loc);
        loc2->Assign(loc);
        Set().push_back(loc2);
    }
}
Exemplo n.º 3
0
bool CLocation_constraint :: x_DoesLocationMatchPartialnessConstraint(const CSeq_loc& loc) const
{
  bool partial5 = loc.IsPartialStart(eExtreme_Biological);
  bool partial3 = loc.IsPartialStop(eExtreme_Biological);
  if ( (GetPartial5() == ePartial_constraint_partial && !partial5)
         || (GetPartial5() == ePartial_constraint_complete && partial5) 
         || (GetPartial3() == ePartial_constraint_partial && !partial3)
         || (GetPartial3() == ePartial_constraint_complete && partial3) ) {
       return false;
    }
    else return true;
};
Exemplo n.º 4
0
//  =========================================================================
//  Fills `loc` with the overall extent of the collected values on chrom_id:
//  an empty location when there are no values, otherwise one interval from
//  the first value's position to the last value's end (inclusive).
//  =========================================================================
void CWiggleReader::xSetTotalLoc(CSeq_loc& loc, CSeq_id& chrom_id)
{
    if ( !m_Values.empty() ) {
        CSeq_interval& span = loc.SetInt();
        span.SetId(chrom_id);
        span.SetFrom(m_Values.front().m_Pos);
        // GetEnd() is treated as one-past-the-end here, hence the -1.
        span.SetTo(m_Values.back().GetEnd()-1);
        return;
    }

    loc.SetEmpty(chrom_id);
}
Exemplo n.º 5
0
// Aligns the sequences addressed by two locations and returns the result
// as a Seq-align.
//
// @param scope          Scope used to fetch the sequence data.
// @param loc1           First location; must be a whole or interval location.
// @param loc2           Second location; must be a whole or interval location.
// @param trim_end_gaps  Forwarded to GetDense_seg().
// @return A new CSeq_align of type "partial" holding the dense-seg produced
//         by the aligner.
// @throws CException (eUnknown) if either location is neither whole nor
//         interval.
CRef<CSeq_align> CNWAligner::Run(CScope &scope, const CSeq_loc &loc1,
                                 const CSeq_loc &loc2, bool trim_end_gaps)
{
    // Both inputs must be validated.  The second test previously repeated
    // loc1, so an unsupported loc2 slipped through unchecked.
    if ((!loc1.IsInt() && !loc1.IsWhole()) ||
        (!loc2.IsInt() && !loc2.IsWhole()))
    {
        NCBI_THROW(CException, eUnknown,
                   "Only whole and interval locations supported");
    }

    // Fetch both sequences as IUPAC strings.
    CSeqVector vec1(loc1, scope, CBioseq_Handle::eCoding_Iupac);
    string seq1;
    vec1.GetSeqData(0, vec1.size(), seq1);
    CSeqVector vec2(loc2, scope, CBioseq_Handle::eCoding_Iupac);
    string seq2;
    vec2.GetSeqData(0, vec2.size(), seq2);

    // Run the aligner on the raw strings.
    SetSequences(seq1,seq2);
    Run();

    // Wrap the result, anchoring it at each location's biological start.
    CRef<CSeq_align> align(new CSeq_align);
    align->SetType(CSeq_align::eType_partial);
    align->SetSegs().SetDenseg(*GetDense_seg(
        loc1.GetStart(eExtreme_Biological), loc1.GetStrand(), *loc1.GetId(),
        loc2.GetStart(eExtreme_Biological), loc2.GetStrand(), *loc2.GetId(),
        trim_end_gaps));
    return align;
}
Exemplo n.º 6
0
// Combines loc1 and loc2 via CSeq_loc::Add using the given operation flags
// and returns the resulting location.
CRef<CSeq_loc>
CGetSeqLocFromStringHelper::Seq_loc_Add(
        const CSeq_loc&    loc1,
        const CSeq_loc&    loc2,
        CSeq_loc::TOpFlags flags )
{
    // There is no CScope available here, so no ISynonymMapper is supplied.
    CRef<CSeq_loc> combined = loc1.Add(loc2, flags, NULL);
    return combined;
}
Exemplo n.º 7
0
// Checks the End5 / End3 distance constraints against `loc`.
//
// Two distances are used:
//   pos  - the positional stop of `loc`;
//   pos2 - (bioseq length, or 0 if unset) - pos - 1, only meaningful when
//          a bioseq is supplied.
// On the minus strand End5 is matched against pos2 and End3 against pos;
// otherwise End5 is matched against pos and End3 against pos2.  Whenever a
// constraint needs pos2 and no bioseq was supplied, the result is false.
//
// @param bioseq  Sequence the location lies on; may be empty.
// @param loc     Location to test.
// @return true if no distance constraint is set or all set ones match.
bool CLocation_constraint :: x_DoesLocationMatchDistanceConstraint(CConstRef <CBioseq> bioseq, const CSeq_loc& loc) const
{
  if (!CanGetEnd5() && !CanGetEnd3()) {
      return true;
  }

  unsigned pos = loc.GetStop(eExtreme_Positional);

  // Always initialised: previously this was left indeterminate when no
  // bioseq was supplied and relied on every later read being guarded.
  int pos2 = 0;
  const bool have_bioseq = bioseq.NotEmpty();
  if (have_bioseq) {
     pos2 = (bioseq->IsSetLength() ?  bioseq->GetLength() : 0) - pos - 1;
  }

  if (loc.GetStrand() == eNa_strand_minus) {
    if (CanGetEnd5()) {
      // End5 needs pos2, which requires a bioseq.
      if (!have_bioseq || !GetEnd5().Match(pos2)) {
          return false;
      }
    }
    if (CanGetEnd3()) {
        return GetEnd3().Match(pos);
    }
    return true;
  }

  if (CanGetEnd5() && !GetEnd5().Match(pos)) {
      return false;
  }
  if (CanGetEnd3()) {
    // End3 needs pos2, which requires a bioseq.
    if (!have_bioseq) {
       return false;
    }
    return GetEnd3().Match(pos2);
  }
  return true;
};
Exemplo n.º 8
0
// Returns true when the strand of `loc` satisfies this constraint's strand
// setting.  An unset location never matches; the "any" setting matches every
// set location; otherwise a minus-strand location requires the "minus"
// setting and every other strand requires the "plus" setting.
bool CLocation_constraint :: x_DoesStrandMatchConstraint(const CSeq_loc& loc) const
{
  if (loc.Which() == CSeq_loc::e_not_set) {
     return false;
  }
  if (GetStrand() == eStrand_constraint_any) {
     return true;
  }

  const bool on_minus = (loc.GetStrand() == eNa_strand_minus);
  if (on_minus) {
     return GetStrand() == eStrand_constraint_minus;
  }
  return GetStrand() == eStrand_constraint_plus;
};
Exemplo n.º 9
0
// Constructs a sequence vector over the region described by `loc`.
//
// The sequence map is built from the location within `scope`.  If the
// location yields a Seq-id and that id resolves to a bioseq in the scope,
// the bioseq's TSE handle is stored in m_TSE.  Size and molecule type are
// then read from the sequence map, and the requested coding is applied last
// through SetCoding().
CSeqVector::CSeqVector(const CSeq_loc& loc, CScope& scope,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(&scope),
      m_SeqMap(CSeqMap::GetSeqMapForSeq_loc(loc, &scope)),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    const CSeq_id* loc_id = loc.GetId();
    if ( loc_id ) {
        CBioseq_Handle handle = scope.GetBioseqHandle(*loc_id);
        if ( handle ) {
            m_TSE = handle.GetTSE_Handle();
        }
    }

    m_Size = m_SeqMap->GetLength(m_Scope);
    m_Mol = m_SeqMap->GetMol();
    SetCoding(coding);
}
Exemplo n.º 10
0
// Converts a list of mask ranges into Seq-intervals on query_id and stores
// them in the requested output(s).
//
//  masks     - list of (first, second) offset pairs to convert.
//  seqloc    - query location; its positional start is added to every
//              mask offset.
//  query_id  - Seq-id assigned to every generated interval.
//  mqr       - optional output; when non-null, each interval is appended
//              twice, once with frame eFramePlus1 and once with eFrameMinus1.
//  psl       - optional output; when non-null, each interval is appended to
//              the packed-int of *psl (which is allocated on first use).
//
// NOTE(review): the closing part of this function lies beyond the end of
// this excerpt.
void s_BuildMaskedRanges(CSeqMasker::TMaskList & masks,
                         const CSeq_loc        & seqloc,
                         CSeq_id               & query_id,
                         TMaskedQueryRegions   * mqr,
                         CRef<CSeq_loc>        * psl)
{
    TSeqPos query_start = seqloc.GetStart(eExtreme_Positional);
    
    // This needs to be examined further for places where a +1, -1,
    // etc is needed due to biological vs. computer science offset
    // notations.
    
    ITERATE(CSeqMasker::TMaskList, pr, masks) {
        CRef<CSeq_interval> ival(new CSeq_interval);
        
        TSeqPos
            start  = pr->first,
            end    = pr->second;
        
        // Shift the mask offsets by the start of the query location.
        ival->SetFrom (query_start + start);
        ival->SetTo   (query_start + end);
        ival->SetId   (query_id);
        ival->SetStrand(eNa_strand_both);
        
        if (mqr) {
            // The same interval object is registered for both frames.
            CRef<CSeqLocInfo> info_plus
                (new CSeqLocInfo(&* ival, CSeqLocInfo::eFramePlus1));
            mqr->push_back(info_plus);

            CRef<CSeqLocInfo> info_minus
                (new CSeqLocInfo(&* ival, CSeqLocInfo::eFrameMinus1));
            mqr->push_back(info_minus);
        }
        
        if (psl) {
            // Allocate the output location lazily on the first interval.
            if (psl->Empty()) {
                psl->Reset(new CSeq_loc);
            }
            (**psl).SetPacked_int().Set().push_back(ival);
        }
    }
Exemplo n.º 11
0
// Corresponds to SortFeatItemListByPos from the C toolkit
int CSeq_feat::CompareNonLocation(const CSeq_feat& f2,
                                  const CSeq_loc& loc1,
                                  const CSeq_loc& loc2) const
{
    const CSeqFeatData& data1 = GetData();
    const CSeqFeatData& data2 = f2.GetData();
    CSeqFeatData::E_Choice type1 = data1.Which();
    CSeqFeatData::E_Choice type2 = data2.Which();

    // operon first
    if ( int diff = s_IsOperon(data2) - s_IsOperon(data1) ) {
        return diff;
    }
    if ( type1 != type2 ) {
        // order by feature type
        int order1 = GetTypeSortingOrder(type1);
        int order2 = GetTypeSortingOrder(type2);
        int diff = order1 - order2;
        if ( diff != 0 )
            return diff;
    }

    // minus strand last
    ENa_strand strand1 = loc1.GetStrand();
    ENa_strand strand2 = loc2.GetStrand();
    if ( int diff = IsReverse(strand1) - IsReverse(strand2) ) {
        return diff;
    }

    if ( int diff = loc1.CompareSubLoc(loc2, strand1) ) {
        return diff;
    }

    {{ // compare subtypes
        CSeqFeatData::ESubtype subtype1 = data1.GetSubtype();
        CSeqFeatData::ESubtype subtype2 = data2.GetSubtype();
        int diff = subtype1 - subtype2;
        if ( diff != 0 )
            return diff;
    }}

    // subtypes are equal, types must be equal too
    _ASSERT(type1 == type2);

    // type dependent comparison
    if ( type1 == CSeqFeatData::e_Cdregion ) {
        // compare frames of identical CDS ranges
        if ( int diff = s_GetCdregionOrder(data1)-s_GetCdregionOrder(data2) ) {
            return diff;
        }
    }
    else if ( type1 == CSeqFeatData::e_Imp ) {
        // compare labels of imp features
        int diff = NStr::CompareNocase(data1.GetImp().GetKey(),
                                       data2.GetImp().GetKey());
        if ( diff != 0 )
            return diff;
    }

    // XXX - should compare parent seq-annots
    // XXX 1. parent Seq-annot idx.itemID
    // XXX 2. features itemID

    return 0; // unknown
}
Exemplo n.º 12
0
// Entry point of the LDS2 sample application.
//
// Steps visible in this excerpt:
//   1. If "data_dir" is given, (re)index that directory into the LDS2
//      database at "db"; returns 1 on a CException.
//   2. Register the LDS2 data loader with the object manager as a default
//      loader; returns 2 on a CException.
//   3. If "id" is given, print its synonyms, optionally dump the Seq-entry,
//      then count (and optionally print) features by location, features by
//      product, and alignments for the whole sequence.
//
// NOTE(review): the final return and closing brace lie beyond the end of
// this excerpt.
int CSampleLds2Application::Run(void)
{
    // Process command line args
    const CArgs& args = GetArgs();

    const string& db_path = args["db"].AsString();

    //
    // Initialize the local data storage
    //
    if ( args["data_dir"] ) {
        try {
            CRef<CLDS2_Manager> mgr(new CLDS2_Manager(db_path));
            // Allow to split GB release bioseq-sets
            mgr->SetGBReleaseMode(CLDS2_Manager::eGB_Guess);
            if ( args["group_aligns"] ) {
                mgr->SetSeqAlignGroupSize(args["group_aligns"].AsInteger());
            }
            mgr->AddDataDir(args["data_dir"].AsString());
            mgr->UpdateData();
        }
        catch(CException& e) {
            ERR_POST("Error initializing local data storage: " << e.what());
            return 1;
        }
    }

    // Create OM and LDS2 data loader
    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    try {
        CLDS2_DataLoader::RegisterInObjectManager(*object_manager,
            db_path, -1, CObjectManager::eDefault);
    }
    catch (CException& e) {
        ERR_POST("Error registering LDS2 data loader: " << e.what());
        return 2;
    }

    // Check if an id was requested, try to fetch some data
    if ( args["id"] ) {
        string id = args["id"].AsString();
        // Create Seq-id from the id string specified on the command line
        CSeq_id seq_id(id);
        // Create a new scope ("attached" to our OM).
        CScope scope(*object_manager);
        // Add default loaders to the scope (the LDS2 loader was registered
        // as a default loader above).
        scope.AddDefaults();

        // Get synonyms
        CBioseq_Handle::TId bh_ids = scope.GetIds(seq_id);
        NcbiCout << "Synonyms for " << id << ": ";
        string sep = "";
        ITERATE (CBioseq_Handle::TId, id_it, bh_ids) {
            cout << sep << id_it->AsString();
            sep = ", ";
        }
        cout << endl;

        // Get Bioseq handle for the Seq-id.
        CBioseq_Handle bioseq_handle = scope.GetBioseqHandle(seq_id);
        if ( !bioseq_handle ) {
            // Not fatal: the feature/alignment searches below still run.
            ERR_POST("Bioseq not found, with id=" << id);
        }
        else {
            // Dump the seq-entry.
            if ( args["print_entry"] ) {
                cout << MSerial_AsnText <<
                    *bioseq_handle.GetTopLevelEntry().GetCompleteSeq_entry();
            }
        }

        // Test features
        SAnnotSelector sel;
        sel.SetSearchUnresolved()
            .SetResolveAll();
        // Search over the whole sequence identified by seq_id.
        CSeq_loc loc;
        loc.SetWhole().Assign(seq_id);
        cout << "Features by location:" << endl;
        CFeat_CI fit(scope, loc, sel);
        int fcount = 0;
        for (; fit; ++fit) {
            if ( args["print_feats"] ) {
                cout << MSerial_AsnText << fit->GetOriginalFeature();
            }
            fcount++;
        }
        cout << fcount << " features found" << endl;

        // Same selector, now matching on feature product instead of location.
        cout << "Features by product:" << endl;
        sel.SetByProduct(true);
        CFeat_CI fitp(scope, loc, sel);
        fcount = 0;
        for (; fitp; ++fitp) {
            if ( args["print_feats"] ) {
                cout << MSerial_AsnText << fitp->GetOriginalFeature();
            }
            fcount++;
        }
        cout << fcount << " features found" << endl;

        // Test alignments
        cout << "Alignments:" << endl;
        // Switch the selector back to location-based search.
        sel.SetByProduct(false);
        CAlign_CI ait(scope, loc, sel);
        int acount = 0;
        for (; ait; ++ait) {
            if ( args["print_aligns"] ) {
                cout << MSerial_AsnText << ait.GetOriginalSeq_align();
            }
            acount++;
        }
        cout << acount << " alignments found" << endl;
    }
Exemplo n.º 13
0
int CLocalFinderApp::Run(void)
{
    CArgs myargs = GetArgs();

    int left            = myargs["from"].AsInteger();
    int right           = myargs["to"].AsInteger();
    bool repeats        = myargs["rep"];


    //
    // read our sequence data
    //
    CFastaReader fastareader(myargs["input"].AsString());
    CRef<CSeq_loc> masked_regions;
    masked_regions = fastareader.SaveMask();
    CRef<CSeq_entry> se = fastareader.ReadOneSeq();
    
    if(masked_regions) {
        CBioseq& bioseq = se->SetSeq();     // assumes that reader gets only one sequence per fasta id (no [] in file)
        CRef<CSeq_annot> seq_annot(new CSeq_annot);
        seq_annot->SetNameDesc("NCBI-FASTA-Lowercase");
        bioseq.SetAnnot().push_back(seq_annot);
        CSeq_annot::C_Data::TFtable* feature_table = &seq_annot->SetData().SetFtable();
        for(CSeq_loc_CI i(*masked_regions); i; ++i) {
            CRef<CSeq_feat> repeat(new CSeq_feat);
            CRef<CSeq_id> id(new CSeq_id);
            id->Assign(i.GetSeq_id());
            CRef<CSeq_loc> loc(new CSeq_loc(*id, i.GetRange().GetFrom(), i.GetRange().GetTo()));
            repeat->SetLocation(*loc);
            repeat->SetData().SetImp().SetKey("repeat_region");
            feature_table->push_back(repeat);
        }
    }

    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CScope scope(*objmgr);
    scope.AddTopLevelSeqEntry(*se);       

    CRef<CSeq_id> cntg(new CSeq_id);
    cntg->Assign(*se->GetSeq().GetFirstId());
    CSeq_loc loc;
    loc.SetWhole(*cntg);
    CSeqVector vec(loc, scope);
    vec.SetIupacCoding();

    CResidueVec seq;
    ITERATE(CSeqVector,i,vec)
        seq.push_back(*i);

    // read the alignment information
    TGeneModelList alignments;
    if(myargs["align"]) {
        CNcbiIstream& alignmentfile = myargs["align"].AsInputFile();
        string our_contig = cntg->GetSeqIdString(true);
        string cur_contig; 
        CAlignModel algn;
        
        while(alignmentfile >> algn >> getcontig(cur_contig)) {
            if (cur_contig==our_contig)
                alignments.push_back(algn);
        }
    }

    // create engine
    CRef<CHMMParameters> hmm_params(new CHMMParameters(myargs["model"].AsInputFile()));
    CGnomonEngine gnomon(hmm_params, seq, TSignedSeqRange(left, right));

    // run!
    gnomon.Run(alignments, repeats, true, true, false, false, 10.0);

    // dump the annotation
    CRef<CSeq_annot> annot = gnomon.GetAnnot(*cntg);
    auto_ptr<CObjectOStream> os(CObjectOStream::Open(eSerial_AsnText, cout));
    *os << *annot;

    return 0;

}
Exemplo n.º 14
0
// Compares two features and returns a bit mask describing how they relate.
//
// Three independent comparisons contribute flags to the result:
//   1. Location: strand orientation and sequence::Compare() of loc1/loc2,
//      refined exon-by-exon when one location contains the other.
//   2. Sequence: residue-by-residue comparison of the sequence under each
//      location (eSequence_SameSeq / eSequence_DifferentSeq).
//   3. Product: residue-by-residue comparison of the feature products, when
//      set (eSequence_SameProduct / eSequence_DifferentProduct).
//
// feat1/loc1/scope1 describe the first feature, feat2/loc2/scope2 the second.
// NOTE(review): complex_flags and comments are not referenced anywhere in
// this function body.
CAnnotCompare::TCompareFlags
CAnnotCompare::CompareFeats(const CSeq_feat& feat1,
                            const CSeq_loc& loc1,
                            CScope& scope1,
                            const CSeq_feat& feat2,
                            const CSeq_loc& loc2,
                            CScope& scope2,
                            vector<ECompareFlags>* complex_flags,
                            list<string>* comments)
{
    TCompareFlags loc_state = 0;

    ENa_strand strand1 = sequence::GetStrand(loc1, &scope1);
    ENa_strand strand2 = sequence::GetStrand(loc2, &scope2);
    if (!SameOrientation(strand1, strand2)) {
        // Opposite orientation is reported the same way as no overlap.
        loc_state |= eLocation_Missing;
    } else {
        // NOTE(review): only scope1 is passed to Compare(), although loc2
        // belongs to scope2 — confirm this is intended.
        sequence::ECompare comp_val = sequence::Compare(loc1, loc2, &scope1);
        switch (comp_val) {
        case sequence::eSame:
            loc_state |= eLocation_Same;
            break;

        case sequence::eOverlap:
            loc_state |= eLocation_Complex;
            break;

        case sequence::eContains:
        case sequence::eContained:
            {{
                // Count the parts ("exons") of each location.
                CSeq_loc_CI loc1_iter(loc1);
                size_t loc1_exons = 0;
                for ( ;  loc1_iter;  ++loc1_iter, ++loc1_exons) {
                }
                CSeq_loc_CI loc2_iter(loc2);
                size_t loc2_exons = 0;
                for ( ;  loc2_iter;  ++loc2_iter, ++loc2_exons) {
                }
                bool rev = IsReverse(strand1);
                TSeqRange range1 = loc1.GetTotalRange();
                TSeqRange range2 = loc2.GetTotalRange();
                if (loc1_exons == loc2_exons) {
                    // Same number of parts: decide between a pure 5'/3'
                    // extension and a more complex difference.
                    bool agreement_3prime;
                    bool agreement_5prime;
                    if (!rev) {
                        agreement_5prime =
                            range1.GetFrom() == range2.GetFrom();
                        agreement_3prime =
                            range1.GetTo() == range2.GetTo();
                    } else {
                        // On the reverse strand the roles of From/To swap.
                        agreement_3prime =
                            range1.GetFrom() == range2.GetFrom();
                        agreement_5prime =
                            range1.GetTo() == range2.GetTo();
                    }

                    loc1_iter.Rewind();
                    loc2_iter.Rewind();
                    bool agreement_internal = true;
                    for (unsigned int i = 0;
                         i < loc1_exons;
                         ++i, ++loc1_iter, ++loc2_iter) {

                        // Compare "from" coordinates, except for the first
                        // part on the forward strand and the last part on
                        // the reverse strand.
                        if ((i != 0 || rev)
                            && (i != loc1_exons - 1 || !rev)) {
                            if (loc1_iter.GetRange().GetFrom()
                                != loc2_iter.GetRange().GetFrom()) {
                                agreement_internal = false;
                                break;
                            }
                        }

                        // Compare "to" coordinates, except for the first
                        // part on the reverse strand and the last part on
                        // the forward strand.
                        if ((i != 0 || !rev)
                            && (i != loc1_exons - 1 || rev)) {
                            if (loc1_iter.GetRange().GetTo()
                                != loc2_iter.GetRange().GetTo()) {
                                agreement_internal = false;
                                break;
                            }
                        }

                    }

                    if (!agreement_internal) {
                        loc_state |= eLocation_Complex;
                    } else if (agreement_5prime && !agreement_3prime) {
                        loc_state |= eLocation_3PrimeExtension;
                    } else if (agreement_3prime && !agreement_5prime) {
                        loc_state |= eLocation_5PrimeExtension;
                    } else {
                        // both 3' and 5' disagreement
                        // NOTE(review): this branch is also taken when both
                        // ends agree.
                        loc_state |= eLocation_Complex;
                    }
                } else {
                    // Different number of parts: walk both lists, advancing
                    // both on an exact range match and otherwise only the
                    // iterator of the location with more parts.
                    loc1_iter.Rewind();
                    loc2_iter.Rewind();
                    while (loc1_iter && loc2_iter) {
                        if (loc1_iter.GetRange() == loc2_iter.GetRange()) {
                            ++loc1_iter;
                            ++loc2_iter;
                        } else {
                            if (loc1_exons > loc2_exons) {
                                ++loc1_iter;
                            } else {
                                ++loc2_iter;
                            }
                        }
                    }
                    // If the shorter list was exhausted, all of its parts
                    // were matched, so the difference is missing exon(s).
                    if ((loc1_exons > loc2_exons && !loc2_iter) ||
                        (loc2_exons > loc1_exons && !loc1_iter)) {
                        loc_state |= eLocation_MissingExon;
                    } else {
                        loc_state |= eLocation_Complex;
                    }
                }
            }}
            break;

        default:
        case sequence::eNoOverlap:
            loc_state |= eLocation_Missing;
            break;
        }
    }

    ///
    /// now, do a very simple sequence comparison
    ///
    CSeqVector v1(loc1, scope1);
    CSeqVector v2(loc2, scope2);
    CSeqVector_CI v1_iter = v1.begin();
    CSeqVector_CI v2_iter = v2.begin();
    TCompareFlags seq_state = 0;
    // NOTE(review): `count` is incremented but never read.
    for (size_t count = 0;
         v1_iter != v1.end()  &&  v2_iter != v2.end();
         ++v1_iter, ++v2_iter, ++count) {
        if (*v1_iter != *v2_iter) {
            seq_state |= eSequence_DifferentSeq;
            break;
        }
    }
    // Either iterator not at its end means a mismatch was hit or the
    // lengths differ.
    if (v1_iter != v1.end()  ||  v2_iter != v2.end()) {
        seq_state |= eSequence_DifferentSeq;
    }
    if (seq_state) {
        loc_state |= seq_state;
    } else {
        loc_state |= eSequence_SameSeq;
    }

    ///
    /// also compare products
    ///
    if (feat1.IsSetProduct()  &&  feat2.IsSetProduct()) {
        CSeqVector v1(feat1.GetProduct(), scope1);
        CSeqVector v2(feat2.GetProduct(), scope2);
        CSeqVector_CI v1_iter = v1.begin();
        CSeqVector_CI v2_iter = v2.begin();
        for ( ;  v1_iter != v1.end()  &&  v2_iter != v2.end();  ++v1_iter, ++v2_iter) {
            if (*v1_iter != *v2_iter) {
                loc_state |= eSequence_DifferentProduct;
                break;
            }
        }
        // NOTE(review): unlike the sequence comparison above, there is no
        // length check here, so products that differ only in length are
        // reported as the same product.
        if ((loc_state & eSequence_DifferentProduct) == 0) {
            loc_state |= eSequence_SameProduct;
        }
    } else if (feat1.IsSetProduct() != feat2.IsSetProduct()) {
        // Exactly one of the two features has a product.
        loc_state |= eSequence_DifferentProduct;
    }

    return loc_state;
}