void CGeneFinder::CGeneSearchPlugin::setUpFeatureIterator ( CBioseq_Handle &ignored_bioseq_handle, auto_ptr<CFeat_CI> &feat_ci, TSeqPos circular_length, CRange<TSeqPos> &range, const CSeq_loc& loc, SAnnotSelector &sel, CScope &scope, ENa_strand &strand ) { if ( m_BioseqHandle ) { // if we're circular, we may need to split our range into two pieces if( ( circular_length != kInvalidSeqPos ) && ( range.GetFrom() > range.GetTo() )) { // For circular locations, the "from" is greater than the "to", which // would not work properly if given to CFeat_CI. // So, as a work around, we transform the range // into a mix location of the form "join(0..to, from..MAXINT)" CRef<CSeq_loc> new_loc( new CSeq_loc ); new_loc->SetInt().SetFrom( 0 ); new_loc->SetInt().SetTo( range.GetTo() ); CRef<CSeq_loc> otherHalfOfRange( new CSeq_loc ); otherHalfOfRange->SetInt().SetFrom( range.GetFrom() ); otherHalfOfRange->SetInt().SetTo( kMax_Int ); new_loc->Add( *otherHalfOfRange ); new_loc->SetStrand( loc.GetStrand() ); new_loc->SetId( *loc.GetId() ); feat_ci.reset( new CFeat_CI(scope, *new_loc, sel) ); } else { // remove far parts, if necessary bool loc_change_needed = false; ITERATE( CSeq_loc, loc_iter, loc ) { if( ! m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) { loc_change_needed = true; break; } } if( loc_change_needed ) { CRef<CSeq_loc> new_loc( new CSeq_loc ); ITERATE( CSeq_loc, loc_iter, loc ) { if( m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) { new_loc->Add( *loc_iter.GetRangeAsSeq_loc() ); } } feat_ci.reset( new CFeat_CI(scope, *new_loc, sel) ); } else { feat_ci.reset( new CFeat_CI(scope, loc, sel) ); } } } else {
void CSeq_loc_equiv::Add(const CSeq_loc& loc) { if ( loc.IsEquiv() ) { copy(loc.GetEquiv().Get().begin(), loc.GetEquiv().Get().end(), back_inserter(Set())); } else { CRef<CSeq_loc> loc2(new CSeq_loc); loc2->Assign(loc); Set().push_back(loc2); } }
bool CLocation_constraint :: x_DoesLocationMatchPartialnessConstraint(const CSeq_loc& loc) const { bool partial5 = loc.IsPartialStart(eExtreme_Biological); bool partial3 = loc.IsPartialStop(eExtreme_Biological); if ( (GetPartial5() == ePartial_constraint_partial && !partial5) || (GetPartial5() == ePartial_constraint_complete && partial5) || (GetPartial3() == ePartial_constraint_partial && !partial3) || (GetPartial3() == ePartial_constraint_complete && partial3) ) { return false; } else return true; };
// ========================================================================= void CWiggleReader::xSetTotalLoc(CSeq_loc& loc, CSeq_id& chrom_id) // ========================================================================= { if ( m_Values.empty() ) { loc.SetEmpty(chrom_id); } else { CSeq_interval& interval = loc.SetInt(); interval.SetId(chrom_id); interval.SetFrom(m_Values.front().m_Pos); interval.SetTo(m_Values.back().GetEnd()-1); } }
/// Align the sequences addressed by two locations and return the result as
/// a Seq-align.
///
/// Both locations must be either an interval or a whole-sequence location.
/// The residues of each location are fetched as IUPAC text through a
/// CSeqVector, handed to SetSequences(), aligned with the parameterless
/// Run(), and the outcome is wrapped in a partial-type Seq-align holding a
/// Dense-seg built by GetDense_seg().
///
/// @param scope          Scope used to resolve both locations.
/// @param loc1           First location (interval or whole).
/// @param loc2           Second location (interval or whole).
/// @param trim_end_gaps  Forwarded unchanged to GetDense_seg().
/// @return               Newly created Seq-align.
/// @throws CException (eUnknown) if either location is neither an interval
///         nor a whole location.
CRef<CSeq_align> CNWAligner::Run(CScope &scope, const CSeq_loc &loc1,
                                 const CSeq_loc &loc2,
                                 bool trim_end_gaps)
{
    // Validate BOTH inputs.  The second test previously repeated the loc1
    // check, so an unsupported loc2 was never rejected here.
    if ((!loc1.IsInt()  &&  !loc1.IsWhole())  ||
        (!loc2.IsInt()  &&  !loc2.IsWhole())) {
        NCBI_THROW(CException, eUnknown,
                   "Only whole and interval locations supported");
    }

    CSeqVector vec1(loc1, scope, CBioseq_Handle::eCoding_Iupac);
    string seq1;
    vec1.GetSeqData(0, vec1.size(), seq1);

    CSeqVector vec2(loc2, scope, CBioseq_Handle::eCoding_Iupac);
    string seq2;
    vec2.GetSeqData(0, vec2.size(), seq2);

    SetSequences(seq1, seq2);
    Run();

    CRef<CSeq_align> align(new CSeq_align);
    align->SetType(CSeq_align::eType_partial);
    align->SetSegs().SetDenseg(*GetDense_seg(
        loc1.GetStart(eExtreme_Biological), loc1.GetStrand(), *loc1.GetId(),
        loc2.GetStart(eExtreme_Biological), loc2.GetStrand(), *loc2.GetId(),
        trim_end_gaps));
    return align;
}
/// Combine two locations by delegating to CSeq_loc::Add.
///
/// @param loc1   Location on which Add() is invoked.
/// @param loc2   Location passed to Add().
/// @param flags  Operation flags forwarded unchanged.
/// @return       Whatever loc1.Add() returns.
CRef<CSeq_loc> CGetSeqLocFromStringHelper::Seq_loc_Add(
    const CSeq_loc&      loc1,
    const CSeq_loc&      loc2,
    CSeq_loc::TOpFlags   flags )
{
    // No CScope is available here, so no ISynonymMapper can be supplied;
    // NULL is passed in its place.
    CRef<CSeq_loc> combined = loc1.Add(loc2, flags, NULL);
    return combined;
}
/// Check a location against the End5 / End3 distance constraints.
///
/// Two values are derived from the location:
///  - `pos`  : the positional stop of `loc`;
///  - `pos2` : (bioseq length, or 0 if the length is not set) - pos - 1,
///             computed only when a bioseq is supplied.
///
/// On the minus strand End5 is matched against `pos2` and End3 against
/// `pos`; otherwise End5 is matched against `pos` and End3 against `pos2`.
/// Any check that needs `pos2` fails when no bioseq is available.
///
/// @param bioseq  Sequence the location lies on; may be empty.
/// @param loc     Location to test.
/// @return        true if no applicable constraint is violated (including
///                the case where neither End5 nor End3 is available).
bool CLocation_constraint::x_DoesLocationMatchDistanceConstraint(CConstRef <CBioseq> bioseq, const CSeq_loc& loc) const
{
    if (!CanGetEnd5()  &&  !CanGetEnd3()) {
        return true;
    }

    unsigned pos = loc.GetStop(eExtreme_Positional);

    // Initialized so the variable never holds an indeterminate value; it is
    // only consulted below after bioseq has been confirmed non-empty.
    int pos2 = 0;
    if (bioseq.NotEmpty()) {
        pos2 = (bioseq->IsSetLength() ? bioseq->GetLength() : 0) - pos - 1;
    }

    if (loc.GetStrand() == eNa_strand_minus) {
        if (CanGetEnd5()) {
            // End5 on the minus strand needs pos2, hence a bioseq.
            if (bioseq.Empty()  ||  !GetEnd5().Match(pos2)) {
                return false;
            }
        }
        if (CanGetEnd3()) {
            return GetEnd3().Match(pos);
        }
    } else {
        if (CanGetEnd5()  &&  !GetEnd5().Match(pos)) {
            return false;
        }
        if (CanGetEnd3()) {
            // End3 on the non-minus strand needs pos2, hence a bioseq.
            if (bioseq.Empty()) {
                return false;
            }
            return GetEnd3().Match(pos2);
        }
    }
    return true;
}
bool CLocation_constraint :: x_DoesStrandMatchConstraint(const CSeq_loc& loc) const { if (loc.Which() == CSeq_loc::e_not_set) { return false; } if (GetStrand() == eStrand_constraint_any) { return true; } if (loc.GetStrand() == eNa_strand_minus) { if (GetStrand() == eStrand_constraint_minus) { return true; } else return false; } else { if (GetStrand() == eStrand_constraint_plus) { return true; } else return false; } };
/// Construct a sequence vector over a Seq-loc.
///
/// The sequence map is obtained from CSeqMap::GetSeqMapForSeq_loc().  If the
/// location yields a Seq-id and the scope returns a valid bioseq handle for
/// it, that handle's TSE handle is stored in m_TSE.  Size and molecule type
/// are taken from the sequence map, and the requested coding is applied
/// last through SetCoding().
///
/// @param loc     Location the vector represents.
/// @param scope   Scope used to build the map and look up the bioseq.
/// @param coding  Coding passed to SetCoding().
/// @param strand  Strand stored in m_Strand.
CSeqVector::CSeqVector(const CSeq_loc& loc, CScope& scope,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(&scope),
      m_SeqMap(CSeqMap::GetSeqMapForSeq_loc(loc, &scope)),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    const CSeq_id* loc_id = loc.GetId();
    if ( loc_id ) {
        CBioseq_Handle handle = scope.GetBioseqHandle(*loc_id);
        if ( handle ) {
            m_TSE = handle.GetTSE_Handle();
        }
    }

    m_Size = m_SeqMap->GetLength(m_Scope);
    m_Mol  = m_SeqMap->GetMol();
    SetCoding(coding);
}
void s_BuildMaskedRanges(CSeqMasker::TMaskList & masks, const CSeq_loc & seqloc, CSeq_id & query_id, TMaskedQueryRegions * mqr, CRef<CSeq_loc> * psl) { TSeqPos query_start = seqloc.GetStart(eExtreme_Positional); // This needs to be examined further for places where a +1, -1, // etc is needed due to biological vs. computer science offset // notations. ITERATE(CSeqMasker::TMaskList, pr, masks) { CRef<CSeq_interval> ival(new CSeq_interval); TSeqPos start = pr->first, end = pr->second; ival->SetFrom (query_start + start); ival->SetTo (query_start + end); ival->SetId (query_id); ival->SetStrand(eNa_strand_both); if (mqr) { CRef<CSeqLocInfo> info_plus (new CSeqLocInfo(&* ival, CSeqLocInfo::eFramePlus1)); mqr->push_back(info_plus); CRef<CSeqLocInfo> info_minus (new CSeqLocInfo(&* ival, CSeqLocInfo::eFrameMinus1)); mqr->push_back(info_minus); } if (psl) { if (psl->Empty()) { psl->Reset(new CSeq_loc); } (**psl).SetPacked_int().Set().push_back(ival); } }
// Corresponds to SortFeatItemListByPos from the C toolkit int CSeq_feat::CompareNonLocation(const CSeq_feat& f2, const CSeq_loc& loc1, const CSeq_loc& loc2) const { const CSeqFeatData& data1 = GetData(); const CSeqFeatData& data2 = f2.GetData(); CSeqFeatData::E_Choice type1 = data1.Which(); CSeqFeatData::E_Choice type2 = data2.Which(); // operon first if ( int diff = s_IsOperon(data2) - s_IsOperon(data1) ) { return diff; } if ( type1 != type2 ) { // order by feature type int order1 = GetTypeSortingOrder(type1); int order2 = GetTypeSortingOrder(type2); int diff = order1 - order2; if ( diff != 0 ) return diff; } // minus strand last ENa_strand strand1 = loc1.GetStrand(); ENa_strand strand2 = loc2.GetStrand(); if ( int diff = IsReverse(strand1) - IsReverse(strand2) ) { return diff; } if ( int diff = loc1.CompareSubLoc(loc2, strand1) ) { return diff; } {{ // compare subtypes CSeqFeatData::ESubtype subtype1 = data1.GetSubtype(); CSeqFeatData::ESubtype subtype2 = data2.GetSubtype(); int diff = subtype1 - subtype2; if ( diff != 0 ) return diff; }} // subtypes are equal, types must be equal too _ASSERT(type1 == type2); // type dependent comparison if ( type1 == CSeqFeatData::e_Cdregion ) { // compare frames of identical CDS ranges if ( int diff = s_GetCdregionOrder(data1)-s_GetCdregionOrder(data2) ) { return diff; } } else if ( type1 == CSeqFeatData::e_Imp ) { // compare labels of imp features int diff = NStr::CompareNocase(data1.GetImp().GetKey(), data2.GetImp().GetKey()); if ( diff != 0 ) return diff; } // XXX - should compare parent seq-annots // XXX 1. parent Seq-annot idx.itemID // XXX 2. features itemID return 0; // unknown }
int CSampleLds2Application::Run(void) { // Process command line args const CArgs& args = GetArgs(); const string& db_path = args["db"].AsString(); // // Initialize the local data storage // if ( args["data_dir"] ) { try { CRef<CLDS2_Manager> mgr(new CLDS2_Manager(db_path)); // Allow to split GB release bioseq-sets mgr->SetGBReleaseMode(CLDS2_Manager::eGB_Guess); if ( args["group_aligns"] ) { mgr->SetSeqAlignGroupSize(args["group_aligns"].AsInteger()); } mgr->AddDataDir(args["data_dir"].AsString()); mgr->UpdateData(); } catch(CException& e) { ERR_POST("Error initializing local data storage: " << e.what()); return 1; } } // Create OM and LDS2 data loader CRef<CObjectManager> object_manager = CObjectManager::GetInstance(); try { CLDS2_DataLoader::RegisterInObjectManager(*object_manager, db_path, -1, CObjectManager::eDefault); } catch (CException& e) { ERR_POST("Error registering LDS2 data loader: " << e.what()); return 2; } // Check if an id was requested, try to fetch some data if ( args["id"] ) { string id = args["id"].AsString(); // Create Seq-id, set it to the GI specified on the command line CSeq_id seq_id(id); // Create a new scope ("attached" to our OM). CScope scope(*object_manager); // Add default loaders (GB loader in this demo) to the scope. scope.AddDefaults(); // Get synonyms CBioseq_Handle::TId bh_ids = scope.GetIds(seq_id); NcbiCout << "Synonyms for " << id << ": "; string sep = ""; ITERATE (CBioseq_Handle::TId, id_it, bh_ids) { cout << sep << id_it->AsString(); sep = ", "; } cout << endl; // Get Bioseq handle for the Seq-id. CBioseq_Handle bioseq_handle = scope.GetBioseqHandle(seq_id); if ( !bioseq_handle ) { ERR_POST("Bioseq not found, with id=" << id); } else { // Dump the seq-entry. 
if ( args["print_entry"] ) { cout << MSerial_AsnText << *bioseq_handle.GetTopLevelEntry().GetCompleteSeq_entry(); } } // Test features SAnnotSelector sel; sel.SetSearchUnresolved() .SetResolveAll(); CSeq_loc loc; loc.SetWhole().Assign(seq_id); cout << "Features by location:" << endl; CFeat_CI fit(scope, loc, sel); int fcount = 0; for (; fit; ++fit) { if ( args["print_feats"] ) { cout << MSerial_AsnText << fit->GetOriginalFeature(); } fcount++; } cout << fcount << " features found" << endl; cout << "Features by product:" << endl; sel.SetByProduct(true); CFeat_CI fitp(scope, loc, sel); fcount = 0; for (; fitp; ++fitp) { if ( args["print_feats"] ) { cout << MSerial_AsnText << fitp->GetOriginalFeature(); } fcount++; } cout << fcount << " features found" << endl; // Test alignments cout << "Alignments:" << endl; sel.SetByProduct(false); CAlign_CI ait(scope, loc, sel); int acount = 0; for (; ait; ++ait) { if ( args["print_aligns"] ) { cout << MSerial_AsnText << ait.GetOriginalSeq_align(); } acount++; } cout << acount << " alignments found" << endl; }
int CLocalFinderApp::Run(void) { CArgs myargs = GetArgs(); int left = myargs["from"].AsInteger(); int right = myargs["to"].AsInteger(); bool repeats = myargs["rep"]; // // read our sequence data // CFastaReader fastareader(myargs["input"].AsString()); CRef<CSeq_loc> masked_regions; masked_regions = fastareader.SaveMask(); CRef<CSeq_entry> se = fastareader.ReadOneSeq(); if(masked_regions) { CBioseq& bioseq = se->SetSeq(); // assumes that reader gets only one sequence per fasta id (no [] in file) CRef<CSeq_annot> seq_annot(new CSeq_annot); seq_annot->SetNameDesc("NCBI-FASTA-Lowercase"); bioseq.SetAnnot().push_back(seq_annot); CSeq_annot::C_Data::TFtable* feature_table = &seq_annot->SetData().SetFtable(); for(CSeq_loc_CI i(*masked_regions); i; ++i) { CRef<CSeq_feat> repeat(new CSeq_feat); CRef<CSeq_id> id(new CSeq_id); id->Assign(i.GetSeq_id()); CRef<CSeq_loc> loc(new CSeq_loc(*id, i.GetRange().GetFrom(), i.GetRange().GetTo())); repeat->SetLocation(*loc); repeat->SetData().SetImp().SetKey("repeat_region"); feature_table->push_back(repeat); } } CRef<CObjectManager> objmgr = CObjectManager::GetInstance(); CScope scope(*objmgr); scope.AddTopLevelSeqEntry(*se); CRef<CSeq_id> cntg(new CSeq_id); cntg->Assign(*se->GetSeq().GetFirstId()); CSeq_loc loc; loc.SetWhole(*cntg); CSeqVector vec(loc, scope); vec.SetIupacCoding(); CResidueVec seq; ITERATE(CSeqVector,i,vec) seq.push_back(*i); // read the alignment information TGeneModelList alignments; if(myargs["align"]) { CNcbiIstream& alignmentfile = myargs["align"].AsInputFile(); string our_contig = cntg->GetSeqIdString(true); string cur_contig; CAlignModel algn; while(alignmentfile >> algn >> getcontig(cur_contig)) { if (cur_contig==our_contig) alignments.push_back(algn); } } // create engine CRef<CHMMParameters> hmm_params(new CHMMParameters(myargs["model"].AsInputFile())); CGnomonEngine gnomon(hmm_params, seq, TSignedSeqRange(left, right)); // run! 
gnomon.Run(alignments, repeats, true, true, false, false, 10.0); // dump the annotation CRef<CSeq_annot> annot = gnomon.GetAnnot(*cntg); auto_ptr<CObjectOStream> os(CObjectOStream::Open(eSerial_AsnText, cout)); *os << *annot; return 0; }
/// Compare two features by location, by the sequence under their locations
/// and by their products, and return the combined flag set.
///
/// Location comparison:
///  - different orientation               -> eLocation_Missing
///  - sequence::eSame                     -> eLocation_Same
///  - sequence::eOverlap                  -> eLocation_Complex
///  - sequence::eContains / eContained    -> refined by interval ("exon")
///    structure into eLocation_3PrimeExtension, eLocation_5PrimeExtension,
///    eLocation_MissingExon or eLocation_Complex
///  - anything else (incl. eNoOverlap)    -> eLocation_Missing
///
/// Sequence comparison: residues under loc1 (in scope1) and loc2 (in
/// scope2) are compared one by one; any mismatch or length difference
/// yields eSequence_DifferentSeq, otherwise eSequence_SameSeq.
///
/// Product comparison: when both features have a product the product
/// residues are compared the same way (mismatch or length difference ->
/// eSequence_DifferentProduct, otherwise eSequence_SameProduct).  When
/// only one has a product, eSequence_DifferentProduct is set.  When neither
/// has one, no product flag is set.
///
/// @param feat1, loc1, scope1  First feature, the location to compare for
///                             it, and the scope used to resolve it.
/// @param feat2, loc2, scope2  Same for the second feature.
/// @param complex_flags        Not used by this implementation.
/// @param comments             Not used by this implementation.
/// @return                     Bitwise OR of the flags described above.
CAnnotCompare::TCompareFlags CAnnotCompare::CompareFeats(const CSeq_feat& feat1,
                                                         const CSeq_loc& loc1,
                                                         CScope& scope1,
                                                         const CSeq_feat& feat2,
                                                         const CSeq_loc& loc2,
                                                         CScope& scope2,
                                                         vector<ECompareFlags>* complex_flags,
                                                         list<string>* comments)
{
    TCompareFlags loc_state = 0;

    ENa_strand strand1 = sequence::GetStrand(loc1, &scope1);
    ENa_strand strand2 = sequence::GetStrand(loc2, &scope2);
    if (!SameOrientation(strand1, strand2)) {
        loc_state |= eLocation_Missing;
    } else {
        sequence::ECompare comp_val = sequence::Compare(loc1, loc2, &scope1);
        switch (comp_val) {
        case sequence::eSame:
            loc_state |= eLocation_Same;
            break;

        case sequence::eOverlap:
            loc_state |= eLocation_Complex;
            break;

        case sequence::eContains:
        case sequence::eContained:
            {{
                 // Count the intervals of each location by walking the
                 // iterators to their end.
                 CSeq_loc_CI loc1_iter(loc1);
                 size_t loc1_exons = 0;
                 for ( ;  loc1_iter;  ++loc1_iter, ++loc1_exons) {
                 }
                 CSeq_loc_CI loc2_iter(loc2);
                 size_t loc2_exons = 0;
                 for ( ;  loc2_iter;  ++loc2_iter, ++loc2_exons) {
                 }

                 bool rev = IsReverse(strand1);
                 TSeqRange range1 = loc1.GetTotalRange();
                 TSeqRange range2 = loc2.GetTotalRange();

                 if (loc1_exons == loc2_exons) {
                     // Same number of intervals: decide which overall end
                     // coincides.  On the reverse strand the roles of the
                     // total range's From and To are swapped.
                     bool agreement_3prime;
                     bool agreement_5prime;
                     if (!rev) {
                         agreement_5prime =
                             range1.GetFrom() == range2.GetFrom();
                         agreement_3prime =
                             range1.GetTo() == range2.GetTo();
                     } else {
                         agreement_3prime =
                             range1.GetFrom() == range2.GetFrom();
                         agreement_5prime =
                             range1.GetTo() == range2.GetTo();
                     }

                     loc1_iter.Rewind();
                     loc2_iter.Rewind();
                     bool agreement_internal = true;
                     for (unsigned int i = 0;
                          i < loc1_exons;
                          ++i, ++loc1_iter, ++loc2_iter) {

                         // From is compared for every interval except the
                         // first one when forward and the last one when
                         // reverse.
                         if ((i != 0  ||  rev)  &&
                             (i != loc1_exons - 1  ||  !rev)) {
                             if (loc1_iter.GetRange().GetFrom() !=
                                 loc2_iter.GetRange().GetFrom()) {
                                 agreement_internal = false;
                                 break;
                             }
                         }
                         // To is compared for every interval except the
                         // first one when reverse and the last one when
                         // forward.
                         if ((i != 0  ||  !rev)  &&
                             (i != loc1_exons - 1  ||  rev)) {
                             if (loc1_iter.GetRange().GetTo() !=
                                 loc2_iter.GetRange().GetTo()) {
                                 agreement_internal = false;
                                 break;
                             }
                         }
                     }

                     if (!agreement_internal) {
                         loc_state |= eLocation_Complex;
                     } else if (agreement_5prime  &&  !agreement_3prime) {
                         loc_state |= eLocation_3PrimeExtension;
                     } else if (agreement_3prime  &&  !agreement_5prime) {
                         loc_state |= eLocation_5PrimeExtension;
                     } else {
                         // either both ends agree or both ends disagree
                         loc_state |= eLocation_Complex;
                     }
                 } else {
                     // Different interval counts: step through both lists,
                     // advancing both on an exact range match and only the
                     // longer list otherwise.
                     loc1_iter.Rewind();
                     loc2_iter.Rewind();
                     while (loc1_iter  &&  loc2_iter) {
                         if (loc1_iter.GetRange() == loc2_iter.GetRange()) {
                             ++loc1_iter;
                             ++loc2_iter;
                         } else {
                             if (loc1_exons > loc2_exons) {
                                 ++loc1_iter;
                             } else {
                                 ++loc2_iter;
                             }
                         }
                     }
                     // The shorter list was fully consumed.
                     if ((loc1_exons > loc2_exons  &&  !loc2_iter)  ||
                         (loc2_exons > loc1_exons  &&  !loc1_iter)) {
                         loc_state |= eLocation_MissingExon;
                     } else {
                         loc_state |= eLocation_Complex;
                     }
                 }
             }}
            break;

        default:
        case sequence::eNoOverlap:
            loc_state |= eLocation_Missing;
            break;
        }
    }

    ///
    /// now, do a very simple sequence comparison
    ///
    CSeqVector v1(loc1, scope1);
    CSeqVector v2(loc2, scope2);
    CSeqVector_CI v1_iter = v1.begin();
    CSeqVector_CI v2_iter = v2.begin();
    TCompareFlags seq_state = 0;
    for ( ;  v1_iter != v1.end()  &&  v2_iter != v2.end();
          ++v1_iter, ++v2_iter) {
        if (*v1_iter != *v2_iter) {
            seq_state |= eSequence_DifferentSeq;
            break;
        }
    }
    // Leftover residues on either side: mismatch break or unequal length.
    if (v1_iter != v1.end()  ||  v2_iter != v2.end()) {
        seq_state |= eSequence_DifferentSeq;
    }
    if (seq_state) {
        loc_state |= seq_state;
    } else {
        loc_state |= eSequence_SameSeq;
    }

    ///
    /// also compare products
    ///
    if (feat1.IsSetProduct()  &&  feat2.IsSetProduct()) {
        CSeqVector pv1(feat1.GetProduct(), scope1);
        CSeqVector pv2(feat2.GetProduct(), scope2);
        CSeqVector_CI pv1_iter = pv1.begin();
        CSeqVector_CI pv2_iter = pv2.begin();
        for ( ;  pv1_iter != pv1.end()  &&  pv2_iter != pv2.end();
              ++pv1_iter, ++pv2_iter) {
            if (*pv1_iter != *pv2_iter) {
                loc_state |= eSequence_DifferentProduct;
                break;
            }
        }
        // A product that is only a prefix of the other is not the same
        // product; treat leftover residues exactly as the location
        // sequence comparison above does.
        if (pv1_iter != pv1.end()  ||  pv2_iter != pv2.end()) {
            loc_state |= eSequence_DifferentProduct;
        }

        if ((loc_state & eSequence_DifferentProduct) == 0) {
            loc_state |= eSequence_SameProduct;
        }
    } else if (feat1.IsSetProduct() != feat2.IsSetProduct()) {
        loc_state |= eSequence_DifferentProduct;
    }

    return loc_state;
}