bool ExtendJobMapperWithLinks::AlTestEdgeProximity::passes(vector<Alignment*>* matches, ScoredSeq* query){ bool foundPass = false; // this allows the query to overhang the window to a degree acceptable by the percent ID, // while also including a -1 that compensates for the zero-indexing of the final aligned pos long effQsizeM1 = long(float(query->size()) * _fractId) - 1; long aLastPos = query->size() - 1; vector<Alignment*>::iterator alIt = matches->begin(); vector<Alignment*>::iterator alEnd = matches->end(); while (alIt != alEnd){ Alignment* al = *alIt; long bLastPos; if ( al->isLinked(aLastPos, query) ){ bLastPos = al->getLinkage(aLastPos, query); } else { bLastPos = al->gapPairedAfter(aLastPos, query); } // test if the full sequence of the query would fit in the window given where the // alignment ends (it is possible that gaps push the 5p end of the query beyond the // window, but i don't want that to penalize the match). foundPass = _edgeProximity >= al->seqB()->size() - bLastPos + effQsizeM1; // if the test passed, skip to the end if (foundPass){ alIt = alEnd; } else { ++alIt; } } return foundPass; }
/// Produce a copy of `aln` in which every edit has been passed through
/// mutate_edit() with the given base and indel error rates.
///
/// When both rates are exactly zero the input alignment is returned as is.
/// Otherwise the mutated path is simplified and the alignment's sequence is
/// re-derived from it with alignment_seq().
Alignment Sampler::mutate(const Alignment& aln, double base_error, double indel_error) {

    if (base_error == 0 && indel_error == 0) {
        return aln;
    }

    string bases = "ATGC";
    uniform_real_distribution<double> rprob(0, 1);
    uniform_int_distribution<int> rbase(0, 3);

    Alignment mutaln;
    for (size_t mapping_idx = 0; mapping_idx < aln.path().mapping_size(); ++mapping_idx) {
        auto& source_mapping = aln.path().mapping(mapping_idx);

        // each source mapping gets a counterpart at the same position
        Mapping* target_mapping = mutaln.mutable_path()->add_mapping();
        *target_mapping->mutable_position() = source_mapping.position();

        // every edit may expand into several mutated edits
        for (size_t edit_idx = 0; edit_idx < source_mapping.edit_size(); ++edit_idx) {
            auto& source_edit = source_mapping.edit(edit_idx);
            auto mutated_edits = mutate_edit(source_edit,
                                             make_pos_t(source_mapping.position()),
                                             base_error, indel_error,
                                             bases, rprob, rbase);
            for (auto& mutated : mutated_edits) {
                *target_mapping->add_edit() = mutated;
            }
        }
    }

    // re-derive the alignment's sequence from the simplified path
    mutaln = simplify(mutaln);
    mutaln.set_sequence(alignment_seq(mutaln));
    return mutaln;
}
void test2() { std::string string_a = "CAGCCCTAC"; std::string string_b = "CCTGTACCC"; std::vector<std::vector<int>> similarity = { {+2, +0, +0, +0, -1}, {+0, +2, +0, +0, -1}, {+0, +0, +2, +0, -1}, {+0, +0, +0, +2, -1}, {-1, -1, -1, -1, +2} }; Alignment alignment = Alignment(string_a, string_b, Alignment::STR_WEIGHTS(1, -1, -1), similarity, [](int j) { return -(1); }); std::pair<std::string, std::string> result = alignment.global_alignment(); std::string expected_a = "CAGCCCTAC--"; std::string expected_b = "C--CTGTACCC"; if (result.first != expected_a || result.second != expected_b) { alignment.debugS(); this->testFailed("Test2"); } }
/// Apply an explicit alignment attribute, if the nominal declaration behind
/// \p ASTTy carries one, to \p MinimumAlign.
///
/// Nothing happens when the type has no nominal declaration or the
/// declaration has no AlignmentAttr. Otherwise:
///  - for a type that is not fixed-layout a diagnostic is emitted;
///  - for a requested value below the current minimum a diagnostic is emitted;
///  - otherwise \p MinimumAlign is replaced by the requested alignment.
void irgen::applyLayoutAttributes(IRGenModule &IGM,
                                  CanType ASTTy,
                                  bool IsFixedLayout,
                                  Alignment &MinimumAlign) {
  assert(ASTTy && "shouldn't call applyLayoutAttributes without a type");

  auto nominal = ASTTy->getAnyNominal();
  if (!nominal)
    return;

  auto alignAttr = nominal->getAttrs().getAttribute<AlignmentAttr>();
  if (!alignAttr)
    return;

  auto requested = alignAttr->getValue();
  assert(requested != 0 && ((requested - 1) & requested) == 0
         && "alignment not a power of two!");

  auto &Diags = IGM.Context.Diags;

  if (!IsFixedLayout) {
    Diags.diagnose(alignAttr->getLocation(),
                   diag::alignment_dynamic_type_layout_unsupported);
    return;
  }

  if (requested < MinimumAlign.getValue()) {
    Diags.diagnose(alignAttr->getLocation(),
                   diag::alignment_less_than_natural,
                   MinimumAlign.getValue());
    return;
  }

  MinimumAlign = Alignment(requested);
}
void test5() { std::string string_a = "GTATT"; std::string string_b = "TTT"; std::vector<std::vector<int>> similarity = { //A G C T - {+1, -1, -1, -1, -1}, {-1, +1, -1, -1, -1}, {-1, -1, +1, -1, -1}, {-1, -1, -1, +1, -1}, {-1, -1, -1, -1, +1} }; Alignment alignment = Alignment(string_a, string_b, Alignment::STR_WEIGHTS(1, -1, -1), similarity, [](int j) { return -(j+2); }); int result = alignment.get_similarity(); int expected_result = -3; if (result != expected_result) { alignment.debugS(); this->testFailed("Test5"); } }
/**
 * Stem direction for a group of beam element coordinates.
 *
 * Falls back to the layer's own stem direction (m_drawingStemDir) when the
 * first or last coordinate does not hold a LayerElement, or when the parent
 * measure contains no SPACE child. Otherwise the time span from the first
 * element's alignment to the end of the last element is computed and handed
 * to the (time, duration, measure, staff number) overload.
 */
data_STEMDIRECTION Layer::GetDrawingStemDir(const ArrayOfBeamElementCoords *coords)
{
    assert(!coords->empty());

    // The outermost beam elements delimit the time span we are interested in
    LayerElement *firstElement = dynamic_cast<LayerElement *>(coords->front()->m_element);
    LayerElement *lastElement = dynamic_cast<LayerElement *>(coords->back()->m_element);
    if (!firstElement || !lastElement) {
        return m_drawingStemDir;
    }

    Measure *measure = dynamic_cast<Measure *>(this->GetFirstParent(MEASURE));
    assert(measure);

    // Without any <space> in the measure the layer stem direction can be used directly
    if (!measure->FindChildByType(SPACE)) {
        return m_drawingStemDir;
    }

    Alignment *alignmentFirst = firstElement->GetAlignment();
    assert(alignmentFirst);
    Alignment *alignmentLast = lastElement->GetAlignment();
    assert(alignmentLast);

    // Cross-staff situations are ignored here because this should not be called when there is one
    Staff *staff = dynamic_cast<Staff *>(firstElement->GetFirstParent(STAFF));
    assert(staff);

    // From the start of the first element to the end of the last one, rounded with durRound
    double startTime = alignmentFirst->GetTime();
    double span = alignmentLast->GetTime() - startTime + lastElement->GetAlignmentDuration();
    span = durRound(span);

    return GetDrawingStemDir(startTime, span, measure, staff->GetN());
}
/**
 * Stem direction for a single layer element.
 *
 * Returns the layer's own stem direction (m_drawingStemDir) when the parent
 * measure contains no SPACE child. Otherwise delegates to the
 * (time, duration, measure, staff number) overload, using the element's
 * cross-staff if GetCrossStaff() yields one and its parent staff if not.
 */
data_STEMDIRECTION Layer::GetDrawingStemDir(LayerElement *element)
{
    assert(element);

    Measure *measure = dynamic_cast<Measure *>(this->GetFirstParent(MEASURE));
    assert(measure);

    // Without any <space> in the measure the layer stem direction can be used directly
    if (!measure->FindChildByType(SPACE)) {
        return m_drawingStemDir;
    }

    Alignment *alignment = element->GetAlignment();
    assert(alignment);

    // Prefer the cross-staff; fall back to the element's parent staff
    Layer *crossLayer = NULL;
    Staff *staff = element->GetCrossStaff(crossLayer);
    if (staff == NULL) {
        staff = dynamic_cast<Staff *>(element->GetFirstParent(STAFF));
    }
    // One of the two must have been found by now
    assert(staff);

    const int staffN = staff->GetN();
    return GetDrawingStemDir(alignment->GetTime(), element->GetAlignmentDuration(), measure, staffN);
}
/// Sample an alignment whose sequence is `length` bases long, with simulated
/// base and indel errors.
///
/// With no positive error rate the result is simply alignment(length).
/// Otherwise a longer-than-necessary alignment is sampled and mutated, up to
/// 100 times, until the mutated sequence is at least `length` long; any excess
/// is then trimmed from the end.
///
/// If no attempt is long enough, a warning is written to stderr and the last
/// (too short) attempt is returned.
///
/// The success of the loop is tracked explicitly. The previous
/// `while (iter++ < maxiter)` / `if (iter == maxiter)` pairing warned when the
/// final attempt succeeded and stayed silent when every attempt failed.
Alignment Sampler::alignment_with_error(size_t length, double base_error, double indel_error) {

    if (!(base_error > 0 || indel_error > 0)) {
        return alignment(length);
    }

    const size_t maxiter = 100;
    Alignment aln;
    bool long_enough = false;

    for (size_t iter = 0; iter < maxiter && !long_enough; ++iter) {
        // sample a longer-than-necessary alignment so that deletions
        // still leave enough sequence to trim down to `length`
        aln = mutate(alignment(length + 2 * ((double) length * indel_error)),
                     base_error, indel_error);
        long_enough = aln.sequence().size() >= length;
    }

    if (long_enough) {
        if (aln.sequence().size() > length) {
            aln = strip_from_end(aln, aln.sequence().size() - length);
        }
    } else {
        cerr << "[vg::Sampler] Warning: could not generate alignment of sufficient length. "
             << "Graph may be too small, or indel rate too high." << endl;
    }

    return aln;
}
// Intersect two directional alignments.
//
// `a` is read at indices 1..m. An entry a[j] of zero is skipped; otherwise the
// pair is kept only when `b` points back, i.e. b[a[j]] == j. Kept pairs are
// stored shifted down by one as AlignmentPoint(a[j]-1, j-1).
// The parameter `n` is not used by this function.
Alignment SymForceAligner::cIntersection(int *a, int m, int* b, int n){
  Alignment out;
  int j = 1;
  while (j <= m) {
    const int partner = a[j];
    if (partner != 0 && b[partner] == j) {
      out.insert(AlignmentPoint(partner - 1, j - 1));
    }
    ++j;
  }
  return out;
}
void test1() { std::string string_a = "GCATGCA"; std::string string_b = "GATTACA"; std::vector<std::vector<int>> similarity = { //A G C T - {+2, +0, +0, +0, -1}, {+0, +2, +0, +0, -1}, {+0, +0, +2, +0, -1}, {+0, +0, +0, +2, -1}, {-1, -1, -1, -1, +2}, }; Alignment alignment = Alignment(string_a, string_b, Alignment::STR_WEIGHTS(1, -1, -1), similarity, [](int j) { return -(j); }); std::pair<std::string, std::string> result = alignment.global_alignment(); std::string expected_a = "GCATG-CA"; std::string expected_b = "G-ATTACA"; std::cout << alignment.get_similarity(); if (result.first != expected_a || result.second != expected_b) { alignment.debugS(); this->testFailed("Test1"); } }
// Add the contents of one alignment to the pileups.
//
// A reverse alignment is first flipped in place with flip_alignment(). Then,
// for every mapping whose node exists in `graph`, each edit is passed to
// compute_from_edit() together with the running node and read offsets.
// Mappings to nodes that are not in the graph are skipped.
void Pileups::compute_from_alignment(VG& graph, Alignment& alignment) {
    if (alignment.is_reverse()) {
        flip_alignment(alignment);
    }

    const Path& path = alignment.path();
    int64_t read_offset = 0;

    for (int mapping_idx = 0; mapping_idx < path.mapping_size(); ++mapping_idx) {
        const Mapping& mapping = path.mapping(mapping_idx);
        if (!graph.has_node(mapping.position().node_id())) {
            continue;
        }

        const Node* node = graph.get_node(mapping.position().node_id());
        NodePileup* pileup = get_create(node->id());
        int64_t node_offset = mapping.position().offset();

        for (int edit_idx = 0; edit_idx < mapping.edit_size(); ++edit_idx) {
            const Edit& edit = mapping.edit(edit_idx);
            // process all pileups in the edit; the offsets are handed over so
            // they can be updated as we go
            compute_from_edit(*pileup, node_offset, read_offset, *node,
                              alignment, mapping, edit);
        }
    }

    // unless the alignment has no sequence or no mappings, the whole read
    // must have been accounted for
    assert(alignment.sequence().empty() ||
           alignment.path().mapping_size() == 0 ||
           read_offset == alignment.sequence().length());
}
int main() { init(); ifstream fi("res/streptococcus_references.fasta"); vector<string> vs; string l; while (getline(fi, l)) { if (l[0] == '>') { vs.push_back(string()); } else { vs.back() += l; } } fi.close(); int cutoff = 10000; for (auto &s : vs) { s = s.substr(0, cutoff); } SquareAA algo; //NaiveCubeAA algo; Scoring sc; int start = clock(); Alignment sol = algo.align(vs[0], vs[1], sc); cout << "Run time: " << (float)(clock() - start) / CLOCKS_PER_SEC << endl; sol.output(cout, vs[0], vs[1]); system("pause"); }
void test0() { std::string string_a = "AGAGTCAATCCATAG"; std::string string_b = "CAGAGGTCCATCATG"; std::vector<std::vector<int>> similarity = { //A G C T - {+2, +0, +0, +0, -1}, {+0, +2, +0, +0, -1}, {+0, +0, +2, +0, -1}, {+0, +0, +0, +2, -1}, {-1, -1, -1, -1, +2}, }; Alignment alignment = Alignment(string_a, string_b, Alignment::STR_WEIGHTS(1, -1, -1), similarity); std::pair<std::string, std::string> result = alignment.global_alignment(); std::string expected_a = "-AGAG-TCAATCCATAG"; std::string expected_b = "CAGAGGTCCATC-AT-G"; std::cout << alignment.get_similarity(); if (result.first != expected_a || result.second != expected_b) { alignment.debugS(); this->testFailed("Test0"); } }
void QualAdjAligner::align(Alignment& alignment, Graph& g, bool print_score_matrices) { gssw_graph* graph = create_gssw_graph(g, 0, nullptr); const string& sequence = alignment.sequence(); const string& quality = alignment.quality(); if (quality.length() != sequence.length()) { cerr << "error:[Aligner] sequence and quality strings different lengths, cannot perform base quality adjusted alignmenterror:[Aligner] sequence and quality strings different lengths, cannot perform base quality adjusted alignment" << endl; } gssw_graph_fill_qual_adj(graph, sequence.c_str(), quality.c_str(), nt_table, adjusted_score_matrix, scaled_gap_open, scaled_gap_extension, 15, 2); gssw_graph_mapping* gm = gssw_graph_trace_back_qual_adj (graph, sequence.c_str(), quality.c_str(), sequence.size(), nt_table, adjusted_score_matrix, scaled_gap_open, scaled_gap_extension); gssw_mapping_to_alignment(graph, gm, alignment, print_score_matrices); #ifdef debug gssw_print_graph_mapping(gm, stderr); #endif gssw_graph_mapping_destroy(gm); gssw_graph_destroy(graph); }
void test4() { std::string string_a = "AGAGTCAATCCATAG"; std::string string_b = "CAGAGGTCCATCATG"; std::vector<std::vector<int>> similarity = { {+2, +0, +0, +0, -1}, {+0, +2, +0, +0, -1}, {+0, +0, +2, +0, -1}, {+0, +0, +0, +2, -1}, {-1, -1, -1, -1, +2} }; Alignment alignment = Alignment(string_a, string_b, Alignment::STR_WEIGHTS(1, -1, -1), similarity, [](int j) { return -(j+2); }); std::pair<std::string, std::string> result = alignment.global_alignment(); std::string expected_a = "-AGAG-TCAATCCATAG"; std::string expected_b = "CAGAGGTCCATC-AT-G"; if (result.first != expected_a || result.second != expected_b) { alignment.debugS(); this->testFailed("Test4"); } }
/**
 * Filter reads by the fraction of their length that matches the reference.
 *
 * The identity of a read is
 *     (bases in match edits) / (length of the read sequence)
 * where a match edit is one with from_length == to_length and an empty
 * replacement sequence. A read with an empty sequence has identity 0.
 *
 * E.g. a read that matches along 80% of its length is thrown out when the
 * cutoff corresponds to 90%.
 * NOTE(review): the value compared against min_percent_identity is a
 * fraction in [0, 1], not a percentage - confirm the option uses that scale.
 *
 * @return the alignment if its identity is at least min_percent_identity and
 *         an empty Alignment otherwise; the two outcomes are swapped when
 *         `inverse` is set.
 */
Alignment Filter::percent_identity_filter(Alignment& aln){
    const int64_t aln_total_len = aln.sequence().size();
    int64_t aln_match_len = 0;

    // Walk the path by reference: copying every Mapping and Edit message just
    // to read two integers and a string is needless work.
    const Path& path = aln.path();
    //TODO handle reversing mappings
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& edit = mapping.edit(j);
            if (edit.from_length() == edit.to_length() && edit.sequence().empty()){
                aln_match_len += edit.to_length();
            }
        }
    }

    // Guard the division: 0/0 yields NaN, and every comparison with NaN is
    // false, which would make the outcome for an empty read an accident.
    const double read_pctid = aln_total_len > 0
        ? (double) aln_match_len / (double) aln_total_len
        : 0.0;

    if (read_pctid < min_percent_identity){
        return inverse ? aln : Alignment();
    }
    return inverse ? Alignment() : aln;
}
/**
 * Select reads whose ends overhang the reference by more than soft_clip_limit.
 *
 * The overhang of an end is to_length - from_length of the very first / very
 * last edit of the path. If either overhang exceeds the limit the alignment
 * is returned (an empty Alignment when `inverse` is set); otherwise an empty
 * Alignment is returned (the alignment itself when `inverse` is set).
 *
 * An alignment without mappings is returned as is (empty when `inverse` is
 * set); if its sequence is not longer than the limit a warning is also
 * written to stderr.
 */
Alignment Filter::soft_clip_filter(Alignment& aln){
    // No mappings at all: nothing to measure an overhang on.
    if (!(aln.path().mapping_size() > 0)){
        if (!(aln.sequence().length() > soft_clip_limit)){
            cerr << "WARNING: SHORT ALIGNMENT: " << aln.sequence().size() << "bp" << endl
                 << "WITH NO MAPPINGS TO REFERENCE" << endl
                 << "CONSIDER REMOVING IT FROM ANALYSIS" << endl;
        }
        return inverse ? Alignment() : aln;
    }

    // Find overhangs - portions of the read that are inserted at the ends.
    Path path = aln.path();
    const int last_mapping = path.mapping_size() - 1;
    const int last_edit = path.mapping(last_mapping).edit_size() - 1;

    Edit left_edit = path.mapping(0).edit(0);
    Edit right_edit = path.mapping(last_mapping).edit(last_edit);

    int left_overhang = left_edit.to_length() - left_edit.from_length();
    int right_overhang = right_edit.to_length() - right_edit.from_length();

    const bool over_limit = left_overhang > soft_clip_limit || right_overhang > soft_clip_limit;
    if (over_limit){
        return inverse ? Alignment() : aln;
    }
    return inverse ? aln : Alignment();
}
/**
 * Paired-end filter on relative orientation.
 *
 * A read counts as reversed when at least one of its mappings has a reverse
 * position. When exactly one of the two reads is reversed an empty pair is
 * returned; when both or neither are reversed the pair itself is returned.
 * The two outcomes are swapped when `inverse` is set.
 */
pair<Alignment, Alignment> Filter::orientation_filter(Alignment& aln_first, Alignment& aln_second){
    // true if any mapping of the given path lies on the reverse strand
    auto any_reverse = [](const Path& path){
        for (int i = 0; i < path.mapping_size(); i++){
            if (path.mapping(i).position().is_reverse()){
                return true;
            }
        }
        return false;
    };

    const bool first_reversed = any_reverse(aln_first.path());
    const bool second_reversed = any_reverse(aln_second.path());

    if (first_reversed == second_reversed){
        return inverse ? std::make_pair(Alignment(), Alignment()) : std::make_pair(aln_first, aln_second);
    }
    return inverse ? std::make_pair(aln_first, aln_second) : std::make_pair(Alignment(), Alignment());
}
/*PE Functions*/

/**
 * Paired-end filter selecting pairs in which at least one read has a mapping
 * quality of zero.
 *
 * Such a pair is returned unchanged and any other pair is replaced by a pair
 * of empty Alignments; the two outcomes are swapped when `inverse` is set.
 *
 * NOTE(review): a pair in which BOTH reads have mapping quality zero is also
 * selected, which is broader than the name "one end anchored" suggests -
 * confirm this is intended.
 */
pair<Alignment, Alignment> Filter::one_end_anchored_filter(Alignment& aln_first, Alignment& aln_second){
    // logical ||, not bitwise |: the operands are boolean tests
    const bool has_unmapped_end = aln_first.mapping_quality() == 0 || aln_second.mapping_quality() == 0;

    if (has_unmapped_end){
        return inverse ? std::make_pair(Alignment(), Alignment()) : std::make_pair(aln_first, aln_second);
    }
    else{
        return inverse ? std::make_pair(aln_first, aln_second) : std::make_pair(Alignment(), Alignment());
    }
}
/**
 * Paired-end filter selecting pairs whose reads lie on differently named paths.
 *
 * A pair whose two path names differ is returned unchanged and any other pair
 * is replaced by a pair of empty Alignments. As in the single-read overload
 * and the other filters of this class, the two outcomes are swapped when
 * `inverse` is set (this overload previously ignored the flag).
 */
pair<Alignment, Alignment> Filter::interchromosomal_filter(Alignment& aln_first, Alignment& aln_second){
    const bool different_paths = aln_first.path().name() != aln_second.path().name();

    if (different_paths){
        return inverse ? std::make_pair(Alignment(), Alignment()) : std::make_pair(aln_first, aln_second);
    }
    else{
        return inverse ? std::make_pair(aln_first, aln_second) : std::make_pair(Alignment(), Alignment());
    }
}
int main (int argc, char **argv) { int inputFormat = IF_NEXUS; int outputFormat = OF_STANDARD; if (argc > 2) { for (int i=2;i<argc;i++) { //cout << argv[i]; string tempString = argv[i]; if (tempString == "-p") { inputFormat = IF_PHYLIP; outputFormat = OF_PHYLIP; } } } else { cout << "Need size of bootstrap columns. " << endl; exit(0); } // First argument is the bootstrap size. int numCols = atoi(argv[1]); int randomSeed = time(0); // Second argument is the random seed. if (argc > 3) { randomSeed = atoi(argv[2]); } srand(randomSeed); Alignment myalign; myalign.setInputFormat(inputFormat); cin >> myalign; //cout << myalign; //myalign.setOutputFormat(OF_NEXUS); //cout << myalign; //myalign.setOutputFormat(OF_PHYLIP); //cout << myalign; //cout.precision(10); //cout << "Min: " << myalign.sequenceDivergenceMin() << endl; //cout << "Avg: " << myalign.sequenceDivergenceAvg() << endl; //myalign.printSequenceDivergencePairs(); Alignment bootstrapAlign = myalign.getBootstrap(1, numCols); bootstrapAlign.setOutputFormat(outputFormat); cout << bootstrapAlign; }
/**
 * Single-read filter comparing the path name of a read with the path name of
 * its fragment_prev() alignment.
 *
 * When the two names differ the alignment is returned; when they are equal
 * an empty Alignment is returned. The two outcomes are swapped when `inverse`
 * is set.
 */
Alignment Filter::interchromosomal_filter(Alignment& aln){
    const bool same_path = aln.path().name() == aln.fragment_prev().path().name();

    if (same_path){
        return inverse ? aln : Alignment();
    }
    return inverse ? Alignment() : aln;
}
/**
 * Build a new Alignment for benchmark entry `testIndex`.
 *
 * The entry's segments group is stored in `*sg`, and the entry's graph-aligned
 * segment is added to the alignment under the system id "hyp".
 *
 * @return a heap-allocated Alignment; this function does not release it.
 */
Alignment* AlignmentTest::GetAlignmentFor(std::size_t testIndex, SegmentsGroup** sg)
{
    GraphAlignedSegment* alignedSegment = bench->GetResult(testIndex);
    *sg = bench->GetTest(testIndex);

    Alignment* result = new Alignment();
    result->AddSystem("", "hyp");
    result->AddGraphAlignedSegment(alignedSegment, "hyp", *sg);

    return result;
}
// feed adaptation data from a batch file containing entries (rawFile alignmentFile) void FMLLREstimator::feedAdaptationData(const char *strBatchFile, const char *strAlignmentFormat, double *dLikelihood) { BatchFile batchFile(strBatchFile,"features|alignment"); batchFile.load(); for(unsigned int i=0 ; i < batchFile.size() ; ++i) { //for(int i=0 ; i < 5 ; ++i) { // load the alignment Alignment *alignment = NULL; if (strcmp(strAlignmentFormat,"text") == 0) { AlignmentFile alignmentFile(m_phoneSet); VPhoneAlignment *vPhoneAlignment = alignmentFile.load(batchFile.getField(i,"alignment")); assert(vPhoneAlignment); alignment = AlignmentFile::toAlignment(m_phoneSet,m_hmmManager,vPhoneAlignment); AlignmentFile::destroyPhoneAlignment(vPhoneAlignment); } else { alignment = Alignment::load(batchFile.getField(i,"alignment"),NULL); assert(alignment); } // load the feature vectors FeatureFile featureFile(batchFile.getField(i,"features"),MODE_READ); featureFile.load(); Matrix<float> *mFeatures = featureFile.getFeatureVectors(); // load and apply the transform /* Transform *transform = new Transform(); transform->load("/data/daniel/tasks/wsj/experiments/may16th_2013_CMNUtterance/5/fmllr1/transforms/440m.fmllr.bin"); Matrix<float> *mFeaturesX = transform->apply(*mFeatures); mFeatures = mFeaturesX; delete transform; */ // check consistency if (mFeatures->getRows() != alignment->getFrames()) { BVC_ERROR << "inconsistent number of feature vectors / alignment file"; } // accumulate adaptation data double dLikelihoodAlignment = 0.0; feedAdaptationData(*mFeatures,alignment,&dLikelihoodAlignment); BVC_VERB << "loaded file: " << batchFile.getField(i,"alignment") << " likelihood: " << FLT(10,2) << dLikelihoodAlignment << " (" << mFeatures->getRows() << "frames)"; *dLikelihood += dLikelihoodAlignment; // clean-up delete alignment; delete mFeatures; } double dLikelihoodFrame = (*dLikelihood)/m_fOccupancyTotal; BVC_VERB << "total likelihood: " << FLT(20,6) << *dLikelihood << " (likelihood per 
frame: " << FLT(8,4) << dLikelihoodFrame << ")"; }
// act like the path this is against is the reference // and generate an equivalent cigar string cigar_against_path(const Alignment& alignment) { vector<pair<int, char> > cigar; if (!alignment.has_path()) return ""; const Path& path = alignment.path(); int l = 0; for (const auto& mapping : path.mapping()) { mapping_cigar(mapping, cigar); } return cigar_string(cigar); }
/**
 * Paired-end depth filter.
 *
 * Both reads are run through the single-read depth_filter and the results
 * are written back into `aln_first` and `aln_second` (the arguments are
 * modified). A read counts as surviving when its filtered result still has
 * a name.
 *
 * When BOTH reads survive an empty pair is returned, otherwise the filtered
 * pair; the two outcomes are swapped when `inverse` is set.
 *
 * The second operand of the test now looks at `aln_second`; it used to repeat
 * the check on `aln_first`, so the second read never influenced the result.
 *
 * NOTE(review): the polarity of the two branches is the opposite of the
 * paired path_length_filter - confirm which of the two is intended.
 */
pair<Alignment, Alignment> Filter::depth_filter(Alignment& aln_first, Alignment& aln_second){
    aln_first = depth_filter(aln_first);
    aln_second = depth_filter(aln_second);

    const bool both_survive = !aln_first.name().empty() && !aln_second.name().empty();

    if (both_survive){
        return inverse ? make_pair(aln_first, aln_second) : make_pair(Alignment(), Alignment());
    }
    else{
        return inverse ? make_pair(Alignment(), Alignment()) : make_pair(aln_first, aln_second);
    }
}
/**
 * Paired-end path length filter.
 *
 * Each read is run through the single-read path_length_filter; the inputs
 * themselves are not assigned to. A read counts as surviving when its
 * filtered result has a non-empty name.
 *
 * When both reads survive the filtered pair is returned, otherwise an empty
 * pair; the two outcomes are swapped when `inverse` is set.
 */
std::pair<Alignment, Alignment> Filter::path_length_filter(Alignment& aln_first, Alignment& aln_second){
    Alignment filtered_first = path_length_filter(aln_first);
    Alignment filtered_second = path_length_filter(aln_second);

    const bool both_survive = !filtered_first.name().empty() && !filtered_second.name().empty();

    if (both_survive){
        return inverse ? make_pair(Alignment(), Alignment()) : make_pair(filtered_first, filtered_second);
    }
    return inverse ? make_pair(filtered_first, filtered_second) : make_pair(Alignment(), Alignment());
}
// Length of the soft clip at the start of the alignment.
//
// The first edit of the first mapping is a soft clip when it consumes read
// bases but no reference bases (from_length == 0, to_length > 0); its
// to_length is returned. In every other case, including a path without
// mappings or a first mapping without edits, the result is 0.
//
// Only const accessors are used: this is a pure query, and mutable_path()
// would create the path on an alignment that has none.
int softclip_start(Alignment& alignment) {
    const Path& path = alignment.path();
    if (path.mapping_size() == 0) {
        return 0;
    }

    const Mapping& first_mapping = path.mapping(0);
    if (first_mapping.edit_size() == 0) {
        // nothing to inspect; indexing edit 0 would be out of range
        return 0;
    }

    const Edit& first_edit = first_mapping.edit(0);
    if (first_edit.from_length() == 0 && first_edit.to_length() > 0) {
        return first_edit.to_length();
    }
    return 0;
}
// Wrap `content` in a labelled Panel and return it inside an Alignment widget.
//
// The table gets a border width of 2, column spacings of 5 and then overall
// spacings of 2 (in that order). The panel receives the widget of
// scales[toggle - 3] as its second constructor argument.
// NOTE(review): the offset of 3 presumably maps a toggle id onto an index of
// `scales` - confirm against the callers.
// Both new widgets are handed to manage().
Widget* frame(const char* label, int toggle, Table* content) {
  content->set_border_width(2);
  content->set_col_spacings(5);
  content->set_spacings(2);

  Panel* framed = manage(new Panel(label,
                                   scales[toggle - 3]->get_widget(),
                                   content));

  Alignment* holder = manage(new Alignment(0.0, 0.0, 1.0, 0.0));
  holder->add(*framed);
  return holder;
}
// Length of the soft clip at the end of the alignment.
//
// The last edit of the last mapping is a soft clip when it consumes read
// bases but no reference bases (from_length == 0, to_length > 0); its
// to_length is returned. In every other case, including a path without
// mappings or a last mapping without edits, the result is 0.
//
// Only const accessors are used: this is a pure query, and mutable_path()
// would create the path on an alignment that has none.
int softclip_end(Alignment& alignment) {
    const Path& path = alignment.path();
    if (path.mapping_size() == 0) {
        return 0;
    }

    const Mapping& last_mapping = path.mapping(path.mapping_size() - 1);
    if (last_mapping.edit_size() == 0) {
        // nothing to inspect; edit_size() - 1 would be an index of -1
        return 0;
    }

    const Edit& last_edit = last_mapping.edit(last_mapping.edit_size() - 1);
    if (last_edit.from_length() == 0 && last_edit.to_length() > 0) {
        return last_edit.to_length();
    }
    return 0;
}