int main(int argc, const char** argv) { usage(argc, argv); string read = argv[1]; string germline = argv[2]; Fasta Vgenes(germline+"V.fa", 2, "|"); Fasta Jgenes(germline+"J.fa", 2, "|"); Fasta interestingV = extractInterestingGenes(Vgenes, argv[3]); Fasta interestingJ = extractInterestingGenes(Jgenes, argv[4]); if (interestingV.size() == 0) { cerr << "No interesting V found" << endl; exit(2); } if (interestingJ.size() == 0) { cerr << "No interesting J found" << endl; exit(2); } AlignBox box_V("5", V_COLOR); AlignBox box_J("3", J_COLOR); if (read == "-") { // Read on stdin read = read_sequence(cin); } align_against_collection(read, interestingV, -1, false, false, false, &box_V, VDJ); align_against_collection(read, interestingJ, -1, false, true, false, &box_J, VDJ); // This should be handled directly into align_against_collection box_J.start = box_J.end ; box_J.del_left = box_J.del_right; box_J.end = read.size() - 1; int align_V_length = min(GENE_ALIGN, box_V.end - box_V.start + 1); int align_J_length = min(GENE_ALIGN, (int)read.size() - box_J.start + 1); int start_V = box_V.end - align_V_length + 1; int end_J = box_J.start + align_J_length - 1; cout << "read \t" << start_V << "\t" ; cout << V_COLOR << read.substr(start_V, align_V_length) << NO_COLOR << read.substr(box_V.end+1, (box_J.start - 1) - (box_V.end + 1) +1) << J_COLOR << read.substr(box_J.start, align_J_length) << NO_COLOR << "\t" << end_J << endl ; cout << box_V.refToString(start_V, end_J) << "\t" << box_V << endl ; cout << box_J.refToString(start_V, end_J) << "\t" << box_J << endl ; exit (0); }
/**
 * Builds the subset of a repertoire whose labels contain a given substring.
 *
 * @param repertoire  collection to scan; entries are visited in index order
 * @param name        substring searched for in each entry's label
 * @return a new Fasta holding, in their original order, the reads whose
 *         label contains `name` (empty when nothing matches)
 */
Fasta extractInterestingGenes(Fasta &repertoire, string name)
{
  Fasta selection;

  // The repertoire size is queried once, before the scan starts.
  const int entry_count = repertoire.size();

  for (int idx = 0; idx < entry_count; ++idx)
    {
      const bool label_matches = repertoire.label(idx).find(name) != string::npos;
      if (!label_matches)
        continue;

      selection.add(repertoire.read(idx));
    }

  return selection;
}
void align_against_collection(string &read, Fasta &rep, int forbidden_rep_id, bool reverse_ref, bool reverse_both, bool local, AlignBox *box, Cost segment_cost) { int best_score = MINUS_INF ; box->ref_nb = MINUS_INF ; int best_best_i = (int) string::npos ; int best_best_j = (int) string::npos ; int best_first_i = (int) string::npos ; int best_first_j = (int) string::npos ; vector<pair<int, int> > score_r; DynProg::DynProgMode dpMode = DynProg::LocalEndWithSomeDeletions; if (local==true) dpMode = DynProg::Local; // With reverse_ref, the read is reversed to prevent calling revcomp on each reference sequence string sequence_or_rc = revcomp(read, reverse_ref); for (int r = 0 ; r < rep.size() ; r++) { if (r == forbidden_rep_id) continue; DynProg dp = DynProg(sequence_or_rc, rep.sequence(r), dpMode, // DynProg::SemiGlobalTrans, segment_cost, // DNA reverse_both, reverse_both, rep.read(r).marked_pos); bool onlyBottomTriangle = !local ; int score = dp.compute(onlyBottomTriangle, BOTTOM_TRIANGLE_SHIFT); if (local==true){ dp.backtrack(); } if (score > best_score) { best_score = score ; best_best_i = dp.best_i ; best_best_j = dp.best_j ; best_first_i = dp.first_i ; best_first_j = dp.first_j ; box->ref_nb = r ; box->ref_label = rep.label(r) ; if (!local) dp.backtrack(); box->marked_pos = dp.marked_pos_i ; } score_r.push_back(make_pair(score, r)); // #define DEBUG_SEGMENT #ifdef DEBUG_SEGMENT cout << rep.label(r) << " " << score << " " << dp.best_i << endl ; #endif } sort(score_r.begin(),score_r.end(),comp_pair); box->ref = rep.sequence(box->ref_nb); box->del_right = reverse_both ? 
best_best_j : box->ref.size() - best_best_j - 1; box->del_left = best_first_j; box->start = best_first_i; box->score = score_r; #ifdef DEBUG_SEGMENT cout << "best: " << box->ref_label << " " << best_score ; cout << "del/del2/begin:" << (box->del_right) << "/" << (box->del_left) << "/" << (box->start) << endl; cout << endl; #endif if (reverse_ref) // Why -1 here and +1 in dynprog.cpp /// best_i = m - best_i + 1 ; best_best_i = read.length() - best_best_i - 1 ; box->end = best_best_i ; }