/**
 * Trim edits that fall in the clipped-off high end of the read.
 *
 * The list is scanned from its last element toward its first, and
 * scanning stops at the first edit that survives, so only a trailing
 * run of edits is ever dropped.  An edit is dropped when its position
 * is beyond len - amt, or when it sits exactly at len - amt and is not
 * a read gap.
 *
 * @param ed  list of edits; shortened in place
 * @param len length against which edit positions are checked
 * @param amt number of high-end positions being clipped (<= len)
 */
void Edit::clipHi(EList<Edit>& ed, size_t len, size_t amt) {
	assert_leq(amt, len);
	const size_t cutoff = len - amt;
	// Number of leading edits that will be retained
	size_t keep = ed.size();
	while(keep > 0) {
		const size_t last = keep - 1;
		assert_lt(ed[last].pos, len);
		const bool clipped =
			ed[last].pos > cutoff ||
			(ed[last].pos == cutoff && !ed[last].isReadGap());
		if(!clipped) {
			// First surviving edit from the back; everything before
			// it is kept as well
			break;
		}
		keep = last;
	}
	ed.resize(keep);
}
/** * Merman main driver function. Does the following: * * 1. Parses command-line options */ int merman(int argc, char **argv) { reset(); try { parseCommandLine(argc, argv); Timer tov(cerr, "Overall time: ", timing); EList<string> refstrs; ReferenceSet refs; EList<string> refnames; EList<size_t> reflens; string refstr = argv[optind++]; tokenize(refstr, ",", refstrs); auto_ptr<MerIndex> ind( new MerIndex(ap, rp, readLen, seedWidth, nk.first, nk.second, specificity, begin, naiveCheck, nthreads)); { Timer t(cerr, "... ", timing); if(timing) cerr << "Reading reference sequences..." << endl; for(size_t i = 0; i < refstrs.size(); i++) { if(timing) { cerr << " Sequence " << (i+1) << " of " << refstrs.size() << endl; } if(refIsStr) { refs.addOrigReferenceString(refstrs[i].c_str(), rp); } else { refs.addOrigReferenceFasta(refstrs[i].c_str(), rp); } } for(size_t i = 0; i < refs.numRefs(); i++) { refnames.push_back(string(refs[i].name.toZBuf())); reflens.push_back(refs[i].seq.length(color)); } if(refs.numRefs() == 0) { cerr << "Warning: No references were found" << endl; } if(rp.genCrick) { if(timing) { cerr << " Crickizing" << endl; } // Add the crick strand. If there were bisulfite // transformations to the Watson strand, they are // removed from the Watson strand before the Crick copy // is made. Transformations are then applied to the // new Crick strand. This has the effect of correctly // producing either Watson / Crick in the non-bisulfite // case, or BS Watson / BS Crick in the bisulfite case. refs.addReferenceRevComps(rp, false, 1, 0); } if(rp.genRevcomps) { if(timing) { cerr << " Adding reverse comps" << endl; } // Add reverse complements of all existing references // (after the transformations have already been // applied). refs.addReferenceRevComps(rp, true, -1, 1); } assert(refs.repOk()); } pair<size_t, size_t> mers = make_pair(0, 0); EList<MerIndexThread> threads; { Timer t(cerr, "... ", timing); if(timing) cerr << "Preparing to extract sub-sequences..." 
<< endl; // Instantiate and run index threads assert_gt(nthreads, 0); threads.resize(nthreads); for(int i = 0; i < nthreads; i++) { threads[i].runCount(&refs, ind.get(), i, nthreads, color); } for(int i = 0; i < nthreads; i++) { pair<size_t, size_t> mrs = threads[i].join(); mers.first += mrs.first; mers.second += mrs.second; } ind->allocateMers(); } if(timing || verbose || justBlowup) { cerr << "Expecting index footprint of "; printBytes(mers.first * sizeof(mer_ent), cerr); cerr << endl; if(mers.first > mers.second) { cerr.setf(ios::fixed); cerr << " base footprint is "; printBytes(mers.second * sizeof(mer_ent), cerr); cerr << endl << " blowup factor: " << setprecision(2) << ((double)mers.first / (double)mers.second) << endl; } if(justBlowup) throw 0; } { Timer t(cerr, "... ", timing); if(timing) cerr << "Extracting index sub-sequences..." << endl; // Instantiate and run index threads for(int i = 0; i < nthreads; i++) { threads[i].runIndex(&refs, ind.get(), i, nthreads, color); } for(int i = 0; i < nthreads; i++) threads[i].join(); } assert_eq(mers.first, ind->size()); if(verbose) { cout << " read " << refs.numRefs() << " reference strings" << endl; } if(refs.empty() && iformat != INPUT_CHAININ) { cerr << "Index is empty; not enough reference sequence supplied" << endl; throw 1; } if(refs.numRefs() == 0 && iformat != INPUT_CHAININ) { cerr << "No reference strings provided; aborting..." << endl; throw 1; } { Timer t(cerr, "Sorting reference mers: ", timing); ind->sort(nthreads); // sort mers } { Timer t(cerr, "... ", timing); if(timing) cerr << "Aligning reads..." << endl; string rstr = argv[optind++]; // Instantiate reference map, which translates to new reference // coordinate system prior to alignment output auto_ptr<ReferenceMap> rmap( refmapFile == NULL ? NULL : new ReferenceMap(refmapFile, !refidx)); // Instantiate annotation map, which encodes SNP locations & alleles auto_ptr<AnnotationMap> amap( annotFile == NULL ? 
NULL : new AnnotationMap(annotFile)); // Instantiate the read-input object auto_ptr<Reads> rs( (iformat == INPUT_CMDLINE) ? (Reads*)new StringReads(rstr, begin) : ((iformat == INPUT_FASTA) ? (Reads*)new FastaReads(rstr, begin, bufsz) : ((iformat == INPUT_FASTA_CONT) ? (Reads*)new FastaContinuousReads( rstr, begin, fastaContLen, fastaContFreq, fcontBis, fcontRc, color) : ((iformat == INPUT_FASTQ) ? (Reads*)new FastqReads(rstr, solexaScale, sixty4off, begin, bufsz) : ((iformat == INPUT_CHAININ) ? (Reads*)new ChainReads(rstr, begin, bufsz) : ((iformat == INPUT_CSFASTA) ? (Reads*)new CSFastaReads(rstr, begin, bufsz) : ((iformat == INPUT_CSFASTA_AND_QV) ? (Reads*)new CSFastaAndQVReads(rstr, qualFile, begin, bufsz) : (Reads*)new CSFastqReads(rstr, solexaScale, sixty4off, begin, bufsz)))))))); // Set output stream string of = "-"; if(optind < argc) of = argv[optind++]; // Instantiate the alignment-output object auto_ptr<AlignOutput> outs( (oformat == OUTPUT_SAM) ? (AlignOutput*)new SamOutput(of, fullref, refidx, rp.bisulfiteC || rp.bisulfiteCpG, !samNoCsCq) : (AlignOutput*)new BowtieOutput(of, fullref, printCost, refidx, rp.bisulfiteC || rp.bisulfiteCpG)); outs->printHeader(refnames, reflens); // Run the progress thread, if requested ProgressThread proThread; if(progress) proThread.run(); // Instantiate and run search threads EList<SearchThread> sthreads; sthreads.resize(nthreads); for(int i = 0; i < (int)sthreads.size(); i++) { sthreads[i].init( i, (int)sthreads.size(), ind.get(), rs.get(), &refs, outs.get(), rmap.get(), amap.get()); sthreads[i].run(); } // Wait until search sthreads are finished for(size_t i = 0; i < sthreads.size(); i++) { sthreads[i].join(); } if(progress) { proThread.kill(); proThread.join(); } outs->flush(); } if(!quiet) ProgressThread::reportStats(); } catch(exception& e) { cerr << "Command: "; for(int i = 0; i < argc; i++) cerr << argv[i] << " "; cerr << endl; return 1; } catch(int e) { if(e != 0) { cerr << "Command: "; for(int i = 0; i < argc; 
i++) cerr << argv[i] << " "; cerr << endl; } return e; } return 0; }