/** * Parse a single quality string from fb and store qualities in r. * Assume the next character obtained via fb.get() is the first * character of the quality string. When returning, the next * character returned by fb.peek() or fb.get() should be the first * character of the following line. */ int parseQuals( Read& r, FileBuf& fb, int firstc, int readLen, int trim3, int trim5, bool intQuals, bool phred64, bool solexa64) { int c = firstc; assert(c != '\n' && c != '\r'); r.qual.clear(); if (intQuals) { while (c != '\r' && c != '\n' && c != -1) { bool neg = false; int num = 0; while(!isspace(c) && !fb.eof()) { if(c == '-') { neg = true; assert_eq(num, 0); } else { if(!isdigit(c)) { char buf[2048]; cerr << "Warning: could not parse quality line:" << endl; fb.getPastNewline(); cerr << fb.copyLastN(buf); buf[2047] = '\0'; cerr << buf; throw 1; } assert(isdigit(c)); num *= 10; num += (c - '0'); } c = fb.get(); } if(neg) num = 0; // Phred-33 ASCII encode it and add it to the back of the // quality string r.qual.append('!' + num); // Skip over next stretch of whitespace while(c != '\r' && c != '\n' && isspace(c) && !fb.eof()) { c = fb.get(); } } } else { while (c != '\r' && c != '\n' && c != -1) { r.qual.append(charToPhred33(c, solexa64, phred64)); c = fb.get(); while(c != '\r' && c != '\n' && isspace(c) && !fb.eof()) { c = fb.get(); } } } if ((int)r.qual.length() < readLen-1 || ((int)r.qual.length() < readLen && !r.color)) { tooFewQualities(r.name); } r.qual.trimEnd(trim3); if(r.qual.length()-trim5 < r.patFw.length()) { assert(gColor && r.primer != -1); assert_gt(trim5, 0); trim5--; } r.qual.trimBegin(trim5); if(r.qual.length() <= 0) return 0; assert_eq(r.qual.length(), r.patFw.length()); while(fb.peek() == '\n' || fb.peek() == '\r') fb.get(); return (int)r.qual.length(); }
static void driver( const string& infile, EList<string>& infiles, const string& snpfile, const string& htfile, const string& ssfile, const string& exonfile, const string& svfile, const string& outfile, bool packed, int reverse) { initializeCntLut(); initializeCntBit(); EList<FileBuf*> is(MISC_CAT); bool bisulfite = false; RefReadInParams refparams(false, reverse, nsToAs, bisulfite); assert_gt(infiles.size(), 0); if(format == CMDLINE) { // Adapt sequence strings to stringstreams open for input stringstream *ss = new stringstream(); for(size_t i = 0; i < infiles.size(); i++) { (*ss) << ">" << i << endl << infiles[i].c_str() << endl; } FileBuf *fb = new FileBuf(ss); assert(fb != NULL); assert(!fb->eof()); assert(fb->get() == '>'); ASSERT_ONLY(fb->reset()); assert(!fb->eof()); is.push_back(fb); } else { // Adapt sequence files to ifstreams for(size_t i = 0; i < infiles.size(); i++) { FILE *f = fopen(infiles[i].c_str(), "r"); if (f == NULL) { cerr << "Error: could not open "<< infiles[i].c_str() << endl; throw 1; } FileBuf *fb = new FileBuf(f); assert(fb != NULL); if(fb->peek() == -1 || fb->eof()) { cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl; continue; } assert(!fb->eof()); assert(fb->get() == '>'); ASSERT_ONLY(fb->reset()); assert(!fb->eof()); is.push_back(fb); } } if(is.empty()) { cerr << "Warning: All fasta inputs were empty" << endl; throw 1; } filesWritten.push_back(outfile + ".1." + gfm_ext); filesWritten.push_back(outfile + ".2." + gfm_ext); // Vector for the ordered list of "records" comprising the input // sequences. A record represents a stretch of unambiguous // characters in one of the input sequences. EList<RefRecord> szs(MISC_CAT); std::pair<size_t, size_t> sztot; { if(verbose) cerr << "Reading reference sizes" << endl; Timer _t(cerr, " Time reading reference sizes: ", verbose); if(!reverse && (writeRef || justRef)) { filesWritten.push_back(outfile + ".3." + gfm_ext); filesWritten.push_back(outfile + ".4." + gfm_ext); sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck); } else { sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck); } } if(justRef) return; assert_gt(sztot.first, 0); assert_gt(sztot.second, 0); assert_gt(szs.size(), 0); // Construct index from input strings and parameters filesWritten.push_back(outfile + ".5." + gfm_ext); filesWritten.push_back(outfile + ".6." + gfm_ext); filesWritten.push_back(outfile + ".7." + gfm_ext); filesWritten.push_back(outfile + ".8." + gfm_ext); TStr s; HGFM<TIndexOffU> hGFM( s, packed, 1, // TODO: maybe not? lineRate, offRate, // suffix-array sampling rate ftabChars, // number of chars in initial arrow-pair calc localOffRate, localFtabChars, nthreads, snpfile, htfile, ssfile, exonfile, svfile, outfile, // basename for .?.ht2 files reverse == 0, // fw !entireSA, // useBlockwise bmax, // block size for blockwise SA builder bmaxMultSqrt, // block size as multiplier of sqrt(len) bmaxDivN, // block size as divisor of len noDc? 0 : dcv,// difference-cover period is, // list of input streams szs, // list of reference sizes (TIndexOffU)sztot.first, // total size of all unambiguous ref chars refparams, // reference read-in parameters seed, // pseudo-random number generator seed -1, // override offRate verbose, // be talkative autoMem, // pass exceptions up to the toplevel so that we can adjust memory settings automatically sanityCheck); // verify results and internal consistency // Note that the Ebwt is *not* resident in memory at this time. To // load it into memory, call ebwt.loadIntoMemory() if(verbose) { // Print Ebwt's vital stats hGFM.gh().print(cerr); } if(sanityCheck) { // Try restoring the original string (if there were // multiple texts, what we'll get back is the joined, // padded string, not a list) hGFM.loadIntoMemory( reverse ? (refparams.reverse == REF_READ_REVERSE) : 0, true, // load SA sample? true, // load ftab? true, // load rstarts? false, false); SString<char> s2; hGFM.restore(s2); hGFM.evictFromMemory(); { SString<char> joinedss = GFM<>::join<SString<char> >( is, // list of input streams szs, // list of reference sizes (TIndexOffU)sztot.first, // total size of all unambiguous ref chars refparams, // reference read-in parameters seed); // pseudo-random number generator seed if(refparams.reverse == REF_READ_REVERSE) { joinedss.reverse(); } assert_eq(joinedss.length(), s2.length()); assert(sstr_eq(joinedss, s2)); } if(verbose) { if(s2.length() < 1000) { cout << "Passed restore check: " << s2.toZBuf() << endl; } else { cout << "Passed restore check: (" << s2.length() << " chars)" << endl; } } } }