コード例 #1
0
/**
 * Sanity-check various pieces of the Ebwt.
 *
 * Every check is expressed with the assert macros, so the function only
 * reports problems in builds where those macros are active.  The index
 * must already be resident in memory (see the isInMemory() assertion).
 *
 * Checks performed, in order:
 *  - ftab: adjacent ftabLo/ftabHi entries are non-decreasing and the
 *    last ftabHi entry equals the BWT length;
 *  - offs: every sampled offset is < _bwtLen and no offset occurs twice;
 *  - nPat: at least one pattern exists;
 *  - rstarts: field 0 of consecutive triples is strictly increasing and
 *    field 1 is monotone, in the direction selected by 'reverse';
 *  - ebwt sides: delegated to sanityCheckUpToSide().
 *
 * @param reverse compared against REF_READ_REVERSE to choose whether
 *                field 1 of the rstarts triples must be non-increasing
 *                (reverse) or non-decreasing (otherwise)
 */
void Ebwt::sanityCheckAll(int reverse) const {
	const EbwtParams& eh = this->_eh;
	assert(isInMemory());
	// Check ftab
	for(uint32_t i = 1; i < eh._ftabLen; i++) {
		assert_geq(this->ftabHi(i), this->ftabLo(i-1));
		assert_geq(this->ftabLo(i), this->ftabHi(i-1));
		assert_leq(this->ftabHi(i), eh._bwtLen+1);
	}
	assert_eq(this->ftabHi(eh._ftabLen-1), eh._bwtLen);
	
	// Check offs
	// One bit per BWT position, rounded up to whole 32-bit words.  The
	// arithmetic is done in size_t so that adding 31 cannot wrap.
	const size_t seenLen = ((size_t)eh._bwtLen + 31) >> 5;
	uint32_t *seen;
	try {
		seen = new uint32_t[seenLen]; // bitvector marking seen offsets
	} catch(bad_alloc&) {
		cerr << "Out of memory allocating seen[] at " << __FILE__ << ":" << __LINE__ << endl;
		// Rethrow the exception object that was actually thrown rather
		// than a sliced copy of it
		throw;
	}
	memset(seen, 0, sizeof(uint32_t) * seenLen);
	uint32_t offsLen = eh._offsLen;
	for(uint32_t i = 0; i < offsLen; i++) {
		assert_lt(this->offs()[i], eh._bwtLen);
		const size_t   w = this->offs()[i] >> 5; // word holding the bit
		const uint32_t r = this->offs()[i] & 31; // bit index inside the word
		assert_eq(0, (seen[w] >> r) & 1); // shouldn't have been seen before
		// Unsigned shift: a signed (1 << 31) would shift into the sign bit
		seen[w] |= ((uint32_t)1 << r);
	}
	delete[] seen;
	
	// Check nPat
	assert_gt(this->_nPat, 0);
	
	// Check plen, flen
	for(uint32_t i = 0; i < this->_nPat; i++) {
		assert_geq(this->plen()[i], 0);
	}
	
	// Check rstarts
	if(this->rstarts() != NULL) {
		// "i + 1 < _nFrag" instead of "i < _nFrag - 1" so that an empty
		// fragment list cannot wrap the bound around
		for(uint32_t i = 0; i + 1 < this->_nFrag; i++) {
			assert_gt(this->rstarts()[(i+1)*3], this->rstarts()[i*3]);
			if(reverse == REF_READ_REVERSE) {
				assert(this->rstarts()[(i*3)+1] >= this->rstarts()[((i+1)*3)+1]);
			} else {
				assert(this->rstarts()[(i*3)+1] <= this->rstarts()[((i+1)*3)+1]);
			}
		}
	}
	
	// Check ebwt
	sanityCheckUpToSide(eh._numSides);
	VMSG_NL("Ebwt::sanityCheck passed");
}
コード例 #2
0
ファイル: Positivity.cpp プロジェクト: smoe1/ImplicitExplicit
// Escalate the positivity-enforcement strategy by one step.
//
//  * AVERAGE           -> switch to POSITIVITY_POINTS and request that
//                         the stage be repeated.
//  * POSITIVITY_POINTS -> stay at this level, shrink cflFactor by a
//                         fixed decrement and request that the time
//                         step be repeated.
//  * anything else     -> reported via invalid_value_error.
//
// Design note: handling positivity violations this way is not
// continuous in time.  A gradual transition from one level to the
// next would need stringencyLevel to become a double, with the
// integer values marking the points where a transition is complete.
//
void PositivityState::increaseStringency()
{
    if(stringencyLevel==AVERAGE)
    {
        dprintf("increasing stringency level to POSITIVITY_POINTS");
        stringencyLevel = POSITIVITY_POINTS;
        repeatStageFlag = true;
        return;
    }

    // An intermediate IMPLICIT_SOURCE level used to sit between
    // POSITIVITY_POINTS and the CFL reduction below; it is disabled:
    //
    //if(stringencyLevel==POSITIVITY_POINTS)
    //{
    //  dprintf("increasing stringency level to IMPLICIT_SOURCE");
    //  stringencyLevel = IMPLICIT_SOURCE;
    //  repeatStepFlag = true;
    //}
    //else if(stringencyLevel==IMPLICIT_SOURCE) ...

    if(!(stringencyLevel==POSITIVITY_POINTS))
    {
        invalid_value_error(stringencyLevel);
        return;
    }

    // Already at the strictest level: reduce the CFL factor instead.
    //
    // Open questions kept from the original author: should the
    // decrement be configurable?  It could be rescaled by comparing
    // minval with the minval obtained using a larger time step; the
    // aim would be to change dt by the factor that makes minval zero.
    //
    // Changing the step length means the time step has to be redone.
    repeatStepFlag = true;
#if 0
    if(suggested_dt_changeFactor < .8)
        dt_changeFactor = .8;
    else if(suggested_dt_changeFactor > .95)
        dt_changeFactor = .95;
    else
        dt_changeFactor = suggested_dt_changeFactor;
#endif
    // Multiplicative alternatives that are currently switched off:
    //double dt_changeFactor = .95;
    //cflFactor *= dt_changeFactor;
    cflFactor -= .0625;
    //dt *= dt_changeFactor;
    //dprint(dt);
    dprintf1("decreased cflFactor to %f",cflFactor);
    // A cflFactor that has shrunk to nothing indicates that something
    // went wrong.  There should be a positivity-guaranteeing CFL
    // number independent of the solution -- provided such a number
    // exists for the HLLE method, which may not be strictly true:
    // there is no perfect way to bound the physical wave speeds of the
    // Riemann problem between two cell states (Einfeldt's prescription
    // is not a guarantee for all strictly hyperbolic systems).
    //assert_gt(cflFactor, .08);
    assert_gt(cflFactor, 0.);
}
コード例 #3
0
ファイル: FileUtils.hpp プロジェクト: AlexanderKazakov/gcm
 /**
  * Write an array to an already opened binary output stream as raw bytes.
  *
  * T is declared outside this block (presumably a template parameter --
  * TODO confirm); its object representation is written verbatim.
  *
  * @param fileStream  output stream positioned where the data should go
  * @param array       pointer to the first of sizeOfArray elements
  * @param sizeOfArray number of elements to write; asserted to be > 0
  *
  * The assertions verify that the write succeeded and that the put
  * position advanced by exactly sizeOfArray * sizeof(T) bytes.
  */
 static void writeArrayToBinaryFileStream(std::ofstream& fileStream,
         const T* array, const size_t sizeOfArray) {
     assert_gt(sizeOfArray, 0);
     const auto bufferSize = (std::streamsize)(sizeOfArray * sizeof(T));
     const auto previousNumberOfBytes = fileStream.tellp();
     // Do the write outside the assertion macro: the I/O must happen even
     // if the macro is configured not to evaluate its argument.
     fileStream.write(reinterpret_cast<const char*>(array), bufferSize);
     // Same condition the stream's bool conversion tests
     assert_true(!fileStream.fail());
     const auto currentNumberOfBytes = fileStream.tellp();
     assert_eq(bufferSize, currentNumberOfBytes - previousNumberOfBytes);
 }
コード例 #4
0
ファイル: ref_read.cpp プロジェクト: ccls/sequencing
/**
 * Scan every input stream and collect one RefRecord per record returned
 * by fastaRefReadSize(), appending them to 'recs' in order.  Records
 * with zero length, zero offset and the 'first' flag unset are skipped.
 * Each stream is reset before returning.
 *
 * @param in      input streams to scan; must be non-empty and none may
 *                be at EOF on entry
 * @param recs    output list that receives the records
 * @param rparms  reference read-in parameters, forwarded to
 *                fastaRefReadSize()
 * @param bpout   forwarded to fastaRefReadSize() unchanged
 * @param numSeqs incremented once for every record whose 'first' flag
 *                is set (it is not zeroed here)
 * @return pair (sum of rec.len over all records,
 *               sum of rec.len + rec.off over all records)
 * @throws int (the value 1) when the sum of rec.len no longer fits in
 *         32 bits
 */
std::pair<size_t, size_t>
fastaRefReadSizes(
	EList<FileBuf*>& in,
	EList<RefRecord>& recs,
	const RefReadInParams& rparms,
	BitpairOutFileBuf* bpout,
	int& numSeqs)
{
	// Deliberately 32-bit: the wrap-around test below relies on it to
	// detect references that are too long to index
	uint32_t unambigTot = 0;
	// Accumulated at the width of the return type so that long gap
	// stretches cannot silently wrap the grand total
	size_t bothTot = 0;
	assert_gt(in.size(), 0);
	// For each input istream
	for(size_t i = 0; i < in.size(); i++) {
		bool first = true;
		assert(!in[i]->eof());
		// For each pattern in this istream
		while(!in[i]->eof()) {
			RefRecord rec = fastaRefReadSize(*in[i], rparms, first, bpout);
			if((unambigTot + rec.len) < unambigTot) {
				cerr << "Error: Reference sequence has more than 2^32-1 characters!  Please divide the" << endl
				     << "reference into batches or chunks of about 3.6 billion characters or less each" << endl
				     << "and index each independently." << endl;
				throw 1;
			}
			// Add the length of this record.
			if(rec.first) numSeqs++;
			unambigTot += rec.len;
			bothTot += rec.len;
			bothTot += rec.off;
			first = false;
			// Nothing to record: no characters, no gap, not a new sequence
			if(rec.len == 0 && rec.off == 0 && !rec.first) continue;
			recs.push_back(rec);
		}
		// Reset the input stream
		in[i]->reset();
		assert(!in[i]->eof());
#ifndef NDEBUG
		// Check that it's really reset
		int c = in[i]->get();
		assert_eq('>', c);
		in[i]->reset();
		assert(!in[i]->eof());
#endif
	}
	return make_pair(
		(size_t)unambigTot, // total number of unambiguous DNA characters read
		bothTot); // total number of DNA characters read, incl. ambiguous ones
}
コード例 #5
0
ファイル: Polynomial.cpp プロジェクト: smoe1/ImplicitExplicit
// Secant-style update from two abscissas x0 and x1.
//
// ratio is v0/v1.  When num_roots_in_cluster is an odd number other
// than 1, ratio is first replaced by its signed
// num_roots_in_cluster-th root (cbrt for 3, pow with the sign restored
// otherwise).  A cluster size of 2 is reported through eprintf, and any
// other even size trips the oddness assertion.
//
// Returns (x0 - ratio*x1)/(1 - ratio); the denominator is asserted to
// be positive.
inline double secant_rule(double x0,double x1,double ratio,
  int num_roots_in_cluster)
{
    if(num_roots_in_cluster == 2)
    {
        eprintf("disallowed");
    }
    else if(num_roots_in_cluster == 3)
    {
        ratio = cbrt(ratio);
    }
    else if(num_roots_in_cluster != 1)
    {
        // only odd cluster sizes have a well-defined signed real root
        assert_eq(1,num_roots_in_cluster%2);
        const double root_magnitude = pow(fabs(ratio),1./num_roots_in_cluster);
        ratio = copysign(root_magnitude,ratio);
    }
    // a cluster size of 1 leaves ratio untouched

    const double denominator = (1.-ratio);
    assert_gt(denominator,0.);
    return (x0-ratio*x1)/denominator;
}
コード例 #6
0
/**
 * Start the driver.  The driver will begin by conducting a best-first,
 * index-assisted search through the space of possible full and partial
 * alignments.  This search may be followed up with a dynamic programming
 * extension step, taking a prioritized set of partial SA ranges found
 * during the search and extending each with DP.  The process might also be
 * iterated, with the search being occasionally halted so that DPs can be
 * tried, then restarted, etc.
 *
 * What the visible code does:
 *  - paired_ set: dr1_ and dr2_ are advanced in a budgeted loop; no
 *    alignments are reported from this branch.
 *  - otherwise: dr1_ is advanced repeatedly and, each time it reports a
 *    finished non-empty stratum, the alignments of that stratum are
 *    validated, clipped/filtered and handed to 'sink'.
 *
 * Returns ALDRIVER_POLICY_FULFILLED as soon as sink.report() returns
 * true, and ALDRIVER_EXHAUSTED_CANDIDATES in every other case.
 */
int AlignerDriver::go(
	const Scoring& sc,
	const Ebwt& ebwtFw,
	const Ebwt& ebwtBw,
	const BitPairReference& ref,
	DescentMetrics& met,
	WalkMetrics& wlm,
	PerReadMetrics& prm,
	RandomSource& rnd,
	AlnSinkWrap& sink)
{
	if(paired_) {
		// Paired-end - alternate between advancing dr1_ / dr2_ whenever a
		// new full alignment is discovered in the one currently being
		// advanced.  Whenever a new full alignment is found, check to see
		// if it pairs with a previously discovered alignment.
		//
		// NOTE(review): the code below does less than the description
		// above -- the return values of advance() are discarded, the
		// per-mate copies stopc1/stopc2 serve only as loop counters
		// (advance() is always given stop_), and 'first' is cleared after
		// the first pass so dr1_ is advanced at most once.  Looks
		// unfinished; confirm intent before relying on it.
		bool first1 = rnd.nextBool();
		bool first = true;
		DescentStoppingConditions stopc1 = stop_;
		DescentStoppingConditions stopc2 = stop_;
		// Budget increment: one eighth of the total size limit, rounded up
		size_t totszIncr = (stop_.totsz + 7) / 8;
		stopc1.totsz = totszIncr;
		stopc2.totsz = totszIncr;
		while(stopc1.totsz <= stop_.totsz && stopc2.totsz <= stop_.totsz) {
			if(first && first1 && stopc1.totsz <= stop_.totsz) {
				dr1_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
				stopc1.totsz += totszIncr;
			}
			if(stopc2.totsz <= stop_.totsz) {
				dr2_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
				stopc2.totsz += totszIncr;
			}
			first = false;
		}
	} else {
		// Unpaired
		// 'iter' is referenced only by the commented-out debug output
		size_t iter = 1;
		while(true) {
			int ret = dr1_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
			if(ret == DESCENT_DRIVER_ALN) {
				// Nothing to do here; keep advancing
				//cerr << iter << ". DESCENT_DRIVER_ALN" << endl;
			} else if(ret == DESCENT_DRIVER_MEM) {
				// Stop searching
				//cerr << iter << ". DESCENT_DRIVER_MEM" << endl;
				break;
			} else if(ret == DESCENT_DRIVER_STRATA) {
				// DESCENT_DRIVER_STRATA is returned by DescentDriver.advance()
				// when it has finished with a "non-empty" stratum: a stratum
				// in which at least one alignment was found.  Here we report
				// the alignments in an arbitrary order.
				AlnRes res;
				// Initialize alignment selector with the DescentDriver's
				// alignment sink
				alsel_.init(
					dr1_.query(),
					dr1_.sink(),
					ebwtFw,
					ref,
					rnd,
					wlm);
				// Pull alignments until the selector is exhausted or the
				// sink says it is done with this mate
				while(!alsel_.done() && !sink.state().doneWithMate(true)) {
					res.reset();
					bool ret2 = alsel_.next(
						dr1_,
						ebwtFw,
						ref,
						rnd,
						res,
						wlm,
						prm);
					if(ret2) {
						// Got an alignment
						assert(res.matchesRef(
							dr1_.query(),
							ref,
							tmp_rf_,
							tmp_rdseq_,
							tmp_qseq_,
							raw_refbuf_,
							raw_destU32_,
							raw_matches_));
						// Get reference interval involved in alignment
						Interval refival(res.refid(), 0, res.fw(), res.reflen());
						assert_gt(res.refExtent(), 0);
						// Does alignment falls off end of reference?
						if(gReportOverhangs &&
						   !refival.containsIgnoreOrient(res.refival()))
						{
							res.clipOutside(true, 0, res.reflen());
							// Nothing left after clipping; skip it
							if(res.refExtent() == 0) {
								continue;
							}
						}
						assert(gReportOverhangs ||
							   refival.containsIgnoreOrient(res.refival()));
						// Alignment fell entirely outside the reference?
						if(!refival.overlapsIgnoreOrient(res.refival())) {
							continue; // yes, fell outside
						}
						// Alignment redundant with one we've seen previously?
						if(red1_.overlap(res)) {
							continue; // yes, redundant
						}
						red1_.add(res); // so we find subsequent redundancies
						// Report an unpaired alignment
						assert(!sink.state().doneWithMate(true));
						assert(!sink.maxed());
						if(sink.report(0, &res, NULL)) {
							// Short-circuited because a limit, e.g. -k, -m or
							// -M, was exceeded
							return ALDRIVER_POLICY_FULFILLED;
						}
					}
				}
				// Done with this stratum; move the descent sink to the next
				dr1_.sink().advanceStratum();
			} else if(ret == DESCENT_DRIVER_BWOPS) {
				// Nothing to do here; keep advancing
				//cerr << iter << ". DESCENT_DRIVER_BWOPS" << endl;
			} else if(ret == DESCENT_DRIVER_DONE) {
				// Stop searching
				//cerr << iter << ". DESCENT_DRIVER_DONE" << endl;
				break;
			} else {
				// Unexpected return code from advance()
				assert(false);
			}
			iter++;
		}
	}
	return ALDRIVER_EXHAUSTED_CANDIDATES;
}
コード例 #7
0
ファイル: hisat2_build.cpp プロジェクト: infphilo/hisat2
static void driver(
                   const string& infile,
                   EList<string>& infiles,
                   const string& snpfile,
                   const string& htfile,
                   const string& ssfile,
                   const string& exonfile,
                   const string& svfile,
                   const string& outfile,
                   bool packed,
                   int reverse)
{
    initializeCntLut();
    initializeCntBit();
	EList<FileBuf*> is(MISC_CAT);
	bool bisulfite = false;
	RefReadInParams refparams(false, reverse, nsToAs, bisulfite);
	assert_gt(infiles.size(), 0);
	if(format == CMDLINE) {
		// Adapt sequence strings to stringstreams open for input
		stringstream *ss = new stringstream();
		for(size_t i = 0; i < infiles.size(); i++) {
			(*ss) << ">" << i << endl << infiles[i].c_str() << endl;
		}
		FileBuf *fb = new FileBuf(ss);
		assert(fb != NULL);
		assert(!fb->eof());
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb);
	} else {
		// Adapt sequence files to ifstreams
		for(size_t i = 0; i < infiles.size(); i++) {
			FILE *f = fopen(infiles[i].c_str(), "r");
			if (f == NULL) {
				cerr << "Error: could not open "<< infiles[i].c_str() << endl;
				throw 1;
			}
			FileBuf *fb = new FileBuf(f);
			assert(fb != NULL);
			if(fb->peek() == -1 || fb->eof()) {
				cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl;
				continue;
			}
			assert(!fb->eof());
			assert(fb->get() == '>');
			ASSERT_ONLY(fb->reset());
			assert(!fb->eof());
			is.push_back(fb);
		}
	}
	if(is.empty()) {
		cerr << "Warning: All fasta inputs were empty" << endl;
		throw 1;
	}
    filesWritten.push_back(outfile + ".1." + gfm_ext);
    filesWritten.push_back(outfile + ".2." + gfm_ext);
	// Vector for the ordered list of "records" comprising the input
	// sequences.  A record represents a stretch of unambiguous
	// characters in one of the input sequences.
	EList<RefRecord> szs(MISC_CAT);
	std::pair<size_t, size_t> sztot;
	{
		if(verbose) cerr << "Reading reference sizes" << endl;
		Timer _t(cerr, "  Time reading reference sizes: ", verbose);
		if(!reverse && (writeRef || justRef)) {
			filesWritten.push_back(outfile + ".3." + gfm_ext);
			filesWritten.push_back(outfile + ".4." + gfm_ext);
			sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck);
		} else {
			sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck);
		}
	}
	if(justRef) return;
	assert_gt(sztot.first, 0);
	assert_gt(sztot.second, 0);
	assert_gt(szs.size(), 0);
    
	// Construct index from input strings and parameters	
    filesWritten.push_back(outfile + ".5." + gfm_ext);
    filesWritten.push_back(outfile + ".6." + gfm_ext);
    filesWritten.push_back(outfile + ".7." + gfm_ext);
    filesWritten.push_back(outfile + ".8." + gfm_ext);
	TStr s;
	HGFM<TIndexOffU> hGFM(
                          s,
                          packed,
                          1,  // TODO: maybe not?
                          lineRate,
                          offRate,      // suffix-array sampling rate
                          ftabChars,    // number of chars in initial arrow-pair calc
                          localOffRate,
                          localFtabChars,
                          nthreads,
                          snpfile,
                          htfile,
                          ssfile,
                          exonfile,
                          svfile,
                          outfile,      // basename for .?.ht2 files
                          reverse == 0, // fw
                          !entireSA,    // useBlockwise
                          bmax,         // block size for blockwise SA builder
                          bmaxMultSqrt, // block size as multiplier of sqrt(len)
                          bmaxDivN,     // block size as divisor of len
                          noDc? 0 : dcv,// difference-cover period
                          is,           // list of input streams
                          szs,          // list of reference sizes
                          (TIndexOffU)sztot.first,  // total size of all unambiguous ref chars
                          refparams,    // reference read-in parameters
                          seed,         // pseudo-random number generator seed
                          -1,           // override offRate
                          verbose,      // be talkative
                          autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
                          sanityCheck); // verify results and internal consistency
    // Note that the Ebwt is *not* resident in memory at this time.  To
    // load it into memory, call ebwt.loadIntoMemory()
	if(verbose) {
		// Print Ebwt's vital stats
		hGFM.gh().print(cerr);
	}
	if(sanityCheck) {
		// Try restoring the original string (if there were
		// multiple texts, what we'll get back is the joined,
		// padded string, not a list)
		hGFM.loadIntoMemory(
                            reverse ? (refparams.reverse == REF_READ_REVERSE) : 0,
                            true,  // load SA sample?
                            true,  // load ftab?
                            true,  // load rstarts?
                            false,
                            false);
		SString<char> s2;
		hGFM.restore(s2);
		hGFM.evictFromMemory();
		{
			SString<char> joinedss = GFM<>::join<SString<char> >(
				is,          // list of input streams
				szs,         // list of reference sizes
				(TIndexOffU)sztot.first, // total size of all unambiguous ref chars
				refparams,   // reference read-in parameters
				seed);       // pseudo-random number generator seed
			if(refparams.reverse == REF_READ_REVERSE) {
				joinedss.reverse();
			}
			assert_eq(joinedss.length(), s2.length());
			assert(sstr_eq(joinedss, s2));
		}
		if(verbose) {
			if(s2.length() < 1000) {
				cout << "Passed restore check: " << s2.toZBuf() << endl;
			} else {
				cout << "Passed restore check: (" << s2.length() << " chars)" << endl;
			}
		}
	}
}
コード例 #8
0
ファイル: ebwt_build.cpp プロジェクト: BenLangmead/bowtie
/**
 * Build an Ebwt index from the given inputs and, if requested,
 * sanity-check it.
 *
 * Steps, as performed below:
 *  1. Wrap every input in a FileBuf.  When format == CMDLINE the
 *     entries of 'infiles' are the sequences themselves and are
 *     concatenated into one in-memory FASTA stream; otherwise they are
 *     file names opened with fopen().
 *  2. Read the reference sizes with fastaRefReadSizes(); for a forward
 *     build with writeRef or justRef set, also write the .3 and .4
 *     files.
 *  3. Unless justRef is set, construct the Ebwt.
 *  4. If sanityCheck is set, restore the text from the index and
 *     compare it with the joined input.
 *
 * @param infile   not referenced anywhere in this function
 * @param infiles  sequences (CMDLINE format) or FASTA file names
 * @param outfile  basename of the index files to write
 * @param reverse  true when building from the reversed reference
 * @throws int (the value 1) when an input file cannot be opened, when
 *         the .3 file cannot be opened for writing, or when the
 *         forward reference contains no unambiguous characters
 */
static void driver(const string& infile,
                   vector<string>& infiles,
                   const string& outfile,
                   bool reverse = false)
{
	vector<FileBuf*> is;
	bool bisulfite = false;
	RefReadInParams refparams(color, reverse ? reverseType : REF_READ_FORWARD, nsToAs, bisulfite);
	assert_gt(infiles.size(), 0);
	if(format == CMDLINE) {
		// Adapt sequence strings to stringstreams open for input
		stringstream *ss = new stringstream();
		for(size_t i = 0; i < infiles.size(); i++) {
			(*ss) << ">" << i << endl << infiles[i] << endl;
		}
		FileBuf *fb = new FileBuf(ss);
		assert(fb != NULL);
		assert(!fb->eof());
		// The get() below consumes a character only when assertions are
		// compiled in; ASSERT_ONLY(reset()) undoes it in exactly that case
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb);
	} else {
		// Adapt sequence files to ifstreams
		for(size_t i = 0; i < infiles.size(); i++) {
			FILE *f = fopen(infiles[i].c_str(), "rb");
			if (f == NULL) {
				cerr << "Error: could not open "<< infiles[i] << endl;
				throw 1;
			}
			FileBuf *fb = new FileBuf(f);
			assert(fb != NULL);
			assert(!fb->eof());
			assert(fb->get() == '>');
			ASSERT_ONLY(fb->reset());
			assert(!fb->eof());
			is.push_back(fb);
		}
	}
	// Vector for the ordered list of "records" comprising the input
	// sequences.  A record represents a stretch of unambiguous
	// characters in one of the input sequences.
	vector<RefRecord> szs;
	vector<uint32_t> plens;
	std::pair<size_t, size_t> sztot;
	{
		if(verbose) cout << "Reading reference sizes" << endl;
		Timer _t(cout, "  Time reading reference sizes: ", verbose);
		if(!reverse && (writeRef || justRef)) {
			// For forward reference, dump it to .3.ebwt and .4.ebwt
			// files
			string file3 = outfile + ".3." + gEbwt_ext;
			string file4 = outfile + ".4." + gEbwt_ext;
			// Open output stream for the '.3.ebwt' file which will
			// hold the size records.
			ofstream fout3(file3.c_str(), ios::binary);
			if(!fout3.good()) {
				cerr << "Could not open index file for writing: \"" << file3 << "\"" << endl
					 << "Please make sure the directory exists and that permissions allow writing by" << endl
					 << "Bowtie." << endl;
				throw 1;
			}
			BitpairOutFileBuf bpout(file4.c_str());
			// Read in the sizes of all the unambiguous stretches of
			// the genome into a vector of RefRecords.  The input
			// streams are reset once it's done.
			writeU<int32_t>(fout3, 1, bigEndian); // endianness sentinel
			if(color) {
				// Two passes: first with color switched off to produce
				// the files, then with color on for the records that
				// are actually indexed
				refparams.color = false;
				// Make sure the .3.ebwt and .4.ebwt files contain
				// nucleotides; not colors
				TIndexOff numSeqs = 0;
				fastaRefReadSizes(is, szs, plens, refparams, &bpout, numSeqs);
				refparams.color = true;
				writeU<TIndexOffU>(fout3, (TIndexOffU)szs.size(), bigEndian); // write # records
				for(size_t i = 0; i < szs.size(); i++) {
					szs[i].write(fout3, bigEndian);
				}
				szs.clear();
				plens.clear();
				// Now read in the colorspace size records; these are
				// the ones that were indexed
				TIndexOff numSeqs2 = 0;
				sztot = fastaRefReadSizes(is, szs, plens, refparams, NULL, numSeqs2);
				assert_geq(numSeqs, numSeqs2);
			} else {
				TIndexOff numSeqs = 0;
				sztot = fastaRefReadSizes(is, szs, plens, refparams, &bpout, numSeqs);
				writeU<TIndexOffU>(fout3, (TIndexOffU)szs.size(), bigEndian); // write # records
				for(size_t i = 0; i < szs.size(); i++) szs[i].write(fout3, bigEndian);
			}
			if(sztot.first == 0) {
				cerr << "Error: No unambiguous stretches of characters in the input.  Aborting..." << endl;
				throw 1;
			}
			assert_gt(sztot.first, 0);
			assert_gt(sztot.second, 0);
			bpout.close();
			fout3.close();
#ifndef NDEBUG
			if(sanityCheck) {
				// Constructed only for its checks; the object is not
				// used afterwards.
				// NOTE(review): the last two arguments carry the same
				// "be talkative" comment -- confirm what the final
				// parameter really is.
				BitPairReference bpr(
					outfile, // ebwt basename
					color,   // expect color?
					true,    // sanity check?
					&infiles,// files to check against
					NULL,    // sequences to check against
					format == CMDLINE, // whether infiles contains strings
					true,    // load sequence?
					false,   // use memory-mapped files
					false,   // use shared memory
					false,   // sweep through memory-mapped memory
					false,   // be talkative
					false);  // be talkative
			}
#endif
		} else {
			// Read in the sizes of all the unambiguous stretches of the
			// genome into a vector of RefRecords
			TIndexOff numSeqs = 0;
			sztot = fastaRefReadSizes(is, szs, plens, refparams, NULL, numSeqs);
#ifndef NDEBUG
			if(refparams.color) {
				// Debug-only cross-check: re-read with color switched
				// off and compare the sequence counts
				refparams.color = false;
				vector<RefRecord> szs2;
				vector<uint32_t> plens2;
				TIndexOff numSeqs2 = 0;
				fastaRefReadSizes(is, szs2, plens2, refparams, NULL, numSeqs2);
				assert_leq(numSeqs, numSeqs2);
				// One less color than base
				refparams.color = true;
			}
#endif
		}
	}
	if(justRef) return;
	assert_gt(sztot.first, 0);
	assert_gt(sztot.second, 0);
	assert_gt(szs.size(), 0);
	// Construct Ebwt from input strings and parameters
	Ebwt<TStr> ebwt(refparams.color ? 1 : 0,
	                lineRate,
	                linesPerSide,
	                offRate,      // suffix-array sampling rate
	                -1,           // ISA sampling rate
	                ftabChars,    // number of chars in initial arrow-pair calc
			nthreads,
	                outfile,      // basename for .?.ebwt files
	                !reverse,     // fw
	                !entireSA,    // useBlockwise
	                bmax,         // block size for blockwise SA builder
	                bmaxMultSqrt, // block size as multiplier of sqrt(len)
	                bmaxDivN,     // block size as divisor of len
	                noDc? 0 : dcv,// difference-cover period
	                is,           // list of input streams
	                szs,          // list of reference sizes
	                plens,        // list of not-all-gap reference sequence lengths
	                (TIndexOffU)sztot.first,  // total size of all unambiguous ref chars
	                refparams,    // reference read-in parameters
	                seed,         // pseudo-random number generator seed
	                -1,           // override offRate
	                -1,           // override isaRate
	                verbose,      // be talkative
	                autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
	                sanityCheck); // verify results and internal consistency
	// Note that the Ebwt is *not* resident in memory at this time.  To
	// load it into memory, call ebwt.loadIntoMemory()
	if(verbose) {
		// Print Ebwt's vital stats
		ebwt.eh().print(cout);
	}
	if(sanityCheck) {
		// Try restoring the original string (if there were
		// multiple texts, what we'll get back is the joined,
		// padded string, not a list)
		ebwt.loadIntoMemory(
			refparams.color ? 1 : 0,
			-1,
			false,
			false);
		TStr s2; ebwt.restore(s2);
		ebwt.evictFromMemory();
		{
			// Rebuild the joined text straight from the inputs and
			// compare it with what the index gave back
			TStr joinedss = Ebwt<TStr>::join(
				is,          // list of input streams
				szs,         // list of reference sizes
				(TIndexOffU)sztot.first, // total size of all unambiguous ref chars
				refparams,   // reference read-in parameters
				seed);       // pseudo-random number generator seed
			if(refparams.reverse == REF_READ_REVERSE) {
				reverseInPlace(joinedss);
			}
			assert_eq(length(joinedss), length(s2));
			assert_eq(joinedss, s2);
		}
		if(verbose) {
			if(length(s2) < 1000) {
				cout << "Passed restore check: " << s2 << endl;
			} else {
				cout << "Passed restore check: (" << length(s2) << " chars)" << endl;
			}
		}
	}
}
コード例 #9
0
 // Precondition (asserted): height() > 0.
 // Returns true exactly when no bit of 'data' remains set after
 // shifting it right by (1 + shiftAmount()) positions.
 inline bool nonzeroHeightEqualsOne() const {
   assert_gt(height(), 0);
   if (data >> (1 + shiftAmount())) {
     return false;  // something is set above the low bits
   }
   return true;
 }