예제 #1
0
/**
 * Bowtie main function.  It is placed in a separate source file to
 * make it slightly easier to compile Bowtie as a library.
 *
 * If the user specifies -A <file> as the first two arguments, main
 * will interpret that file as having one set of command-line arguments
 * per line, and will dispatch each batch of arguments one at a time to
 * bowtie.
 */
int main(int argc, const char **argv) {
	if(argc > 2 && strcmp(argv[1], "-A") == 0) {
		const char *file = argv[2];
		ifstream in;
		in.open(file);
		char buf[4096];
		int lastret = -1;
		while(in.getline(buf, 4095)) {
			EList<string> args;
			args.push_back(string(argv[0]));
			tokenize(buf, " \t", args);
			const char **myargs = (const char**)malloc(sizeof(char*)*args.size());
			for(size_t i = 0; i < args.size(); i++) {
				myargs[i] = args[i].c_str();
			}
			if(args.size() == 1) continue;
			lastret = bowtie((int)args.size(), myargs);
			free(myargs);
		}
		if(lastret == -1) {
			cerr << "Warning: No arg strings parsed from " << file << endl;
			return 0;
		}
		return lastret;
	} else {
		return bowtie(argc, argv);
	}
}
예제 #2
0
EList ListGraph::getAdj(NodeID u) const{
	// Walk the chain that hangs off ary[u] via the 'next' pointers and
	// collect the 'p' member of every node on it, in chain order.
	EList neighbors;
	for(node* cur = ary[u].next; cur != NULL; cur = cur->next){
		neighbors.push_back(cur->p);
	}
	return neighbors;
}
예제 #3
0
   // @note The general idea of this method comes from https://github.com/ScottDVincent/HW05_vincensd_v2/
	/**
	 * Build the adjacency list of node u from row u of the matrix M.
	 *
	 * Every column whose entry in row u is non-zero contributes one
	 * NWPair(column index, entry).  If u does not index a row of M, an
	 * empty list is returned.
	 */
	std::list<NWPair> MatrixGraph::getAdj(NodeID u) const {
		EList adjacent;
		// A chained comparison such as `0 <= u < M.size()` compares a
		// bool against the size and is not a range check, so test the
		// bound explicitly.  The cast also rejects a negative u should
		// NodeID be a signed type.
		if (static_cast<size_t>(u) >= M.size()) {
			return adjacent;
		}
		for (size_t col = 0; col < M.at(u).size(); col++) {
			if (M.at(u).at(col) != 0) {
				adjacent.push_back(NWPair(static_cast<NodeID>(col), M.at(u).at(col)));
			}
		}
		return adjacent;
	}
예제 #4
0
std::list<NWPair> ListGraph::getAdj(NodeID u) const
{
    // Copy out each entry of edgeList[u] whose 'first' member does not
    // compare equal to NULL, preserving the stored order.
    // NOTE(review): if NodeID is an integer type this test also drops
    // edges whose target is node 0 -- confirm that this is intended.
    EList result;
    EList::const_iterator cur = edgeList[u].begin();
    while(cur != edgeList[u].end())
    {
        if(cur->first != NULL)
            result.push_back(NWPair(cur->first, cur->second));
        ++cur;
    }
    return result;
}
예제 #5
0
 // Returns one NWPair(column, value) for every non-zero entry found in
 // row u of 'ary', scanning columns 0 .. num_nodes-1 in order.
 EList MatrixGraph::getAdj(NodeID u) const{
	 EList neighbors;
	 int col = 0;
	 while(col < num_nodes){
		 // A non-zero entry is treated as an edge from u to 'col'
		 if(ary[u][col] != 0){
			 neighbors.push_back(NWPair(col, ary[u][col]));
		 }
		 ++col;
	 }
	 return neighbors;
 }
예제 #6
0
/**
 * Read every record from each of the given input buffers, appending a
 * RefRecord to 'recs' for each one (records with len == 0, off == 0 and
 * first == false are not appended).  'numSeqs' is incremented once for
 * every record whose 'first' flag is set.  Each input is reset before
 * returning.
 *
 * Returns a pair: first is the sum of rec.len over all records, second
 * is the sum of rec.len + rec.off over all records.
 *
 * Throws int 1 if the running sum of rec.len would wrap around.
 */
std::pair<size_t, size_t>
fastaRefReadSizes(
	EList<FileBuf*>& in,
	EList<RefRecord>& recs,
	const RefReadInParams& rparms,
	BitpairOutFileBuf* bpout,
	int& numSeqs)
{
	uint32_t unambigTot = 0;
	// Accumulated in size_t, the type that is actually returned, so that
	// the len+off total cannot silently wrap at 32 bits where size_t is
	// wider
	size_t bothTot = 0;
	assert_gt(in.size(), 0);
	// For each input istream
	for(size_t i = 0; i < in.size(); i++) {
		bool first = true;
		assert(!in[i]->eof());
		// For each pattern in this istream
		while(!in[i]->eof()) {
			RefRecord rec = fastaRefReadSize(*in[i], rparms, first, bpout);
			// Wrap-around check: the sum is smaller than an operand only
			// if the addition overflowed
			if((unambigTot + rec.len) < unambigTot) {
				cerr << "Error: Reference sequence has more than 2^32-1 characters!  Please divide the" << endl
				     << "reference into batches or chunks of about 3.6 billion characters or less each" << endl
				     << "and index each independently." << endl;
				throw 1;
			}
			// Add the length of this record.
			if(rec.first) numSeqs++;
			unambigTot += rec.len;
			bothTot += rec.len;
			bothTot += rec.off;
			first = false;
			// Records that carry no information are counted above but
			// not stored
			if(rec.len == 0 && rec.off == 0 && !rec.first) continue;
			recs.push_back(rec);
		}
		// Reset the input stream
		in[i]->reset();
		assert(!in[i]->eof());
#ifndef NDEBUG
		// Check that it's really reset
		int c = in[i]->get();
		assert_eq('>', c);
		in[i]->reset();
		assert(!in[i]->eof());
#endif
	}
	return make_pair(
		unambigTot, // total number of unambiguous DNA characters read
		bothTot); // total number of DNA characters read, incl. ambiguous ones
}
예제 #7
0
파일: edit.cpp 프로젝트: BenLangmead/hisat
/**
 * Merge second argument into the first.  Assume both are sorted by
 * 'pos' to begin with.
 *
 * Elements of 'src' are inserted into 'dst' ahead of the first 'dst'
 * element with a greater position.  When positions are equal, a 'src'
 * read gap is placed before the 'dst' element; otherwise the 'dst'
 * element stays first.  Any 'src' elements left over once 'dst' has
 * been traversed are appended.
 */
void Edit::merge(EList<Edit>& dst, const EList<Edit>& src) {
	size_t di = 0, si = 0;
	// Stop as soon as either list is exhausted so that src[si] is never
	// read out of bounds
	while(di < dst.size() && si < src.size()) {
		if(src[si].pos < dst[di].pos) {
			dst.insert(src[si], di);
			si++; di++;
		} else if(src[si].pos == dst[di].pos) {
			// There can be two inserts at a given position, but we
			// can't merge them because there's no way to know their
			// order
			assert(src[si].isReadGap() != dst[di].isReadGap());
			if(src[si].isReadGap()) {
				dst.insert(src[si], di);
				si++; di++;
			} else {
				// Keep the dst element first.  Advancing here also
				// guarantees progress in builds without assertions
				// when neither edit is a read gap.
				di++;
			}
		} else {
			// src element belongs after this dst element; move on so
			// the loop always makes progress
			di++;
		}
	}
	while(si < src.size()) dst.push_back(src[si++]);
}
예제 #8
0
파일: pat.cpp 프로젝트: BenLangmead/hisat
/**
 * Given the values for all of the various arguments used to specify
 * the read and quality input, create a list of pattern sources to
 * dispense them.
 *
 * Three lists of PatternSource* are built:
 *   ab - from m12 (and from sra_accs when USE_SRA is defined)
 *   a  - from m1, followed by the sources created from si
 *   b  - from m2, followed by one NULL per source created from si
 * When p.fileParallel is set, one PatternSource is created per input
 * name; otherwise a single PatternSource is created from the whole
 * list.
 *
 * If m12 (or, with USE_SRA, sra_accs) is non-empty, the result is a
 * PairedSoloPatternSource built from 'ab', and 'a', 'b' and their
 * elements are deleted.  Otherwise the result is a
 * PairedDualPatternSource built from 'a' and 'b', and 'ab' and its
 * elements are deleted.  The list(s) handed to the constructor are not
 * freed here.
 *
 * The parameters q, q1, q2 and verbose are not referenced by this
 * function.
 */
PairedPatternSource* PairedPatternSource::setupPatternSources(
	const EList<string>& si,   // singles, from argv
	const EList<string>& m1,   // mate1's, from -1 arg
	const EList<string>& m2,   // mate2's, from -2 arg
	const EList<string>& m12,  // both mates on each line, from --12 arg
#ifdef USE_SRA
    const EList<string>& sra_accs,
#endif
	const EList<string>& q,    // qualities associated with singles
	const EList<string>& q1,   // qualities associated with m1
	const EList<string>& q2,   // qualities associated with m2
	const PatternParams& p,    // read-in parameters
    size_t nthreads,
	bool verbose)              // be talkative?
{
	EList<PatternSource*>* a  = new EList<PatternSource*>();
	EList<PatternSource*>* b  = new EList<PatternSource*>();
	EList<PatternSource*>* ab = new EList<PatternSource*>();
	// Create list of pattern sources for paired reads appearing
	// interleaved in a single file
	for(size_t i = 0; i < m12.size(); i++) {
		const EList<string>* qs = &m12;
		EList<string> tmp;
		if(p.fileParallel) {
			// Feed query files one to each PatternSource
			qs = &tmp;
			tmp.push_back(m12[i]);
			assert_eq(1, tmp.size());
		}
		ab->push_back(PatternSource::patsrcFromStrings(p, *qs, nthreads));
		if(!p.fileParallel) {
			// The single source already covers the whole list
			break;
		}
	}
    
#ifdef USE_SRA
    for(size_t i = 0; i < sra_accs.size(); i++) {
        const EList<string>* qs = &sra_accs;
        EList<string> tmp;
        if(p.fileParallel) {
            // Feed query files one to each PatternSource
            qs = &tmp;
            tmp.push_back(sra_accs[i]);
            assert_eq(1, tmp.size());
        }
        ab->push_back(PatternSource::patsrcFromStrings(p, *qs, nthreads));
        if(!p.fileParallel) {
            break;
        }
    }
#endif

	// Create list of pattern sources for mate 1 of the paired reads
	for(size_t i = 0; i < m1.size(); i++) {
		const EList<string>* qs = &m1;
		EList<string> tmpSeq;
		if(p.fileParallel) {
			// Feed query files one to each PatternSource
			qs = &tmpSeq;
			tmpSeq.push_back(m1[i]);
			assert_eq(1, tmpSeq.size());
		}
		a->push_back(PatternSource::patsrcFromStrings(p, *qs, nthreads));
		if(!p.fileParallel) {
			break;
		}
	}

	// Create list of pattern sources for mate 2 of the paired reads
	for(size_t i = 0; i < m2.size(); i++) {
		const EList<string>* qs = &m2;
		EList<string> tmpSeq;
		if(p.fileParallel) {
			// Feed query files one to each PatternSource
			qs = &tmpSeq;
			tmpSeq.push_back(m2[i]);
			assert_eq(1, tmpSeq.size());
		}
		b->push_back(PatternSource::patsrcFromStrings(p, *qs, nthreads));
		if(!p.fileParallel) {
			break;
		}
	}
	// All mates/mate files must be paired
	assert_eq(a->size(), b->size());

	// Create list of pattern sources for the unpaired reads
	for(size_t i = 0; i < si.size(); i++) {
		const EList<string>* qs = &si;
		PatternSource* patsrc = NULL;
		EList<string> tmpSeq;
		if(p.fileParallel) {
			// Feed query files one to each PatternSource
			qs = &tmpSeq;
			tmpSeq.push_back(si[i]);
			assert_eq(1, tmpSeq.size());
		}
		patsrc = PatternSource::patsrcFromStrings(p, *qs, nthreads);
		assert(patsrc != NULL);
		// An unpaired source gets a NULL partner so that 'a' and 'b'
		// stay the same length
		a->push_back(patsrc);
		b->push_back(NULL);
		if(!p.fileParallel) {
			break;
		}
	}

	PairedPatternSource *patsrc = NULL;
#ifdef USE_SRA
	if(m12.size() > 0 || sra_accs.size() > 0) {
#else
    if(m12.size() > 0) {
#endif
		patsrc = new PairedSoloPatternSource(ab, p);
		// 'a' and 'b' are not used in this mode; release them
		for(size_t i = 0; i < a->size(); i++) delete (*a)[i];
		for(size_t i = 0; i < b->size(); i++) delete (*b)[i];
		delete a; delete b;
	} else {
		patsrc = new PairedDualPatternSource(a, b, p);
		// 'ab' is not used in this mode; release it
		for(size_t i = 0; i < ab->size(); i++) delete (*ab)[i];
		delete ab;
	}
	return patsrc;
}

/**
 * Build a pattern source from a list of strings.  Each element of 'v'
 * is split on ':' into at most two tokens: the read sequence and,
 * optionally, its qualities.  For every element the (possibly trimmed)
 * sequence, the qualities, the 3' and 5' trim amounts and a name are
 * appended to the corresponding member lists.  The name is the decimal
 * value of names_.size() at the time the element is processed.
 */
VectorPatternSource::VectorPatternSource(
	const EList<string>& v,
	const PatternParams& p) :
	PatternSource(p),
	cur_(p.skip),
	skip_(p.skip),
	paired_(false),
	v_(),
	quals_()
{
	for(size_t i = 0; i < v.size(); i++) {
		EList<string> ss;
		tokenize(v[i], ":", ss, 2);
		assert_gt(ss.size(), 0);
		assert_leq(ss.size(), 2);
		// Initialize s
		string s = ss[0];
		int mytrim5 = gTrim5;
		if(gColor && s.length() > 1) {
			// This may be a primer character.  If so, keep it in the
			// 'primer' field of the read buf and parse the rest of the
			// read without it.
			// Go through unsigned char: passing a negative char value
			// to toupper() is undefined, and the result is used as a
			// table index below.
			// NOTE(review): assumes asc2dnacat/asc2colcat have an entry
			// for every unsigned char value -- confirm.
			int c = toupper(static_cast<unsigned char>(s[0]));
			if(asc2dnacat[c] > 0) {
				// First char is a DNA char
				int c2 = toupper(static_cast<unsigned char>(s[1]));
				// Second char is a color char
				if(asc2colcat[c2] > 0) {
					mytrim5 += 2; // trim primer and first color
				}
			}
		}
		if(gColor) {
			// Convert '0'-'4' to 'A', 'C', 'G', 'T', 'N' and '.' to 'N'
			for(size_t j = 0; j < s.length(); j++) {
				if(s[j] >= '0' && s[j] <= '4') {
					s[j] = "ACGTN"[(int)s[j] - '0'];
				}
				if(s[j] == '.') s[j] = 'N';
			}
		}
		if(s.length() <= (size_t)(gTrim3 + mytrim5)) {
			// Entire read is trimmed away
			s.clear();
		} else {
			// Trim on 5' (high-quality) end
			if(mytrim5 > 0) {
				s.erase(0, mytrim5);
			}
			// Trim on 3' (low-quality) end
			if(gTrim3 > 0) {
				s.erase(s.length()-gTrim3);
			}
		}
		//  Initialize vq
		string vq;
		if(ss.size() == 2) {
			vq = ss[1];
		}
		// Trim qualities
		if(vq.length() > (size_t)(gTrim3 + mytrim5)) {
			// Trim on 5' (high-quality) end
			if(mytrim5 > 0) {
				vq.erase(0, mytrim5);
			}
			// Trim on 3' (low-quality) end
			if(gTrim3 > 0) {
				vq.erase(vq.length()-gTrim3);
			}
		}
		// Pad quals with Is if necessary; this shouldn't happen
		while(vq.length() < s.length()) {
			vq.push_back('I');
		}
		// Truncate quals to match length of read if necessary;
		// this shouldn't happen
		if(vq.length() > s.length()) {
			vq.erase(s.length());
		}
		assert_eq(vq.length(), s.length());
		v_.expand();
		v_.back().installChars(s);
		quals_.push_back(BTString(vq));
		trimmed3_.push_back(gTrim3);
		trimmed5_.push_back(mytrim5);
		ostringstream os;
		os << (names_.size());
		names_.push_back(BTString(os.str()));
	}
	assert_eq(v_.size(), quals_.size());
}
	
/**
 * Copy the read at index cur_ into 'r' and advance cur_.
 *
 * On success: sets 'rdid' and 'endid' to the current value of readCnt_
 * (which is then incremented), sets 'done' to whether cur_ has reached
 * v_.size(), sets 'success' to true and returns true.
 *
 * If cur_ is already at or beyond v_.size(): resets 'r', sets
 * 'success' to false and 'done' to true, and returns false.
 *
 * The member state is read and updated between lock() and unlock().
 */
bool VectorPatternSource::nextReadImpl(
	Read& r,
	TReadId& rdid,
	TReadId& endid,
	bool& success,
	bool& done)
{
	// Start from a clean read buffer
	r.reset();
	lock();
	const bool exhausted = (cur_ >= v_.size());
	if(!exhausted) {
		// Transfer the stored sequence, qualities and trim amounts
		r.color = gColor;
		r.patFw  = v_[cur_];
		r.qual = quals_[cur_];
		r.trimmed3 = trimmed3_[cur_];
		r.trimmed5 = trimmed5_[cur_];
		// The read is named after its index
		ostringstream nameBuf;
		nameBuf << cur_;
		r.name = nameBuf.str();
		cur_++;
		done = (cur_ == v_.size());
		endid = readCnt_;
		rdid = endid;
		readCnt_++;
		unlock();
		success = true;
		return true;
	}
	unlock();
	// An empty read tells the caller that there is nothing left
	r.reset();
	success = false;
	done = true;
	assert(r.empty());
	return false;
}
예제 #9
0
static void driver(
                   const string& infile,
                   EList<string>& infiles,
                   const string& snpfile,
                   const string& htfile,
                   const string& ssfile,
                   const string& exonfile,
                   const string& svfile,
                   const string& outfile,
                   bool packed,
                   int reverse)
{
    initializeCntLut();
    initializeCntBit();
	EList<FileBuf*> is(MISC_CAT);
	bool bisulfite = false;
	RefReadInParams refparams(false, reverse, nsToAs, bisulfite);
	assert_gt(infiles.size(), 0);
	if(format == CMDLINE) {
		// Adapt sequence strings to stringstreams open for input
		stringstream *ss = new stringstream();
		for(size_t i = 0; i < infiles.size(); i++) {
			(*ss) << ">" << i << endl << infiles[i].c_str() << endl;
		}
		FileBuf *fb = new FileBuf(ss);
		assert(fb != NULL);
		assert(!fb->eof());
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb);
	} else {
		// Adapt sequence files to ifstreams
		for(size_t i = 0; i < infiles.size(); i++) {
			FILE *f = fopen(infiles[i].c_str(), "r");
			if (f == NULL) {
				cerr << "Error: could not open "<< infiles[i].c_str() << endl;
				throw 1;
			}
			FileBuf *fb = new FileBuf(f);
			assert(fb != NULL);
			if(fb->peek() == -1 || fb->eof()) {
				cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl;
				continue;
			}
			assert(!fb->eof());
			assert(fb->get() == '>');
			ASSERT_ONLY(fb->reset());
			assert(!fb->eof());
			is.push_back(fb);
		}
	}
	if(is.empty()) {
		cerr << "Warning: All fasta inputs were empty" << endl;
		throw 1;
	}
    filesWritten.push_back(outfile + ".1." + gfm_ext);
    filesWritten.push_back(outfile + ".2." + gfm_ext);
	// Vector for the ordered list of "records" comprising the input
	// sequences.  A record represents a stretch of unambiguous
	// characters in one of the input sequences.
	EList<RefRecord> szs(MISC_CAT);
	std::pair<size_t, size_t> sztot;
	{
		if(verbose) cerr << "Reading reference sizes" << endl;
		Timer _t(cerr, "  Time reading reference sizes: ", verbose);
		if(!reverse && (writeRef || justRef)) {
			filesWritten.push_back(outfile + ".3." + gfm_ext);
			filesWritten.push_back(outfile + ".4." + gfm_ext);
			sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck);
		} else {
			sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck);
		}
	}
	if(justRef) return;
	assert_gt(sztot.first, 0);
	assert_gt(sztot.second, 0);
	assert_gt(szs.size(), 0);
    
	// Construct index from input strings and parameters	
    filesWritten.push_back(outfile + ".5." + gfm_ext);
    filesWritten.push_back(outfile + ".6." + gfm_ext);
    filesWritten.push_back(outfile + ".7." + gfm_ext);
    filesWritten.push_back(outfile + ".8." + gfm_ext);
	TStr s;
	HGFM<TIndexOffU> hGFM(
                          s,
                          packed,
                          1,  // TODO: maybe not?
                          lineRate,
                          offRate,      // suffix-array sampling rate
                          ftabChars,    // number of chars in initial arrow-pair calc
                          localOffRate,
                          localFtabChars,
                          nthreads,
                          snpfile,
                          htfile,
                          ssfile,
                          exonfile,
                          svfile,
                          outfile,      // basename for .?.ht2 files
                          reverse == 0, // fw
                          !entireSA,    // useBlockwise
                          bmax,         // block size for blockwise SA builder
                          bmaxMultSqrt, // block size as multiplier of sqrt(len)
                          bmaxDivN,     // block size as divisor of len
                          noDc? 0 : dcv,// difference-cover period
                          is,           // list of input streams
                          szs,          // list of reference sizes
                          (TIndexOffU)sztot.first,  // total size of all unambiguous ref chars
                          refparams,    // reference read-in parameters
                          seed,         // pseudo-random number generator seed
                          -1,           // override offRate
                          verbose,      // be talkative
                          autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
                          sanityCheck); // verify results and internal consistency
    // Note that the Ebwt is *not* resident in memory at this time.  To
    // load it into memory, call ebwt.loadIntoMemory()
	if(verbose) {
		// Print Ebwt's vital stats
		hGFM.gh().print(cerr);
	}
	if(sanityCheck) {
		// Try restoring the original string (if there were
		// multiple texts, what we'll get back is the joined,
		// padded string, not a list)
		hGFM.loadIntoMemory(
                            reverse ? (refparams.reverse == REF_READ_REVERSE) : 0,
                            true,  // load SA sample?
                            true,  // load ftab?
                            true,  // load rstarts?
                            false,
                            false);
		SString<char> s2;
		hGFM.restore(s2);
		hGFM.evictFromMemory();
		{
			SString<char> joinedss = GFM<>::join<SString<char> >(
				is,          // list of input streams
				szs,         // list of reference sizes
				(TIndexOffU)sztot.first, // total size of all unambiguous ref chars
				refparams,   // reference read-in parameters
				seed);       // pseudo-random number generator seed
			if(refparams.reverse == REF_READ_REVERSE) {
				joinedss.reverse();
			}
			assert_eq(joinedss.length(), s2.length());
			assert(sstr_eq(joinedss, s2));
		}
		if(verbose) {
			if(s2.length() < 1000) {
				cout << "Passed restore check: " << s2.toZBuf() << endl;
			} else {
				cout << "Passed restore check: (" << s2.length() << " chars)" << endl;
			}
		}
	}
}
예제 #10
0
/**
 * A way of feeding simple tests to the seed alignment infrastructure.
 */
int main(int argc, char **argv) {

    EList<string> strs;
    //                            GCTATATAGCGCGCTCGCATCATTTTGTGT
    strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA"
                          "NNNNNNNNNN"
                          "CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA"));
    //                            GCTATATAGCGCGCTTGCATCATTTTGTGT
    //                                           ^
    bool packed = false;
    int color = 0;
	pair<GFM*, GFM*> gfms = GFM::fromStrings<SString<char> >(
		strs,
		packed,
		REF_READ_REVERSE,
		Ebwt::default_bigEndian,
		Ebwt::default_lineRate,
		Ebwt::default_offRate,
		Ebwt::default_ftabChars,
		".aligner_seed2.cpp.tmp",
		Ebwt::default_useBlockwise,
		Ebwt::default_bmax,
		Ebwt::default_bmaxMultSqrt,
		Ebwt::default_bmaxDivN,
		Ebwt::default_dcv,
		Ebwt::default_seed,
		false,  // verbose
		false,  // autoMem
		false); // sanity
    
    gfms.first->loadIntoMemory (-1, true, true, true, true, false);
    gfms.second->loadIntoMemory(1, true, true, true, true, false);
	
	int testnum = 0;

	// Query is longer than ftab and matches exactly twice
    for(int rc = 0; rc < 2; rc++) {
		for(int i = 0; i < 2; i++) {
			cerr << "Test " << (++testnum) << endl;
			cerr << "  Query with length greater than ftab" << endl;
			DescentMetrics mets;
			PerReadMetrics prm;
			DescentDriver dr;
			
			// Set up the read
			BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
			if(rc) {
				seq.reverseComp();
				qual.reverse();
			}
			dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);

			// Set up the DescentConfig
			DescentConfig conf;
			conf.cons.init(GFM::default_ftabChars, 1.0);
			conf.expol = DESC_EX_NONE;
			
			// Set up the search roots
			dr.addRoot(
				conf,   // DescentConfig
				(i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root
				(i == 0) ? true : false,           // left-to-right?
				rc == 0,   // forward?
				0.0f);   // root priority
			
			// Do the search
			Scoring sc = Scoring::base1();
			dr.go(sc, *gfms.first, *gfms.second, mets, prm);
			
			// Confirm that an exact-matching alignment was found
			assert_eq(1, dr.sink().nrange());
			assert_eq(2, dr.sink().nelt());
		}
	}
	
	// Query has length euqal to ftab and matches exactly twice
    for(int i = 0; i < 2; i++) {
		cerr << "Test " << (++testnum) << endl;
		cerr << "  Query with length equal to ftab" << endl;
        DescentMetrics mets;
		PerReadMetrics prm;
        DescentDriver dr;
        
        // Set up the read
        BTDnaString seq ("GCTATATAGC", true);
        BTString    qual("ABCDEFGHIa");
		dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
        
        // Set up the DescentConfig
        DescentConfig conf;
        conf.cons.init(GFM::default_ftabChars, 1.0);
        conf.expol = DESC_EX_NONE;
        
        // Set up the search roots
        dr.addRoot(
            conf,   // DescentConfig
            (i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root
            (i == 0) ? true : false,           // left-to-right?
            true,   // forward?
            0.0f);   // root priority
        
        // Do the search
        Scoring sc = Scoring::base1();
        dr.go(sc, *gfms.first, *gfms.second, mets, prm);
		
		// Confirm that an exact-matching alignment was found
		assert_eq(1, dr.sink().nrange());
		assert_eq(2, dr.sink().nelt());
    }

	// Query has length less than ftab length and matches exactly twice
    for(int i = 0; i < 2; i++) {
		cerr << "Test " << (++testnum) << endl;
		cerr << "  Query with length less than ftab" << endl;
        DescentMetrics mets;
		PerReadMetrics prm;
        DescentDriver dr;
        
        // Set up the read
        BTDnaString seq ("GCTATATAG", true);
        BTString    qual("ABCDEFGHI");
		dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
        
        // Set up the DescentConfig
        DescentConfig conf;
        conf.cons.init(GFM::default_ftabChars, 1.0);
        conf.expol = DESC_EX_NONE;
        
        // Set up the search roots
        dr.addRoot(
            conf,   // DescentConfig
            (i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root
            (i == 0) ? true : false,           // left-to-right?
            true,   // forward?
            0.0f);   // root priority
        
        // Do the search
        Scoring sc = Scoring::base1();
        dr.go(sc, *gfms.first, *gfms.second, mets, prm);
		
		// Confirm that an exact-matching alignment was found
		assert_eq(1, dr.sink().nrange());
		assert_eq(2, dr.sink().nelt());
    }
	
	// Search root is in the middle of the read, requiring a bounce
    for(int i = 0; i < 2; i++) {
		cerr << "Test " << (++testnum) << endl;
		cerr << "  Search root in middle of read" << endl;
        DescentMetrics mets;
		PerReadMetrics prm;
        DescentDriver dr;
        
        // Set up the read
		//                012345678901234567890123456789
        BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
        BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
		TIndexOffU top, bot;
		top = bot = 0;
		bool ret = gfms.first->contains("GCGCTCGCATCATTTTGTGT", &top, &bot);
		cerr << ret << ", " << top << ", " << bot << endl;
		dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
        
        // Set up the DescentConfig
        DescentConfig conf;
        conf.cons.init(GFM::default_ftabChars, 1.0);
        conf.expol = DESC_EX_NONE;
        
        // Set up the search roots
        dr.addRoot(
            conf,   // DescentConfig
            (i == 0) ? 10 : (seq.length() - 1 - 10), // 5' offset into read of root
            (i == 0) ? true : false,                 // left-to-right?
            true,   // forward?
            0.0f);   // root priority
        
        // Do the search
        Scoring sc = Scoring::base1();
        dr.go(sc, *gfms.first, *gfms.second, mets, prm);
		
		// Confirm that an exact-matching alignment was found
		assert_eq(1, dr.sink().nrange());
		assert_eq(2, dr.sink().nelt());
    }

	delete gfms.first;
	delete gfms.second;
	
	strs.clear();
    strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA"
                          "NNNNNNNNNN"
                          "CATGTCAGCTATATAGCG"));
	gfms = GFM::fromStrings<SString<char> >(
		strs,
		packed,
		REF_READ_REVERSE,
		GFM::default_bigEndian,
		GFM::default_lineRate,
		GFM::default_offRate,
		GFM::default_ftabChars,
		".aligner_seed2.cpp.tmp",
		GFM::default_useBlockwise,
		GFM::default_bmax,
		GfM::default_bmaxMultSqrt,
		GFM::default_bmaxDivN,
		GFM::default_dcv,
		GFM::default_seed,
		false,  // verbose
		false,  // autoMem
		false); // sanity
    
    gfms.first->loadIntoMemory (-1, true, true, true, true, false);
    gfms.second->loadIntoMemory(1, true, true, true, true, false);
	
	// Query is longer than ftab and matches exactly once.  One search root for
	// forward read.
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
			for(size_t j = 0; j < seq.length(); j++) {
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query with length greater than ftab and matches exactly once" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				// Set up the read
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				conf.cons.init(GFM::default_ftabChars, 1.0);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					true,   // forward?
					0.0f);   // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				assert_eq(1, dr.sink().nelt());
			}
		}
	}

	// Query is longer than ftab and its reverse complement matches exactly
	// once.  Search roots on forward and reverse-comp reads.
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
			for(size_t j = 0; j < seq.length(); j++) {
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query with length greater than ftab and reverse complement matches exactly once" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				// Set up the read
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				conf.cons.init(GFM::default_ftabChars, 1.0);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					true,   // forward?
					0.0f);   // root priority
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					false,  // forward?
					1.0f);   // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				assert_eq(1, dr.sink().nelt());
			}
		}
	}

	// Query is longer than ftab and matches exactly once with one mismatch
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
			//                ||||||||||||||||||||||||||||||
			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
			//                012345678901234567890123456789
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
			for(size_t k = 0; k < orig.length(); k++) {
				BTDnaString seq = orig;
				seq.set(seq[k] ^ 3, k);
				for(size_t j = 0; j < seq.length(); j++) {
					// Assume left-to-right
					size_t beg = j;
					size_t end = j + GFM::default_ftabChars;
					// Mismatch penalty is 3, so we have to skip starting
					// points that are within 2 from the mismatch
					if((i > 0 && j > 0) || j == seq.length()-1) {
						// Right-to-left
						if(beg < GFM::default_ftabChars) {
							beg = 0;
						} else {
							beg -= GFM::default_ftabChars;
						}
						end -= GFM::default_ftabChars;
					}
					size_t kk = k;
					//if(rc) {
					//	kk = seq.length() - k - 1;
					//}
					if(beg <= kk && end > kk) {
						continue;
					}
					if((j > kk) ? (j - kk <= 2) : (kk - j <= 2)) {
						continue;
					}
					cerr << "Test " << (++testnum) << endl;
					cerr << "  Query with length greater than ftab and matches exactly once with 1mm" << endl;
					DescentMetrics mets;
					PerReadMetrics prm;
					DescentDriver dr;
					
					dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
					
					// Set up the DescentConfig
					DescentConfig conf;
					// Changed 
					conf.cons.init(0, 1.0);
					conf.expol = DESC_EX_NONE;
					
					// Set up the search roots
					dr.addRoot(
						conf,    // DescentConfig
						j,       // 5' offset into read of root
						i == 0,  // left-to-right?
						true,    // forward?
						0.0f);    // root priority
					
					// Do the search
					Scoring sc = Scoring::base1();
					dr.go(sc, *gfms.first, *gfms.second, mets, prm);
					
					// Confirm that an exact-matching alignment was found
					assert_eq(1, dr.sink().nrange());
					assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
					assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
					cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
					assert_eq(1, dr.sink().nelt());
					last_topf = dr.sink()[0].topf;
					last_botf = dr.sink()[0].botf;
				}
			}
		}
    }

	// Query is longer than ftab and matches exactly once with one N mismatch
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
			//                ||||||||||||||||||||||||||||||
			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
			//                012345678901234567890123456789
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
			for(size_t k = 0; k < orig.length(); k++) {
				BTDnaString seq = orig;
				seq.set(4, k);
				for(size_t j = 0; j < seq.length(); j++) {
					// Assume left-to-right
					size_t beg = j;
					size_t end = j + GFM::default_ftabChars;
					// Mismatch penalty is 3, so we have to skip starting
					// points that are within 2 from the mismatch
					if((i > 0 && j > 0) || j == seq.length()-1) {
						// Right-to-left
						if(beg < GFM::default_ftabChars) {
							beg = 0;
						} else {
							beg -= GFM::default_ftabChars;
						}
						end -= GFM::default_ftabChars;
					}
					if(beg <= k && end > k) {
						continue;
					}
					if((j > k) ? (j - k <= 2) : (k - j <= 2)) {
						continue;
					}
					cerr << "Test " << (++testnum) << endl;
					cerr << "  Query with length greater than ftab and matches exactly once with 1mm" << endl;
					DescentMetrics mets;
					PerReadMetrics prm;
					DescentDriver dr;
					
					dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
					
					// Set up the DescentConfig
					DescentConfig conf;
					// Changed 
					conf.cons.init(0, 1.0);
					conf.expol = DESC_EX_NONE;
					
					// Set up the search roots
					dr.addRoot(
						conf,   // DescentConfig
						j,      // 5' offset into read of root
						i == 0, // left-to-right?
						true,   // forward?
						0.0f);   // root priority
					
					// Do the search
					Scoring sc = Scoring::base1();
					dr.go(sc, *gfms.first, *gfms.second, mets, prm);
					
					// Confirm that an exact-matching alignment was found
					assert_eq(1, dr.sink().nrange());
					assert_eq(sc.n(40), dr.sink()[0].pen);
					assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
					assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
					cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
					assert_eq(1, dr.sink().nelt());
					last_topf = dr.sink()[0].topf;
					last_botf = dr.sink()[0].botf;
				}
			}
		}
    }

	// Throw a bunch of queries with a bunch of Ns in and try to force an assert
	{
		RandomSource rnd(79);
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
			//                ||||||||||||||||||||||||||||||
			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
			//                012345678901234567890123456789
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
			if(i == 1) {
				orig.reverseComp();
				qual.reverse();
			}
			for(size_t trials = 0; trials < 100; trials++) {
				BTDnaString seq = orig;
				size_t ns = 10;
				for(size_t k = 0; k < ns; k++) {
					size_t pos = rnd.nextU32() % seq.length();
					seq.set(4, pos);
				}
				
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query with a bunch of Ns" << endl;
				
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(GFM::default_ftabChars, 1.0);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				for(size_t k = 0; k < ns; k++) {
					size_t j = rnd.nextU32() % seq.length();
					bool ltr = (rnd.nextU2() == 0) ? true : false;
					bool fw = (rnd.nextU2() == 0) ? true : false;
					dr.addRoot(
						conf,   // DescentConfig
						j,      // 5' offset into read of root
						ltr,    // left-to-right?
						fw,     // forward?
						0.0f);   // root priority
				}
				
				// Do the search
				Scoring sc = Scoring::base1();
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
			}
		}
    }

	// Query is longer than ftab and matches exactly once with one mismatch
	{
		RandomSource rnd(77);
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
			//                ||||||||||||||||||||||||||||||
			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
			//                012345678901234567890123456789
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
			//       revcomp: ACACAAAATGATGCGAGCGCGCTATATAGC
			//       revqual: cbaIHGFEDCBAihgfedcbaIHGFEDCBA
			bool fwi = (i == 0);
			if(!fwi) {
				orig.reverseComp();
			}
			for(size_t k = 0; k < orig.length(); k++) {
				BTDnaString seq = orig;
				seq.set(seq[k] ^ 3, k);
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query with length greater than ftab and matches exactly once with 1mm.  Many search roots." << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(0, 1.0);
				conf.expol = DESC_EX_NONE;
				
				// Set up several random search roots
				bool onegood = false;
				for(size_t y = 0; y < 10; y++) {
					size_t j = rnd.nextU32() % seq.length();
					bool ltr = (rnd.nextU2() == 0) ? true : false;
					bool fw = (rnd.nextU2() == 0) ? true : false;
					dr.addRoot(
						conf,     // DescentConfig
						(TReadOff)j,        // 5' offset into read of root
						ltr,      // left-to-right?
						fw,       // forward?
						(float)((float)y * 1.0f)); // root priority
					// Assume left-to-right
					size_t beg = j;
					size_t end = j + GFM::default_ftabChars;
					// Mismatch penalty is 3, so we have to skip starting
					// points that are within 2 from the mismatch
					if(!ltr) {
						// Right-to-left
						if(beg < GFM::default_ftabChars) {
							beg = 0;
						} else {
							beg -= GFM::default_ftabChars;
						}
						end -= GFM::default_ftabChars;
					}
					bool good = true;
					if(fw != fwi) {
						good = false;
					}
					if(beg <= k && end > k) {
						good = false;
					}
					if((j > k) ? (j - k <= 2) : (k - j <= 2)) {
						good = false;
					}
					if(good) {
						onegood = true;
					}
				}
				if(!onegood) {
					continue;
				}
				
				// Do the search
				Scoring sc = Scoring::base1();
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(1, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}
    }

	// Query is longer than ftab and matches exactly once with one read gap
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
		for(int k = 0; k < 2; k++) {
			// Set up the read
			//                GCTATATAGCGCGCCTGCATCATTTTGTGT
			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
			//                |||||||||||||||///////////////
			BTDnaString seq ("GCTATATAGCGCGCTGCATCATTTTGTGT", true);
			//                01234567890123456789012345678
			//                87654321098765432109876543210
			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIab");
			if(k == 1) {
				seq.reverseComp();
				qual.reverse();
			}
			assert_eq(seq.length(), qual.length());
			// js iterate over offsets from 5' end for the search root
			for(size_t j = 0; j < seq.length(); j++) {
				// Assume left-to-right
				size_t beg = j;
				if(k == 1) {
					beg = seq.length() - beg - 1;
				}
				size_t end = beg + GFM::default_ftabChars;
				// Mismatch penalty is 3, so we have to skip starting
				// points that are within 2 from the mismatch
				if((i > 0 && j > 0) || j == seq.length()-1) {
					// Right-to-left
					if(beg < GFM::default_ftabChars) {
						beg = 0;
					} else {
						beg -= GFM::default_ftabChars;
					}
					end -= GFM::default_ftabChars;
				}
				assert_geq(end, beg);
				if(beg <= 15 && end >= 15) {
					continue;
				}
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query matches once with a read gap of length 1" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				Read q("test", seq.toZBuf(), qual.toZBuf());
				assert(q.repOk());
				dr.initRead(q, -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(0, 0.5);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					k == 0, // forward?
					0.0f);  // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert_eq(sc.readGapOpen() + 0 * sc.readGapExtend(), dr.sink()[0].pen);
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(1, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}}
    }

	// Query is longer than ftab and matches exactly once with one read gap of
	// length 3
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
		for(int k = 0; k < 2; k++) {
			// Set up the read
			//                GCTATATAGCGCGCGCTCATCATTTTGTGT
			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
			//                ||||||||||||||   |||||||||||||
			BTDnaString seq ("GCTATATAGCGCGC" "CATCATTTTGTGT", true);
			//                01234567890123   4567890123456
			//                65432109876543   2109876543210
			BTString    qual("ABCDEFGHIabcde" "fghiABCDEFGHI");
			if(k == 1) {
				seq.reverseComp();
				qual.reverse();
			}
			for(size_t j = 0; j < seq.length(); j++) {
				// Assume left-to-right
				size_t beg = j;
				if(k == 1) {
					beg = seq.length() - beg - 1;
				}
				size_t end = beg + GFM::default_ftabChars;
				// Mismatch penalty is 3, so we have to skip starting
				// points that are within 2 from the mismatch
				if((i > 0 && j > 0) || j == seq.length()-1) {
					// Right-to-left
					if(beg < GFM::default_ftabChars) {
						beg = 0;
					} else {
						beg -= GFM::default_ftabChars;
					}
					end -= GFM::default_ftabChars;
				}
				if(beg <= 14 && end >= 14) {
					continue;
				}
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query matches once with a read gap of length 3" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed
				conf.cons.init(0, 0.2);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					k == 0, // forward?
					0.0f);  // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				// Need to adjust the mismatch penalty up to avoid alignments
				// with lots of mismatches.
				sc.setMmPen(COST_MODEL_CONSTANT, 6, 6);
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert_eq(sc.readGapOpen() + 2 * sc.readGapExtend(), dr.sink()[0].pen);
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(1, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}}
    }

	// Query is longer than ftab and matches exactly once with one reference gap
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//    Ref: CATGTCAGCTATATAGCGCGC" "TCGCATCATTTTGTGTGTAAACCA
			//                ||||||||||||||   ||||||||||||||||
			BTDnaString seq ("GCTATATAGCGCGCA""TCGCATCATTTTGTGT", true);
			//                012345678901234  5678901234567890
			BTString    qual("ABCDEFGHIabcdef""ghiABCDEFGHIabcd");
			for(size_t j = 0; j < seq.length(); j++) {
				// Assume left-to-right
				size_t beg = j;
				size_t end = j + GFM::default_ftabChars;
				// Mismatch penalty is 3, so we have to skip starting
				// points that are within 2 from the mismatch
				if((i > 0 && j > 0) || j == seq.length()-1) {
					// Right-to-left
					if(beg < GFM::default_ftabChars) {
						beg = 0;
					} else {
						beg -= GFM::default_ftabChars;
					}
					end -= GFM::default_ftabChars;
				}
				if(beg <= 14 && end >= 14) {
					continue;
				}
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query matches once with a reference gap of length 1" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(1, 0.5);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					true,   // forward?
					0.0f);  // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				// Need to adjust the mismatch penalty up to avoid alignments
				// with lots of mismatches.
				sc.setMmPen(COST_MODEL_CONSTANT, 6, 6);
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert_eq(sc.refGapOpen() + 0 * sc.refGapExtend(), dr.sink()[0].pen);
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(1, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}
    }

	// Query is longer than ftab and matches exactly once with one reference gap
	// of length 3
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//    Ref: CATGTCAGCTATATAGCGCGC"   "TCGCATCATTTTGTGTGTAAACCA
			//                ||||||||||||||     ||||||||||||||||
			BTDnaString seq ("GCTATATAGCGCGCATG""TCGCATCATTTTGTGT", true);
			//                01234567890123456  7890123456789012
			BTString    qual("ABCDEFGHIabcdefgh""iABCDEFGHIabcdef");
			for(size_t j = 0; j < seq.length(); j++) {
				// Assume left-to-right
				size_t beg = j;
				size_t end = j + GFM::default_ftabChars;
				// Mismatch penalty is 3, so we have to skip starting
				// points that are within 2 from the mismatch
				if((i > 0 && j > 0) || j == seq.length()-1) {
					// Right-to-left
					if(beg < GFM::default_ftabChars) {
						beg = 0;
					} else {
						beg -= GFM::default_ftabChars;
					}
					end -= GFM::default_ftabChars;
				}
				if(beg <= 14 && end >= 14) {
					continue;
				}
				if(beg <= 15 && end >= 15) {
					continue;
				}
				if(beg <= 16 && end >= 16) {
					continue;
				}
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query matches once with a reference gap of length 1" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -30, 30);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(1, 0.25);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					true,   // forward?
					0.0f);  // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				// Need to adjust the mismatch penalty up to avoid alignments
				// with lots of mismatches.
				sc.setMmPen(COST_MODEL_CONSTANT, 6, 6);
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert_eq(sc.refGapOpen() + 2 * sc.refGapExtend(), dr.sink()[0].pen);
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(1, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}
    }

	// Query is longer than ftab and matches exactly once with one read gap,
	// one ref gap, and one mismatch
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//           Ref: CATGTCAGCT   ATATAGCGCGCT  CGCATCATTTTGTGTGTAAACCA
			//                ||||||||||   ||||||||||||   |||||| |||||||||||||
			BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGTGTAAAC", true);
			//                0123456789  0123456789012   34567890123456789012
			BTString    qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF");
			for(size_t j = 0; j < seq.length(); j++) {
				// Assume left-to-right
				size_t beg = j;
				size_t end = j + GFM::default_ftabChars;
				// Mismatch penalty is 3, so we have to skip starting
				// points that are within 2 from the mismatch
				if((i > 0 && j > 0) || j == seq.length()-1) {
					// Right-to-left
					if(beg < GFM::default_ftabChars) {
						beg = 0;
					} else {
						beg -= GFM::default_ftabChars;
					}
					end -= GFM::default_ftabChars;
				}
				if(beg <= 10 && end >= 10) {
					continue;
				}
				if(beg <= 22 && end >= 22) {
					continue;
				}
				if(beg <= 30 && end >= 30) {
					continue;
				}
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query matches once with a read gap of length 1" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -50, 50);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(1, 0.5);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					true,   // forward?
					0.0f);  // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(1, dr.sink().nrange());
				assert_eq(sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[0].pen);
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(1, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}
    }

	delete gfms.first;
	delete gfms.second;
	
	//  Ref CATGTCAGCT-ATATAGCGCGCTCGCATCATTTTGTGTGTAAAC
	//      |||||||||| |||||||||||| |||||| |||||||||||||
	//  Rd  CATGTCAGCTGATATAGCGCGCT-GCATCAATTTGTGTGTAAAC
	strs.clear();
    strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAAC"
                          "NNNNNNNNNN"
                          "CATGTCAGCTGATATAGCGCGCTCGCATCATTTTGTGTGTAAAC" // same but without first ref gap
                          "N"
                          "CATGTCAGCTATATAGCGCGCTGCATCATTTTGTGTGTAAAC" // same but without first read gap
                          "N"
                          "CATGTCAGCTATATAGCGCGCTCGCATCAATTTGTGTGTAAAC" // same but without first mismatch
                          "N"
                          "CATGTCAGCTGATATAGCGCGCTGCATCAATTTGTGTGTAAAC" // Exact match for read
						  ));
	gfms = GFM::fromStrings<SString<char> >(
		strs,
		packed,
		REF_READ_REVERSE,
		GFM::default_bigEndian,
		GFM::default_lineRate,
		GFM::default_offRate,
		GFM::default_ftabChars,
		".aligner_seed2.cpp.tmp",
		GFM::default_useBlockwise,
		GFM::default_bmax,
		GFM::default_bmaxMultSqrt,
		GFM::default_bmaxDivN,
		GFM::default_dcv,
		GFM::default_seed,
		false,  // verbose
		false,  // autoMem
		false); // sanity
    
    gfms.first->loadIntoMemory (color, -1, true, true, true, true, false);
    gfms.second->loadIntoMemory(color,  1, true, true, true, true, false);

	// Query is longer than ftab and matches exactly once with one read gap,
	// one ref gap, and one mismatch
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//           Ref: CATGTCAGCT   ATATAGCGCGCT  CGCATCATTTTGTGTGTAAACCA
			//                ||||||||||   ||||||||||||   |||||| |||||||||||||
			BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGTGTAAAC", true);
			//                0123456789  0123456789012   34567890123456789012
			BTString    qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF");
			for(size_t j = 0; j < seq.length(); j++) {
				// Assume left-to-right
				size_t beg = j;
				size_t end = j + GFM::default_ftabChars;
				// Mismatch penalty is 3, so we have to skip starting
				// points that are within 2 from the mismatch
				if((i > 0 && j > 0) || j == seq.length()-1) {
					// Right-to-left
					if(beg < GFM::default_ftabChars) {
						beg = 0;
					} else {
						beg -= GFM::default_ftabChars;
					}
					end -= GFM::default_ftabChars;
				}
				if(beg <= 10 && end >= 10) {
					continue;
				}
				if(beg <= 22 && end >= 22) {
					continue;
				}
				if(beg <= 30 && end >= 30) {
					continue;
				}
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query matches once with a read gap of length 1" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -50, 50);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(1, 0.5);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					true,   // forward?
					0.0f);  // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(5, dr.sink().nrange());
				assert_eq(0, dr.sink()[0].pen);
				assert_eq(min(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[1].pen);
				assert_eq(max(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[2].pen);
				assert_eq(sc.readGapOpen() + sc.refGapOpen(), dr.sink()[3].pen);
				assert_eq(sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[4].pen);
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(5, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}
    }

	// Query is longer than ftab and matches exactly once with one read gap,
	// one ref gap, one mismatch, and one N
	{
		size_t last_topf = std::numeric_limits<size_t>::max();
		size_t last_botf = std::numeric_limits<size_t>::max();
		for(int i = 0; i < 2; i++) {
			// Set up the read
			//           Ref: CATGTCAGCT   ATATAGCGCGCT  CGCATCATTTTGTGTGTAAACCA
			//                ||||||||||   ||||||||||||   |||||| |||||| ||||||
			BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGNGTAAAC", true);
			//                0123456789  0123456789012   34567890123456789012
			BTString    qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF");
			for(size_t j = 0; j < seq.length(); j++) {
				// Assume left-to-right
				size_t beg = j;
				size_t end = j + GFM::default_ftabChars;
				// Mismatch penalty is 3, so we have to skip starting
				// points that are within 2 from the mismatch
				if((i > 0 && j > 0) || j == seq.length()-1) {
					// Right-to-left
					if(beg < GFM::default_ftabChars) {
						beg = 0;
					} else {
						beg -= GFM::default_ftabChars;
					}
					end -= GFM::default_ftabChars;
				}
				if(beg <= 10 && end >= 10) {
					continue;
				}
				if(beg <= 22 && end >= 22) {
					continue;
				}
				if(beg <= 30 && end >= 30) {
					continue;
				}
				if(beg <= 36 && end >= 36) {
					continue;
				}
				cerr << "Test " << (++testnum) << endl;
				cerr << "  Query matches with various patterns of gaps, mismatches and Ns" << endl;
				DescentMetrics mets;
				PerReadMetrics prm;
				DescentDriver dr;
				
				dr.initRead(Read("test", seq.toZBuf(), qual.toZBuf()), -50, 50);
				
				// Set up the DescentConfig
				DescentConfig conf;
				// Changed 
				conf.cons.init(1, 0.5);
				conf.expol = DESC_EX_NONE;
				
				// Set up the search roots
				dr.addRoot(
					conf,   // DescentConfig
					j,      // 5' offset into read of root
					i == 0, // left-to-right?
					true,   // forward?
					0.0f);  // root priority
				
				// Do the search
				Scoring sc = Scoring::base1();
				sc.setNPen(COST_MODEL_CONSTANT, 1);
				dr.go(sc, *gfms.first, *gfms.second, mets, prm);
				
				// Confirm that an exact-matching alignment was found
				assert_eq(5, dr.sink().nrange());
				assert_eq(sc.n(40), dr.sink()[0].pen);
				assert_eq(sc.n(40) + min(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[1].pen);
				assert_eq(sc.n(40) + max(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[2].pen);
				assert_eq(sc.n(40) + sc.readGapOpen() + sc.refGapOpen(), dr.sink()[3].pen);
				assert_eq(sc.n(40) + sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[4].pen);
				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
				assert_eq(5, dr.sink().nelt());
				last_topf = dr.sink()[0].topf;
				last_botf = dr.sink()[0].botf;
			}
		}
    }

    delete gfms.first;
    delete gfms.second;
	
	cerr << "DONE" << endl;
}
예제 #11
0
/**
 * Merman main driver function.  Does the following:
 *
 * 1. Parses command-line options
 */
int merman(int argc, char **argv) {
	reset();
	try {
		parseCommandLine(argc, argv);
		Timer tov(cerr, "Overall time: ", timing);
		EList<string> refstrs;
		ReferenceSet refs;
		EList<string> refnames;
		EList<size_t> reflens;
		string refstr = argv[optind++];
		tokenize(refstr, ",", refstrs);
		auto_ptr<MerIndex> ind(
			new MerIndex(ap, rp, readLen, seedWidth, nk.first, nk.second,
			             specificity, begin, naiveCheck, nthreads));
		{
			Timer t(cerr, "... ", timing);
			if(timing) cerr << "Reading reference sequences..." << endl;
			for(size_t i = 0; i < refstrs.size(); i++) {
				if(timing) {
					cerr << "  Sequence " << (i+1) << " of " << refstrs.size() << endl;
				}
				if(refIsStr) {
					refs.addOrigReferenceString(refstrs[i].c_str(), rp);
				} else {
					refs.addOrigReferenceFasta(refstrs[i].c_str(), rp);
				}
			}
			for(size_t i = 0; i < refs.numRefs(); i++) {
				refnames.push_back(string(refs[i].name.toZBuf()));
				reflens.push_back(refs[i].seq.length(color));
			}
			if(refs.numRefs() == 0) {
				cerr << "Warning: No references were found" << endl;
			}
			if(rp.genCrick) {
				if(timing) {
					cerr << "  Crickizing" << endl;
				}
				// Add the crick strand.  If there were bisulfite
				// transformations to the Watson strand, they are
				// removed from the Watson strand before the Crick copy
				// is made.  Transformations are then applied to the
				// new Crick strand.  This has the effect of correctly
				// producing either Watson / Crick in the non-bisulfite
				// case, or BS Watson / BS Crick in the bisulfite case.
				refs.addReferenceRevComps(rp, false, 1, 0);
			}
			if(rp.genRevcomps) {
				if(timing) {
					cerr << "  Adding reverse comps" << endl;
				}
				// Add reverse complements of all existing references
				// (after the transformations have already been
				// applied).
				refs.addReferenceRevComps(rp, true, -1, 1);
			}
			assert(refs.repOk());
		}

		pair<size_t, size_t> mers = make_pair(0, 0);
		EList<MerIndexThread> threads;
		{
			Timer t(cerr, "... ", timing);
			if(timing) cerr << "Preparing to extract sub-sequences..." << endl;
			// Instantiate and run index threads
			assert_gt(nthreads, 0);
			threads.resize(nthreads);
			for(int i = 0; i < nthreads; i++) {
				threads[i].runCount(&refs, ind.get(), i, nthreads, color);
			}
			for(int i = 0; i < nthreads; i++) {
				pair<size_t, size_t> mrs = threads[i].join();
				mers.first += mrs.first;
				mers.second += mrs.second;
			}
			ind->allocateMers();
		}
		if(timing || verbose || justBlowup) {
			cerr << "Expecting index footprint of ";
			printBytes(mers.first * sizeof(mer_ent), cerr);
			cerr << endl;
			if(mers.first > mers.second) {
				cerr.setf(ios::fixed);
				cerr << "  base footprint is ";
				printBytes(mers.second * sizeof(mer_ent), cerr);
				cerr << endl
				     << "  blowup factor: " << setprecision(2) << ((double)mers.first / (double)mers.second) << endl;
			}
			if(justBlowup) throw 0;
		}
		{
			Timer t(cerr, "... ", timing);
			if(timing) cerr << "Extracting index sub-sequences..." << endl;
			// Instantiate and run index threads
			for(int i = 0; i < nthreads; i++) {
				threads[i].runIndex(&refs, ind.get(), i, nthreads, color);
			}
			for(int i = 0; i < nthreads; i++) threads[i].join();
		}
		assert_eq(mers.first, ind->size());
		if(verbose) {
			cout << "  read " << refs.numRefs() << " reference strings" << endl;
		}
		if(refs.empty() && iformat != INPUT_CHAININ) {
			cerr << "Index is empty; not enough reference sequence supplied" << endl;
			throw 1;
		}
		if(refs.numRefs() == 0 && iformat != INPUT_CHAININ) {
			cerr << "No reference strings provided; aborting..." << endl;
			throw 1;
		}
		{
			Timer t(cerr, "Sorting reference mers: ", timing);
			ind->sort(nthreads); // sort mers
		}
		{
			Timer t(cerr, "... ", timing);
			if(timing) cerr << "Aligning reads..." << endl;
			string rstr = argv[optind++];
			// Instantiate reference map, which translates to new reference
			// coordinate system prior to alignment output
			auto_ptr<ReferenceMap> rmap(
				refmapFile == NULL ? NULL : new ReferenceMap(refmapFile, !refidx));
			// Instantiate annotation map, which encodes SNP locations & alleles
			auto_ptr<AnnotationMap> amap(
				annotFile == NULL ? NULL : new AnnotationMap(annotFile));
			// Instantiate the read-input object
			auto_ptr<Reads> rs(
				(iformat == INPUT_CMDLINE) ?
					(Reads*)new StringReads(rstr, begin) :
					((iformat == INPUT_FASTA) ?
						(Reads*)new FastaReads(rstr, begin, bufsz) :
						((iformat == INPUT_FASTA_CONT) ?
							(Reads*)new FastaContinuousReads(
								rstr, begin, fastaContLen,
								fastaContFreq, fcontBis, fcontRc,
								color) :
							((iformat == INPUT_FASTQ) ?
								(Reads*)new FastqReads(rstr, solexaScale, sixty4off, begin, bufsz) :
									((iformat == INPUT_CHAININ) ?
										(Reads*)new ChainReads(rstr, begin, bufsz) :
											((iformat == INPUT_CSFASTA) ?
												(Reads*)new CSFastaReads(rstr, begin, bufsz) :
													((iformat == INPUT_CSFASTA_AND_QV) ?
														(Reads*)new CSFastaAndQVReads(rstr, qualFile, begin, bufsz) :
														(Reads*)new CSFastqReads(rstr, solexaScale, sixty4off, begin, bufsz))))))));
			// Set output stream
			string of = "-";
			if(optind < argc) of = argv[optind++];
			// Instantiate the alignment-output object
			auto_ptr<AlignOutput> outs(
				(oformat == OUTPUT_SAM) ?
					(AlignOutput*)new SamOutput(of, fullref, refidx, rp.bisulfiteC || rp.bisulfiteCpG, !samNoCsCq) :
					(AlignOutput*)new BowtieOutput(of, fullref, printCost, refidx, rp.bisulfiteC || rp.bisulfiteCpG));
			outs->printHeader(refnames, reflens);
			// Run the progress thread, if requested
			ProgressThread proThread;
			if(progress) proThread.run();
			// Instantiate and run search threads
			EList<SearchThread> sthreads;
			sthreads.resize(nthreads);
			for(int i = 0; i < (int)sthreads.size(); i++) {
				sthreads[i].init(
					i, (int)sthreads.size(), ind.get(), rs.get(), &refs,
					outs.get(), rmap.get(), amap.get());
				sthreads[i].run();
			}
			// Wait until search sthreads are finished
			for(size_t i = 0; i < sthreads.size(); i++) {
				sthreads[i].join();
			}
			if(progress) {
				proThread.kill();
				proThread.join();
			}
			outs->flush();
		}
		if(!quiet) ProgressThread::reportStats();
	} catch(exception& e) {
		cerr << "Command: ";
		for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
		cerr << endl;
		return 1;
	} catch(int e) {
		if(e != 0) {
			cerr << "Command: ";
			for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
			cerr << endl;
		}
		return e;
	}
	return 0;
}
예제 #12
0
/**
 * Summarize a Centrifuge output file as a per-species score table.
 *
 * Loads the index named by 'bt2indexBase' and records, for every
 * reference in it, a name and length keyed by species ID (the upper 32
 * bits of the ID returned by extractIDFromRefName).  When several
 * references share a species ID the last one wins.  Then reads 'cf_out'
 * line by line, taking whitespace-separated columns
 * <read name> <genus ID> <species ID> <score> ..., and sums the score
 * per species ID.  The totals are ordered with Pair2ndComparator and
 * written to stdout as a tab-separated table with a header line.
 *
 * Lines of 'cf_out' that do not parse up to the score column are
 * skipped.  If 'cf_out' cannot be opened an error is printed to stderr
 * and nothing is written to stdout.
 *
 * @param type          not used by this function
 * @param bt2indexBase  index basename, resolved via adjustEbwtBase
 * @param cf_out        path of the Centrifuge output file to summarize
 */
static void driver(
	const char * type,
	const string& bt2indexBase,
	const string& cf_out)
{
	if(gVerbose || startVerbose)  {
		cerr << "Entered driver(): "; logTime(cerr, true);
	}

	//initializeCntLut();  // FB: test commenting

	// Initialize Ebwt object and read in header
	if(gVerbose || startVerbose) {
		cerr << "About to initialize fw Ebwt: "; logTime(cerr, true);
	}
	adjIdxBase = adjustEbwtBase(argv0, bt2indexBase, gVerbose);
	Ebwt<index_t> ebwt(
		adjIdxBase,
		0,        // index is colorspace
		-1,       // fw index
		true,     // index is for the forward direction
		/* overriding: */ offRate,
		0, // amount to add to index offrate or <= 0 to do nothing
		useMm,    // whether to use memory-mapped files
		useShmem, // whether to use shared memory
		mmSweep,  // sweep memory-mapped files
		!noRefNames, // load names?
		true,        // load SA sample?
		true,        // load ftab?
		true,        // load rstarts?
		gVerbose, // whether to be talkative
		startVerbose, // talkative during initialization
		false /*passMemExc*/,
		sanityCheck);

	// Build the species ID -> (name, length) table from the index.
	EList<string> refnames;
	readEbwtRefnames<index_t>(adjIdxBase, refnames);
	map<uint32_t,pair<string,uint64_t> > speciesID_to_name_len;
	// Stop at whichever list is shorter so that a missing name can never
	// lead to an out-of-range access on 'refnames'.
	for(size_t i = 0; i < ebwt.nPat() && i < refnames.size(); i++) {
		// extract numeric id from refName
		const string& refName = refnames[i];
		uint64_t id = extractIDFromRefName(refName);
		uint32_t speciesID = (uint32_t)(id >> 32);

		// extract name from refName: everything from the first space on
		// (the space itself is kept, as before).  A name without any
		// space is used whole; calling substr(npos) would throw.
		const string::size_type sep = refName.find_first_of(' ');
		const string name_part =
			(sep == string::npos) ? refName : refName.substr(sep);

		speciesID_to_name_len[speciesID] = pair<string,uint64_t>(name_part,ebwt.plen()[i]);
	}

	// Read Centrifuge output file
	ifstream infile(cf_out.c_str());
	if(!infile.is_open()) {
		cerr << "Error: could not open Centrifuge output file \"" << cf_out << "\"" << endl;
		return;
	}

	string line;
	map<uint32_t,uint32_t> species_to_score;

	while (getline(infile,line)) {
		string rd_name;
		uint32_t genusID = 0;
		uint32_t speciesID = 0;
		uint32_t score = 0;

		istringstream iss(line);
		// Only the columns up to the score are needed.  Skip blank,
		// header or otherwise malformed lines instead of accumulating
		// values that were never read.
		if(!(iss >> rd_name >> genusID >> speciesID >> score)) {
			continue;
		}
		species_to_score[speciesID] += score;
	}

	// Sort the species by their score
	vector<pair<uint32_t,uint32_t> > species_to_score_v(species_to_score.begin(), species_to_score.end());

	sort(species_to_score_v.begin(),species_to_score_v.end(),Pair2ndComparator<uint32_t>());

	cout << "Name\tTaxonID\tLength\tSummed Score\tNormalized Score\n";
	// Output the summed species scores
	for (vector<pair<uint32_t,uint32_t> >::iterator species_score = species_to_score_v.begin();
			species_score != species_to_score_v.end();
			++species_score) {
		const uint32_t speciesID = species_score->first;
		const uint64_t sumscore = species_score->second;

		// Look the species up without inserting.  A species that is not
		// in the index is reported with an empty name and length 0.
		string name;
		uint64_t slength = 0;
		map<uint32_t,pair<string,uint64_t> >::const_iterator known =
			speciesID_to_name_len.find(speciesID);
		if(known != speciesID_to_name_len.end()) {
			name = known->second.first;
			slength = known->second.second;
		}

		// Never divide by a zero length; report 0 as the normalized
		// score in that case.
		const float normscore = (slength > 0) ? (float)sumscore/slength : 0.0f;

		cout << name << "\t" <<
				speciesID << "\t" <<
				slength << "\t" <<
				sumscore << "\t" <<
				normscore << "\n";
	}
}
예제 #13
0
파일: pat.cpp 프로젝트: eXistence/fastflow
/**
 * Build the PairedPatternSource that dispenses the reads named by the
 * file-name lists.
 *
 * With p.fileParallel set, every file name gets a PatternSource of its
 * own; otherwise each non-empty list yields a single PatternSource that
 * covers the whole list.  If 'm12' is non-empty the result is a
 * PairedSoloPatternSource over the interleaved sources and all other
 * sources created here are destroyed again.  Otherwise the result is a
 * PairedDualPatternSource over the mate-1 and mate-2 sources followed
 * by the unpaired sources, each unpaired source being matched with a
 * NULL entry on the mate-2 side.
 *
 * 'q', 'q1', 'q2' and 'verbose' are not consulted by this function.
 */
PairedPatternSource* PairedPatternSource::setupPatternSources(
	const EList<string>& si,   // singles, from argv
	const EList<string>& m1,   // mate1's, from -1 arg
	const EList<string>& m2,   // mate2's, from -2 arg
	const EList<string>& m12,  // both mates on each line, from --12 arg
	const EList<string>& q,    // qualities associated with singles
	const EList<string>& q1,   // qualities associated with m1
	const EList<string>& q2,   // qualities associated with m2
	const PatternParams& p,    // read-in parameters
	bool verbose)              // be talkative?
{
	EList<PatternSource*>* mate1Srcs       = new EList<PatternSource*>();
	EList<PatternSource*>* mate2Srcs       = new EList<PatternSource*>();
	EList<PatternSource*>* interleavedSrcs = new EList<PatternSource*>();

	// Paired reads that appear interleaved in a single file (--12)
	if(!p.fileParallel) {
		if(m12.size() > 0) {
			interleavedSrcs->push_back(PatternSource::patsrcFromStrings(p, m12));
		}
	} else {
		// One PatternSource per query file
		for(size_t fi = 0; fi < m12.size(); fi++) {
			EList<string> oneFile;
			oneFile.push_back(m12[fi]);
			assert_eq(1, oneFile.size());
			interleavedSrcs->push_back(PatternSource::patsrcFromStrings(p, oneFile));
		}
	}

	// Mate-1 files (-1)
	if(!p.fileParallel) {
		if(m1.size() > 0) {
			mate1Srcs->push_back(PatternSource::patsrcFromStrings(p, m1));
		}
	} else {
		for(size_t fi = 0; fi < m1.size(); fi++) {
			EList<string> oneFile;
			oneFile.push_back(m1[fi]);
			assert_eq(1, oneFile.size());
			mate1Srcs->push_back(PatternSource::patsrcFromStrings(p, oneFile));
		}
	}

	// Mate-2 files (-2)
	if(!p.fileParallel) {
		if(m2.size() > 0) {
			mate2Srcs->push_back(PatternSource::patsrcFromStrings(p, m2));
		}
	} else {
		for(size_t fi = 0; fi < m2.size(); fi++) {
			EList<string> oneFile;
			oneFile.push_back(m2[fi]);
			assert_eq(1, oneFile.size());
			mate2Srcs->push_back(PatternSource::patsrcFromStrings(p, oneFile));
		}
	}

	// Every mate-1 source needs a mate-2 counterpart
	assert_eq(mate1Srcs->size(), mate2Srcs->size());

	// Unpaired files: the source goes on the mate-1 side and a NULL
	// placeholder keeps the mate-2 side the same length.
	if(!p.fileParallel) {
		if(si.size() > 0) {
			PatternSource* single = PatternSource::patsrcFromStrings(p, si);
			assert(single != NULL);
			mate1Srcs->push_back(single);
			mate2Srcs->push_back(NULL);
		}
	} else {
		for(size_t fi = 0; fi < si.size(); fi++) {
			EList<string> oneFile;
			oneFile.push_back(si[fi]);
			assert_eq(1, oneFile.size());
			PatternSource* single = PatternSource::patsrcFromStrings(p, oneFile);
			assert(single != NULL);
			mate1Srcs->push_back(single);
			mate2Srcs->push_back(NULL);
		}
	}

	if(m12.size() > 0) {
		// Interleaved input wins; discard whatever was built for the
		// mate-1/mate-2 sides.
		PairedPatternSource *solo = new PairedSoloPatternSource(interleavedSrcs, p);
		for(size_t k = 0; k < mate1Srcs->size(); k++) {
			delete (*mate1Srcs)[k];
		}
		for(size_t k = 0; k < mate2Srcs->size(); k++) {
			delete (*mate2Srcs)[k];
		}
		delete mate1Srcs;
		delete mate2Srcs;
		return solo;
	}

	PairedPatternSource *dual = new PairedDualPatternSource(mate1Srcs, mate2Srcs, p);
	for(size_t k = 0; k < interleavedSrcs->size(); k++) {
		delete (*interleavedSrcs)[k];
	}
	delete interleavedSrcs;
	return dual;
}
예제 #14
0
/**
 * Reverse the 'src' list of RefRecords into the 'dst' list.  Don't
 * modify 'src'.
 */
void reverseRefRecords(
	const EList<RefRecord>& src,
	EList<RefRecord>& dst,
	bool recursive,
	bool verbose)
{
	dst.clear();
	{
		EList<RefRecord> cur;
		for(int i = (int)src.size()-1; i >= 0; i--) {
			bool first = (i == (int)src.size()-1 || src[i+1].first);
			// Clause after the || on next line is to deal with empty FASTA
			// records at the end of the 'src' list, which would be wrongly
			// omitted otherwise.
			if(src[i].len || (first && src[i].off == 0)) {
				cur.push_back(RefRecord(0, src[i].len, first));
				first = false;
			}
			if(src[i].off) cur.push_back(RefRecord(src[i].off, 0, first));
		}
		bool mergedLast;
		for(int i = 0; i < (int)cur.size(); i++) {
			mergedLast = false;
			assert(cur[i].off == 0 || cur[i].len == 0);
			if(i < (int)cur.size()-1 && cur[i].off != 0 && !cur[i+1].first) {
				dst.push_back(RefRecord(cur[i].off, cur[i+1].len, cur[i].first));
				i++;
				mergedLast = true;
			} else {
				dst.push_back(cur[i]);
			}
		}
	}
	//if(verbose) {
	//	cout << "Source: " << endl;
	//	printRecords(cout, src);
	//	cout << "Dest: " << endl;
	//	printRecords(cout, dst);
	//}
#ifndef NDEBUG
	size_t srcnfirst = 0, dstnfirst = 0;
	for(size_t i = 0; i < src.size(); i++) {
		if(src[i].first) {
			srcnfirst++;
		}
	}
	for(size_t i = 0; i < dst.size(); i++) {
		if(dst[i].first) {
			dstnfirst++;
		}
	}
	assert_eq(srcnfirst, dstnfirst);
	if(!recursive) {
		EList<RefRecord> tmp;
		reverseRefRecords(dst, tmp, true);
		assert_eq(tmp.size(), src.size());
		for(size_t i = 0; i < src.size(); i++) {
			assert_eq(src[i].len, tmp[i].len);
			assert_eq(src[i].off, tmp[i].off);
			assert_eq(src[i].first, tmp[i].first);
		}
	}
#endif
}
예제 #15
0
int main(void) {
	cerr << "Test inter-class comparison operators...";
	{
		SString<int> s(2);
		s.set('a', 0);
		s.set('b', 1);
		assert(sstr_eq(s, (const char *)"ab"));
		assert(!sstr_neq(s, (const char *)"ab"));
		assert(!sstr_lt(s, (const char *)"ab"));
		assert(!sstr_gt(s, (const char *)"ab"));
		assert(sstr_leq(s, (const char *)"ab"));
		assert(sstr_geq(s, (const char *)"ab"));
		
		SStringExpandable<int> s2;
		s2.append('a');
		s2.append('b');
		assert(sstr_eq(s, s2));
		assert(sstr_eq(s2, (const char *)"ab"));
		assert(!sstr_neq(s, s2));
		assert(!sstr_neq(s2, (const char *)"ab"));
		assert(!sstr_lt(s, s2));
		assert(!sstr_lt(s2, (const char *)"ab"));
		assert(!sstr_gt(s, s2));
		assert(!sstr_gt(s2, (const char *)"ab"));
		assert(sstr_leq(s, s2));
		assert(sstr_leq(s2, (const char *)"ab"));
		assert(sstr_geq(s, s2));
		assert(sstr_geq(s2, (const char *)"ab"));

		SStringFixed<int, 12> s3;
		s3.append('a');
		s3.append('b');
		assert(sstr_eq(s, s3));
		assert(sstr_eq(s2, s3));
		assert(sstr_eq(s3, (const char *)"ab"));
		assert(!sstr_neq(s, s3));
		assert(!sstr_neq(s2, s3));
		assert(!sstr_neq(s3, (const char *)"ab"));
		assert(!sstr_lt(s, s3));
		assert(!sstr_lt(s2, s3));
		assert(!sstr_lt(s3, (const char *)"ab"));
		assert(!sstr_gt(s, s3));
		assert(!sstr_gt(s2, s3));
		assert(!sstr_gt(s3, (const char *)"ab"));
		assert(sstr_geq(s, s3));
		assert(sstr_geq(s2, s3));
		assert(sstr_geq(s3, (const char *)"ab"));
		assert(sstr_leq(s, s3));
		assert(sstr_leq(s2, s3));
		assert(sstr_leq(s3, (const char *)"ab"));
	}
	cerr << "PASSED" << endl;
	
	cerr << "Test flag for whether to consider end-of-word < other chars ...";
	{
		SString<char> ss("String");
		SString<char> sl("String1");
		assert(sstr_lt(ss, sl));
		assert(sstr_gt(ss, sl, false));
		assert(sstr_leq(ss, sl));
		assert(sstr_geq(ss, sl, false));
	}
	cerr << "PASSED" << endl;
	
	cerr << "Test toZBuf and toZBufXForm ...";
	{
		SString<uint32_t> s(10);
		for(int i = 0; i < 10; i++) {
			s[i] = (uint32_t)i;
		}
		assert(strcmp(s.toZBufXForm("0123456789"), "0123456789") == 0);
	}
	cerr << "PASSED" << endl;

	cerr << "Test S2bDnaString ...";
	{
		const char *str =
			"ACGTACGTAC" "ACGTACGTAC" "ACGTACGTAC"
			"ACGTACGTAC" "ACGTACGTAC" "ACGTACGTAC";
		const char *gs =
			"GGGGGGGGGG" "GGGGGGGGGG" "GGGGGGGGGG"
			"GGGGGGGGGG" "GGGGGGGGGG" "GGGGGGGGGG";
		for(size_t i = 0; i < 60; i++) {
			S2bDnaString s(str, i, true);
			S2bDnaString sr;
			BTDnaString s2(str, i, true);
			assert(sstr_eq(s, s2));
			if(i >= 10) {
				BTDnaString s3;
				s.windowGetDna(s3, true, 3, 4);
				assert(sstr_eq(s3.toZBuf(), (const char*)"TACG"));
				s.windowGetDna(s3, false, 3, 4);
				assert(sstr_eq(s3.toZBuf(), (const char*)"CGTA"));
				assert_eq('A', s.toChar(0));
				assert_eq('G', s.toChar(2));
				assert_eq('A', s.toChar(4));
				assert_eq('G', s.toChar(6));
				assert_eq('A', s.toChar(8));
				
				s.reverseWindow(1, 8);
				s2.reverseWindow(1, 8);
				
				assert_eq('A', s.toChar(1));
				assert_eq('T', s.toChar(2));
				assert_eq('G', s.toChar(3));
				assert_eq('C', s.toChar(4));
				assert_eq('A', s.toChar(5));
				assert_eq('T', s.toChar(6));
				assert_eq('G', s.toChar(7));
				assert_eq('C', s.toChar(8));
				assert(sstr_eq(s, s2));

				s.reverseWindow(1, 8);
				s2.reverseWindow(1, 8);
				assert(sstr_eq(s, s2));
			}
			if(i > 1) {
				s.reverse();
				sr.installReverseChars(str, i);
				s2.reverse();
				assert(sstr_eq(s, s2));
				assert(sstr_eq(sr, s2));
				s.reverse();
				sr.reverse();
				assert(sstr_neq(s, s2));
				assert(sstr_neq(sr, s2));
				s.fill(2);
				s2.reverse();
				assert(sstr_leq(s, gs));
				assert(sstr_gt(s, s2));
				assert(sstr_gt(s, sr));
				s2.fill(2);
				sr.fill(2);
				assert(sstr_eq(s, s2));
				assert(sstr_eq(s, sr));
			}
		}
		S2bDnaString s(str, true);
		S2bDnaString sr;
		BTDnaString s2(str, true);
		assert(sstr_eq(s2.toZBuf(), str));
		assert(sstr_eq(s, s2));
		s.reverse();
		sr.installReverseChars(str);
		s2.reverse();
		assert(sstr_eq(s, s2));
		assert(sstr_eq(sr, s2));
		s.reverse();
		sr.reverse();
		assert(sstr_neq(s, s2));
		assert(sstr_neq(sr, s2));
	}
	cerr << "PASSED" << endl;

	cerr << "Test operator=() ...";
	{
		S2bDnaString s;
		s.installChars(string("gtcagtca"));
		assert(sstr_eq(s.toZBuf(), (const char *)"GTCAGTCA"));
	}
	cerr << "PASSED" << endl;
	
	cerr << "Conversions from string ...";
	{
		SStringExpandable<char> se(string("hello"));
		EList<SStringExpandable<char> > sel;
		sel.push_back(SStringExpandable<char>(string("hello")));
	}
	cerr << "PASSED" << endl;
	
	cerr << "PASSED" << endl;
}