/**
   \brief Return the size of the inductive datatype.

   Pre-condition: The given argument contains the parameters of an inductive datatype.

   Layout of \c parameters as read by this function:
   - parameters[0]            : number of mutually defined datatypes
   - parameters[1]            : index (tid) of the datatype whose size is requested
   - parameters[2 + 2*tid + 1]: offset \c o of the constructor table of datatype \c tid
   - parameters[o]            : number of constructors
   - parameters[o + s]        : offset \c k_i of the s-th constructor (s is 1-based)
   - parameters[k_i + 2]      : number of accessors of that constructor
   - parameters[k_i + 4 + 2*r]: type of the r-th accessor; either an int (the tid of a
                                sibling datatype) or an ast (a sort)

   The sibling datatypes are traversed depth-first with an explicit stack, using the
   usual three colors: WHITE (not visited), GRAY (on the current path), BLACK (size
   already stored in \c szs).

   \return a default-constructed sort_size when the datatype is recursive or mentions an
   infinite sort; sort_size::mk_very_big() when it (transitively) mentions a very big
   sort; otherwise the sort_size built from the exact number of elements, i.e., the sum
   over the constructors of the product of the sizes of their accessor types.
*/
static sort_size get_datatype_size(parameter const * parameters) {
    unsigned num_types = parameters[0].get_int();
    unsigned top_tid   = parameters[1].get_int();
    buffer<sort_size> szs(num_types, sort_size());
    buffer<status>    already_found(num_types, WHITE);
    buffer<unsigned>  todo;
    todo.push_back(top_tid);
    while (!todo.empty()) {
        unsigned tid = todo.back();
        if (already_found[tid] == BLACK) {
            // The same datatype may be pushed more than once; later copies are just dropped.
            todo.pop_back();
            continue;
        }
        already_found[tid] = GRAY;
        unsigned o                = parameters[2 + 2*tid + 1].get_int(); // constructor offset
        unsigned num_constructors = parameters[o].get_int();
        bool is_very_big = false;
        bool can_process = true;
        // First pass: classify every accessor type and schedule unvisited siblings.
        for (unsigned s = 1; s <= num_constructors; s++) {
            unsigned k_i           = parameters[o+s].get_int();
            unsigned num_accessors = parameters[k_i+2].get_int();
            for (unsigned r = 0; r < num_accessors; r++) {
                parameter const & a_type = parameters[k_i+4 + 2*r];
                if (a_type.is_int()) {
                    unsigned tid_prime = a_type.get_int();
                    switch (already_found[tid_prime]) {
                    case WHITE:
                        // The size of tid_prime must be computed before tid can be processed.
                        todo.push_back(tid_prime);
                        can_process = false;
                        break;
                    case GRAY:
                        // type is recursive
                        return sort_size();
                    case BLACK:
                        // A sibling whose size was already stored as "very big" makes this
                        // datatype very big too. Without this check the counting pass below
                        // would run on it and violate its own assertion.
                        if (szs[tid_prime].is_very_big())
                            is_very_big = true;
                        break;
                    }
                }
                else {
                    SASSERT(a_type.is_ast());
                    sort * ty = to_sort(a_type.get_ast());
                    if (ty->is_infinite()) {
                        // type is infinite
                        return sort_size();
                    }
                    else if (ty->is_very_big()) {
                        is_very_big = true;
                    }
                }
            }
        }
        if (can_process) {
            todo.pop_back();
            already_found[tid] = BLACK;
            if (is_very_big) {
                szs[tid] = sort_size::mk_very_big();
            }
            else {
                // The type is neither infinite nor very big.
                // Second pass: compute the exact number of elements.
                rational num;
                for (unsigned s = 1; s <= num_constructors; s++) {
                    unsigned k_i           = parameters[o+s].get_int();
                    unsigned num_accessors = parameters[k_i+2].get_int();
                    rational c_num(1);
                    for (unsigned r = 0; r < num_accessors; r++) {
                        parameter const & a_type = parameters[k_i+4 + 2*r];
                        if (a_type.is_int()) {
                            unsigned tid_prime = a_type.get_int();
                            SASSERT(!szs[tid_prime].is_infinite() && !szs[tid_prime].is_very_big());
                            c_num *= rational(szs[tid_prime].size(), rational::ui64());
                        }
                        else {
                            SASSERT(a_type.is_ast());
                            sort * ty = to_sort(a_type.get_ast());
                            SASSERT(!ty->is_infinite() && !ty->is_very_big());
                            c_num *= rational(ty->get_num_elements().size(), rational::ui64());
                        }
                    }
                    num += c_num;
                }
                szs[tid] = sort_size(num);
            }
        }
    }
    return szs[top_tid];
}
static void driver( const string& infile, EList<string>& infiles, const string& snpfile, const string& htfile, const string& ssfile, const string& exonfile, const string& svfile, const string& outfile, bool packed, int reverse) { initializeCntLut(); initializeCntBit(); EList<FileBuf*> is(MISC_CAT); bool bisulfite = false; RefReadInParams refparams(false, reverse, nsToAs, bisulfite); assert_gt(infiles.size(), 0); if(format == CMDLINE) { // Adapt sequence strings to stringstreams open for input stringstream *ss = new stringstream(); for(size_t i = 0; i < infiles.size(); i++) { (*ss) << ">" << i << endl << infiles[i].c_str() << endl; } FileBuf *fb = new FileBuf(ss); assert(fb != NULL); assert(!fb->eof()); assert(fb->get() == '>'); ASSERT_ONLY(fb->reset()); assert(!fb->eof()); is.push_back(fb); } else { // Adapt sequence files to ifstreams for(size_t i = 0; i < infiles.size(); i++) { FILE *f = fopen(infiles[i].c_str(), "r"); if (f == NULL) { cerr << "Error: could not open "<< infiles[i].c_str() << endl; throw 1; } FileBuf *fb = new FileBuf(f); assert(fb != NULL); if(fb->peek() == -1 || fb->eof()) { cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl; continue; } assert(!fb->eof()); assert(fb->get() == '>'); ASSERT_ONLY(fb->reset()); assert(!fb->eof()); is.push_back(fb); } } if(is.empty()) { cerr << "Warning: All fasta inputs were empty" << endl; throw 1; } filesWritten.push_back(outfile + ".1." + gfm_ext); filesWritten.push_back(outfile + ".2." + gfm_ext); // Vector for the ordered list of "records" comprising the input // sequences. A record represents a stretch of unambiguous // characters in one of the input sequences. EList<RefRecord> szs(MISC_CAT); std::pair<size_t, size_t> sztot; { if(verbose) cerr << "Reading reference sizes" << endl; Timer _t(cerr, " Time reading reference sizes: ", verbose); if(!reverse && (writeRef || justRef)) { filesWritten.push_back(outfile + ".3." + gfm_ext); filesWritten.push_back(outfile + ".4." 
+ gfm_ext); sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck); } else { sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck); } } if(justRef) return; assert_gt(sztot.first, 0); assert_gt(sztot.second, 0); assert_gt(szs.size(), 0); // Construct index from input strings and parameters filesWritten.push_back(outfile + ".5." + gfm_ext); filesWritten.push_back(outfile + ".6." + gfm_ext); filesWritten.push_back(outfile + ".7." + gfm_ext); filesWritten.push_back(outfile + ".8." + gfm_ext); TStr s; HGFM<TIndexOffU> hGFM( s, packed, 1, // TODO: maybe not? lineRate, offRate, // suffix-array sampling rate ftabChars, // number of chars in initial arrow-pair calc localOffRate, localFtabChars, nthreads, snpfile, htfile, ssfile, exonfile, svfile, outfile, // basename for .?.ht2 files reverse == 0, // fw !entireSA, // useBlockwise bmax, // block size for blockwise SA builder bmaxMultSqrt, // block size as multiplier of sqrt(len) bmaxDivN, // block size as divisor of len noDc? 0 : dcv,// difference-cover period is, // list of input streams szs, // list of reference sizes (TIndexOffU)sztot.first, // total size of all unambiguous ref chars refparams, // reference read-in parameters seed, // pseudo-random number generator seed -1, // override offRate verbose, // be talkative autoMem, // pass exceptions up to the toplevel so that we can adjust memory settings automatically sanityCheck); // verify results and internal consistency // Note that the Ebwt is *not* resident in memory at this time. To // load it into memory, call ebwt.loadIntoMemory() if(verbose) { // Print Ebwt's vital stats hGFM.gh().print(cerr); } if(sanityCheck) { // Try restoring the original string (if there were // multiple texts, what we'll get back is the joined, // padded string, not a list) hGFM.loadIntoMemory( reverse ? (refparams.reverse == REF_READ_REVERSE) : 0, true, // load SA sample? true, // load ftab? true, // load rstarts? 
false, false); SString<char> s2; hGFM.restore(s2); hGFM.evictFromMemory(); { SString<char> joinedss = GFM<>::join<SString<char> >( is, // list of input streams szs, // list of reference sizes (TIndexOffU)sztot.first, // total size of all unambiguous ref chars refparams, // reference read-in parameters seed); // pseudo-random number generator seed if(refparams.reverse == REF_READ_REVERSE) { joinedss.reverse(); } assert_eq(joinedss.length(), s2.length()); assert(sstr_eq(joinedss, s2)); } if(verbose) { if(s2.length() < 1000) { cout << "Passed restore check: " << s2.toZBuf() << endl; } else { cout << "Passed restore check: (" << s2.length() << " chars)" << endl; } } } }