Exemplo n.º 1
0
/**
   \brief Return the size of the inductive datatype.
   Pre-condition: The given argument contains the parameters of an inductive datatype.

   Parameter layout, as read by this function:
   - parameters[0]: number of types described by the parameter array.
   - parameters[1]: index (tid) of the type whose size is requested.
   - parameters[2 + 2*t + 1]: offset o of the constructor table of type t.
   - parameters[o]: number of constructors of the type; parameters[o+s], for
     1 <= s <= number of constructors, is the offset k of constructor s.
   - parameters[k+2]: number of accessors of the constructor;
     parameters[k+4 + 2*r] is the type of accessor r, either an int (index of
     another type in this array) or an ast (a sort).

   The types reachable from the requested one are visited depth-first using an
   explicit stack. A default-constructed sort_size() is returned as soon as a
   recursive reference or an infinite accessor sort is found. Otherwise the
   result is sort_size::mk_very_big() if any component is very big, or the sum
   over constructors of the product of the accessor sizes.
*/
static sort_size get_datatype_size(parameter const * parameters) {
    unsigned num_types          = parameters[0].get_int();
    unsigned tid                = parameters[1].get_int();
    buffer<sort_size> szs(num_types, sort_size());
    buffer<status>    already_found(num_types, WHITE);
    buffer<unsigned>  todo;
    todo.push_back(tid);
    while (!todo.empty()) {
        // Distinct name on purpose: the outer tid is still needed for the final result.
        unsigned curr = todo.back();
        if (already_found[curr] == BLACK) {
            // The same type may have been pushed more than once; it is already done.
            todo.pop_back();
            continue;
        }
        already_found[curr] = GRAY;
        unsigned o                 = parameters[2 + 2*curr + 1].get_int(); // constructor offset
        unsigned num_constructors  = parameters[o].get_int();
        bool     is_very_big       = false;
        bool     can_process       = true;
        for (unsigned s = 1; s <= num_constructors; s++) {
            unsigned k_i           = parameters[o+s].get_int();
            unsigned num_accessors = parameters[k_i+2].get_int();
            for (unsigned r = 0; r < num_accessors; r++) {
                parameter const & a_type = parameters[k_i+4 + 2*r];
                if (a_type.is_int()) {
                    int tid_prime = a_type.get_int();
                    switch (already_found[tid_prime]) {
                    case WHITE:
                        // Size not known yet: compute it first, then revisit curr.
                        todo.push_back(tid_prime);
                        can_process = false;
                        break;
                    case GRAY:
                        // type is recursive
                        return sort_size();
                    case BLACK:
                        // A finished type may have been recorded as very big; it must
                        // not reach the exact product computed below.
                        if (szs[tid_prime].is_very_big()) {
                            is_very_big = true;
                        }
                        break;
                    }
                }
                else {
                    SASSERT(a_type.is_ast());
                    sort * ty = to_sort(a_type.get_ast());
                    if (ty->is_infinite()) {
                        // type is infinite
                        return sort_size();
                    }
                    else if (ty->is_very_big()) {
                        is_very_big = true;
                    }
                }
            }
        }
        if (can_process) {
            // Every referenced type is BLACK, so the size of curr can be settled now.
            todo.pop_back();
            already_found[curr] = BLACK;
            if (is_very_big) {
                szs[curr] = sort_size::mk_very_big();
            }
            else {
                // the type is not infinite nor the number of elements is infinite...
                // computing the number of elements
                rational num;
                for (unsigned s = 1; s <= num_constructors; s++) {
                    unsigned k_i           = parameters[o+s].get_int();
                    unsigned num_accessors = parameters[k_i+2].get_int();
                    // Number of values built by this constructor: product of accessor sizes.
                    rational c_num(1);
                    for (unsigned r = 0; r < num_accessors; r++) {
                        parameter const & a_type = parameters[k_i+4 + 2*r];
                        if (a_type.is_int()) {
                            int tid_prime = a_type.get_int();
                            SASSERT(!szs[tid_prime].is_infinite() && !szs[tid_prime].is_very_big());
                            c_num *= rational(szs[tid_prime].size(),rational::ui64());
                        }
                        else {
                            SASSERT(a_type.is_ast());
                            sort * ty = to_sort(a_type.get_ast());
                            SASSERT(!ty->is_infinite() && !ty->is_very_big());
                            c_num *= rational(ty->get_num_elements().size(), rational::ui64());
                        }
                    }
                    num += c_num;
                }
                szs[curr] = sort_size(num);
            }
        }
    }
    return szs[tid];
}
Exemplo n.º 2
0
static void driver(
                   const string& infile,
                   EList<string>& infiles,
                   const string& snpfile,
                   const string& htfile,
                   const string& ssfile,
                   const string& exonfile,
                   const string& svfile,
                   const string& outfile,
                   bool packed,
                   int reverse)
{
    initializeCntLut();
    initializeCntBit();
	EList<FileBuf*> is(MISC_CAT);
	bool bisulfite = false;
	RefReadInParams refparams(false, reverse, nsToAs, bisulfite);
	assert_gt(infiles.size(), 0);
	if(format == CMDLINE) {
		// Adapt sequence strings to stringstreams open for input
		stringstream *ss = new stringstream();
		for(size_t i = 0; i < infiles.size(); i++) {
			(*ss) << ">" << i << endl << infiles[i].c_str() << endl;
		}
		FileBuf *fb = new FileBuf(ss);
		assert(fb != NULL);
		assert(!fb->eof());
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb);
	} else {
		// Adapt sequence files to ifstreams
		for(size_t i = 0; i < infiles.size(); i++) {
			FILE *f = fopen(infiles[i].c_str(), "r");
			if (f == NULL) {
				cerr << "Error: could not open "<< infiles[i].c_str() << endl;
				throw 1;
			}
			FileBuf *fb = new FileBuf(f);
			assert(fb != NULL);
			if(fb->peek() == -1 || fb->eof()) {
				cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl;
				continue;
			}
			assert(!fb->eof());
			assert(fb->get() == '>');
			ASSERT_ONLY(fb->reset());
			assert(!fb->eof());
			is.push_back(fb);
		}
	}
	if(is.empty()) {
		cerr << "Warning: All fasta inputs were empty" << endl;
		throw 1;
	}
    filesWritten.push_back(outfile + ".1." + gfm_ext);
    filesWritten.push_back(outfile + ".2." + gfm_ext);
	// Vector for the ordered list of "records" comprising the input
	// sequences.  A record represents a stretch of unambiguous
	// characters in one of the input sequences.
	EList<RefRecord> szs(MISC_CAT);
	std::pair<size_t, size_t> sztot;
	{
		if(verbose) cerr << "Reading reference sizes" << endl;
		Timer _t(cerr, "  Time reading reference sizes: ", verbose);
		if(!reverse && (writeRef || justRef)) {
			filesWritten.push_back(outfile + ".3." + gfm_ext);
			filesWritten.push_back(outfile + ".4." + gfm_ext);
			sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck);
		} else {
			sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck);
		}
	}
	if(justRef) return;
	assert_gt(sztot.first, 0);
	assert_gt(sztot.second, 0);
	assert_gt(szs.size(), 0);
    
	// Construct index from input strings and parameters	
    filesWritten.push_back(outfile + ".5." + gfm_ext);
    filesWritten.push_back(outfile + ".6." + gfm_ext);
    filesWritten.push_back(outfile + ".7." + gfm_ext);
    filesWritten.push_back(outfile + ".8." + gfm_ext);
	TStr s;
	HGFM<TIndexOffU> hGFM(
                          s,
                          packed,
                          1,  // TODO: maybe not?
                          lineRate,
                          offRate,      // suffix-array sampling rate
                          ftabChars,    // number of chars in initial arrow-pair calc
                          localOffRate,
                          localFtabChars,
                          nthreads,
                          snpfile,
                          htfile,
                          ssfile,
                          exonfile,
                          svfile,
                          outfile,      // basename for .?.ht2 files
                          reverse == 0, // fw
                          !entireSA,    // useBlockwise
                          bmax,         // block size for blockwise SA builder
                          bmaxMultSqrt, // block size as multiplier of sqrt(len)
                          bmaxDivN,     // block size as divisor of len
                          noDc? 0 : dcv,// difference-cover period
                          is,           // list of input streams
                          szs,          // list of reference sizes
                          (TIndexOffU)sztot.first,  // total size of all unambiguous ref chars
                          refparams,    // reference read-in parameters
                          seed,         // pseudo-random number generator seed
                          -1,           // override offRate
                          verbose,      // be talkative
                          autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
                          sanityCheck); // verify results and internal consistency
    // Note that the Ebwt is *not* resident in memory at this time.  To
    // load it into memory, call ebwt.loadIntoMemory()
	if(verbose) {
		// Print Ebwt's vital stats
		hGFM.gh().print(cerr);
	}
	if(sanityCheck) {
		// Try restoring the original string (if there were
		// multiple texts, what we'll get back is the joined,
		// padded string, not a list)
		hGFM.loadIntoMemory(
                            reverse ? (refparams.reverse == REF_READ_REVERSE) : 0,
                            true,  // load SA sample?
                            true,  // load ftab?
                            true,  // load rstarts?
                            false,
                            false);
		SString<char> s2;
		hGFM.restore(s2);
		hGFM.evictFromMemory();
		{
			SString<char> joinedss = GFM<>::join<SString<char> >(
				is,          // list of input streams
				szs,         // list of reference sizes
				(TIndexOffU)sztot.first, // total size of all unambiguous ref chars
				refparams,   // reference read-in parameters
				seed);       // pseudo-random number generator seed
			if(refparams.reverse == REF_READ_REVERSE) {
				joinedss.reverse();
			}
			assert_eq(joinedss.length(), s2.length());
			assert(sstr_eq(joinedss, s2));
		}
		if(verbose) {
			if(s2.length() < 1000) {
				cout << "Passed restore check: " << s2.toZBuf() << endl;
			} else {
				cout << "Passed restore check: (" << s2.length() << " chars)" << endl;
			}
		}
	}
}