/** Creates the input streams used to read `column_names` from the table.
  *
  * Holds a shared lock on `rwlock` while the index is loaded, validates the
  * requested columns with check(), and reports FetchColumns as the processed stage.
  * If the "index.mrk" file does not exist, a single NullBlockInputStream is returned.
  * Otherwise the blocks listed in the index are divided into `num_streams`
  * contiguous ranges (at most one stream per block), one StripeLogBlockInputStream each.
  */
BlockInputStreams StorageStripeLog::read(
    const Names & column_names,
    const SelectQueryInfo & /*query_info*/,
    const Context & context,
    QueryProcessingStage::Enum & processed_stage,
    const size_t /*max_block_size*/,
    unsigned num_streams)
{
    std::shared_lock<std::shared_mutex> lock(rwlock);

    check(column_names);
    processed_stage = QueryProcessingStage::FetchColumns;

    const auto index_path = full_path() + "index.mrk";
    if (!Poco::File(index_path).exists())
        return { std::make_shared<NullBlockInputStream>() };

    /// Only the index entries of the requested columns are loaded.
    NameSet column_names_set(column_names.begin(), column_names.end());
    CompressedReadBufferFromFile index_in(index_path, 0, 0, INDEX_BUFFER_SIZE);
    std::shared_ptr<const IndexForNativeFormat> index{std::make_shared<IndexForNativeFormat>(index_in, column_names_set)};

    /// There is no point in having more streams than blocks.
    const size_t num_blocks = index->blocks.size();
    if (num_streams > num_blocks)
        num_streams = num_blocks;

    BlockInputStreams streams;
    for (size_t i = 0; i < num_streams; ++i)
    {
        /// Stream i covers blocks [i * num_blocks / num_streams, (i + 1) * num_blocks / num_streams).
        auto range_begin = index->blocks.begin();
        std::advance(range_begin, i * num_blocks / num_streams);

        auto range_end = index->blocks.begin();
        std::advance(range_end, (i + 1) * num_blocks / num_streams);

        streams.emplace_back(std::make_shared<StripeLogBlockInputStream>(
            *this, context.getSettingsRef().max_read_buffer_size, index, range_begin, range_end));
    }

    /// We do not keep read lock directly at the time of reading, because we read ranges of data that do not change.
    return streams;
}
explicit MMappedFastaFile(std::string const & _filename) : filename(_filename) { struct stat st; stat(_filename.c_str(), &st); filesize = (size_t) st.st_size; fd = open(_filename.c_str(), O_RDONLY, 0); assert(fd != -1); #if __APPLE__ base = (uint8_t *) mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0); #else base = (uint8_t *) mmap(NULL, filesize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0); #endif if(base == MAP_FAILED) { const int err = errno; error("Cannot mmap %s (errno=%i / %s) -- do you have enough memory available?", _filename.c_str(), err, strerror(err)); } auto fai_fp = fopen((filename + ".fai").c_str(), "r"); if(!fai_fp) { assert(fai_build(filename.c_str()) == 0); } else { fclose(fai_fp); } std::ifstream index_in(filename + ".fai"); while(index_in.good()) { std::string line; std::getline(index_in, line); std::vector<std::string> parts; stringutil::split(line, parts, "\n\t", false); if(parts.size() == 5) { const std::string contig = parts[0]; index_entry ientry; sscanf(parts[1].c_str(), "%zu", &ientry.length); sscanf(parts[2].c_str(), "%zu", &ientry.start_offset); sscanf(parts[3].c_str(), "%zu", &ientry.chars_per_line); sscanf(parts[4].c_str(), "%zu", &ientry.bytes_per_line); fai[contig] = ientry; // determine non-N length; we don't do this because it's pretty slow. 
// size_t offset = ientry.start_offset; // const size_t line_number = (ientry.length - 1) / ientry.chars_per_line; // size_t offset_in_line = (ientry.length - 1) % ientry.chars_per_line; // size_t offset_end = ientry.start_offset + line_number*ientry.bytes_per_line + offset_in_line + 1; // size_t ns = 0; // // while(offset < offset_end) // { // if(std::tolower(base[offset - 1]) == 'n') // { // ++ns; // } // ++offset; // } // fai[contig].non_n_length = fai[contig].length - ns; // length trimming off N's at start and end size_t pos = 0; size_t ns_at_start = 0; bool done = false; // start of contig while(pos < ientry.length && !done) { const std::string s = get(contig, pos, 10000); for(size_t j = 0; j < s.size(); ++j) { if(std::tolower(s.at(j)) == 'n') { ++ns_at_start; } else { done = true; break; } } pos += s.size(); } size_t ns_at_end = 0; // check if we had all Ns if(ns_at_start < ientry.length) { const size_t line_number = (ientry.length - 1) / ientry.chars_per_line; size_t offset_in_line = (ientry.length - 1) % ientry.chars_per_line; size_t offset = ientry.start_offset + line_number*ientry.bytes_per_line + offset_in_line + 1; // end of contig while(offset > ientry.start_offset) { if (base[offset - 1] == '\n' || base[offset - 1] == '\r') { // skip newlines } else if(std::tolower(base[offset - 1]) == 'n') { ++ns_at_end; } else { break; } --offset; } } fai[contig].non_n_length = fai[contig].length - (ns_at_start + ns_at_end); } else if(parts.size() > 0) { error("invalid fai line %s in %s", line.c_str(), (filename + ".fai").c_str()); } } }
/// Name-based overload: translates `iname` with index_in() and forwards the result to mx_in().
const MX mx_in(const std::string & iname) const {
  const auto input_index = index_in(iname);
  return mx_in(input_index);
}
/// Name-based overload: translates `iname` with index_in() and forwards the result to sx_in().
const SX sx_in(const std::string& iname) const {
  const auto input_index = index_in(iname);
  return sx_in(input_index);
}
/// Name-based overload: translates `iind` with index_in() and `oind` with index_out(),
/// then forwards `sp`, both results and `compact` to set_jac_sparsity().
void set_jac_sparsity(const Sparsity& sp, const std::string &iind, const std::string &oind,
                      bool compact=false) {
  const auto input_index = index_in(iind);
  const auto output_index = index_out(oind);
  set_jac_sparsity(sp, input_index, output_index, compact);
}
/// Name-based overload: translates `iind` with index_in() and `oind` with index_out(),
/// then forwards both results, `compact` and `symmetric` to sparsity_jac().
const Sparsity sparsity_jac(const std::string &iind, const std::string &oind,
                            bool compact=false, bool symmetric=false) const {
  const auto input_index = index_in(iind);
  const auto output_index = index_out(oind);
  return sparsity_jac(input_index, output_index, compact, symmetric);
}