void profile_decoding(const char* index_filename, double p) { std::default_random_engine rng(1729); std::uniform_real_distribution<double> dist01(0.0, 1.0); IndexType index; logger() << "Loading index from " << index_filename << std::endl; boost::iostreams::mapped_file_source m(index_filename); succinct::mapper::map(index, m); std::vector<uint32_t> values; for (size_t l = 0; l < index.size(); ++l) { if (l % 1000000 == 0) { logger() << l << " lists processed" << std::endl; } auto blocks = index[l].get_blocks(); for (auto const& block: blocks) { // only measure full blocks if (block.size == mixed_block::block_size && dist01(rng) < p) { block.decode_doc_gaps(values); profile_block(values, block.doc_gaps_universe); block.decode_freqs(values); profile_block(values, uint32_t(-1)); } } } logger() << index.size() << " lists processed" << std::endl; }
static bool ReadReadsAndProcessKernel(const Option &opt, IndexType &index) { if (KmerType::max_size() < static_cast<unsigned>(opt.kmer_k + opt.step + 1)) { return false; } xinfo("Selected kmer type size for next k: %u\n", sizeof(KmerType)); AsyncReadReader reader(opt.read_file); KmerCollector<KmerType> collector(opt.kmer_k + opt.step + 1, opt.output_prefix); int64_t num_aligned_reads = 0; int64_t num_total_reads = 0; while (true) { auto read_pkg = reader.Next(); if (read_pkg.size() == 0) { break; } #pragma omp parallel for reduction(+: num_aligned_reads) for (unsigned i = 0; i < read_pkg.size(); ++i) { num_aligned_reads += index.FindNextKmersFromRead(read_pkg, i, &collector) > 0; } num_total_reads += read_pkg.size(); xinfo("Processed: %lld, aligned: %lld. Iterative edges: %llu\n", num_total_reads, num_aligned_reads, collector.collection().size()); } collector.FlushToFile(); xinfo("Total: %lld, aligned: %lld. Iterative edges: %llu\n", num_total_reads, num_aligned_reads, collector.collection().size()); return true; }
/**
 * Constructs a multidimensional counter whose dimension count is taken from
 * the size of `to`.
 *
 * m_from is constructed from (dimension, 0), m_to is a copy of `to`, and
 * m_position is constructed from the dimension; the counter is then reset
 * via goToBegin().
 *
 * NOTE(review): m_from and m_position are initialised from m_dimension, so
 * this presumably relies on m_dimension being declared before them in the
 * class — confirm against the class definition.
 *
 * @param to  per-dimension end indices
 */
MultidimensionalFor::MultidimensionalFor(const IndexType &to)
    : m_dimension(to.size()),
      m_from(m_dimension, 0),
      m_to(to),
      m_position(m_dimension)
{
    goToBegin();
}
/**
 * Loads a previously saved index from a file.
 *
 * The file header is read first to check the stored element type and to
 * find out which index type to create; the file is then rewound and the
 * newly created index loads itself from it.
 *
 * @param dataset   dataset the index is created for
 * @param filename  path of the saved index file
 * @param distance  distance functor passed on to the created index
 * @return the loaded index (owned by the caller), or NULL if the file could
 *         not be opened
 * @throws FLANNException if the datatype stored in the file differs from
 *         ElementType; exceptions raised while reading or creating the
 *         index are propagated after the file has been closed and the
 *         partially built index deleted.
 */
IndexType* load_saved_index(const Matrix<ElementType>& dataset, const std::string& filename, Distance distance)
{
    FILE* fin = fopen(filename.c_str(), "rb");
    if (fin == NULL) {
        return NULL;
    }

    IndexType* nnIndex = NULL;
    try {
        IndexHeader header = load_header(fin);
        if (header.data_type != flann_datatype_value<ElementType>::value) {
            throw FLANNException("Datatype of saved index is different than of the one to be created.");
        }

        IndexParams params;
        params["algorithm"] = header.index_type;
        nnIndex = create_index_by_type<Distance>(header.index_type, dataset, params, distance);

        // loadIndex() reads the file from the start, header included.
        rewind(fin);
        nnIndex->loadIndex(fin);
    }
    catch (...) {
        // Do not leak the file handle or the half-constructed index.
        delete nnIndex;
        fclose(fin);
        throw;
    }

    fclose(fin);
    return nnIndex;
}
void BndryRegister::defineDoit (Orientation _face, IndexType _typ, int _in_rad, int _out_rad, int _extent_rad, BoxArray& fsBA) { BL_PROFILE("BndryRegister::defineDoit()"); BL_ASSERT(grids.size() > 0); const int coord_dir = _face.coordDir(); const int lo_side = _face.isLow(); // // Build the BoxArray on which to define the FabSet on this face. // const int N = grids.size(); fsBA.resize(N); #ifdef _OPENMP #pragma omp parallel for #endif for (int idx = 0; idx < N; ++idx) { Box b; // // First construct proper box for direction normal to face. // if (_out_rad > 0) { if (_typ.ixType(coord_dir) == IndexType::CELL) b = BoxLib::adjCell(grids[idx], _face, _out_rad); else b = BoxLib::bdryNode(grids[idx], _face, _out_rad); if (_in_rad > 0) b.grow(_face.flip(), _in_rad); } else { if (_in_rad > 0) { if (_typ.ixType(coord_dir) == IndexType::CELL) b = BoxLib::adjCell(grids[idx], _face, _in_rad); else b = BoxLib::bdryNode(grids[idx], _face, _in_rad); b.shift(coord_dir, lo_side ? _in_rad : -_in_rad); } else BoxLib::Error("BndryRegister::define(): strange values for in_rad, out_rad"); } // // Now alter box in all other index directions. // for (int dir = 0; dir < BL_SPACEDIM; dir++) { if (dir == coord_dir) continue; if (_typ.ixType(dir) == IndexType::NODE) b.surroundingNodes(dir); if (_extent_rad > 0) b.grow(dir,_extent_rad); } BL_ASSERT(b.ok()); fsBA.set(idx,b); } BL_ASSERT(fsBA.ok()); }
/**
 * Creates a shuffled copy of a peptide.
 *
 * The residues reported by find_all_tryptic() (K/P/R) are kept in place; all
 * other residues are permuted with a seeded pseudo random number generator
 * and the modification locations are moved along with their residues. The
 * shuffling is repeated until the sequence identity between the input and
 * the shuffled peptide is no larger than identity_threshold or max_attempts
 * is reached. Whenever (attempts % 10 == 9), the peptide itself is altered
 * to make a sufficiently different shuffle possible: either one unmodified
 * residue is replaced by a random amino acid (replace_aa_instead_append) or
 * two random amino acids are appended.
 *
 * @param peptide                    peptide to shuffle (taken by value, modified locally)
 * @param identity_threshold         maximal accepted sequence identity
 * @param seed                       RNG seed; -1 selects the current time
 * @param max_attempts               maximal number of shuffle attempts
 * @param replace_aa_instead_append  replace a residue instead of appending two
 * @return the shuffled peptide
 */
OpenMS::TargetedExperiment::Peptide MRMDecoy::shufflePeptide(
  OpenMS::TargetedExperiment::Peptide peptide, double identity_threshold, int seed,
  int max_attempts, bool replace_aa_instead_append)
{
#ifdef DEBUG_MRMDECOY
  std::cout << " shuffle peptide " << peptide.sequence << std::endl;
  seed = 41;
#endif
  if (seed == -1)
  {
    seed = time(0);
  }
  OpenMS::TargetedExperiment::Peptide shuffled = peptide;

  boost::mt19937 generator(seed);
  // NOTE(review): a default-constructed boost::uniform_int<> presumably spans
  // [0, 9]; if so, the `pseudoRNG() % n` calls below can only yield 0..9 and
  // never pick the last amino acids of aa[] or positions >= 10 — confirm
  // against the Boost.Random documentation. pseudoRNG(n) is not affected.
  boost::uniform_int<> uni_dist;
  boost::variate_generator<boost::mt19937&, boost::uniform_int<> > pseudoRNG(generator, uni_dist);

  typedef std::vector<std::pair<std::string::size_type, std::string> > IndexType;
  IndexType idx = MRMDecoy::find_all_tryptic(peptide.sequence);

  // Amino acids used for replacing / appending (K, P and R are not included).
  std::string aa[] =
  {
    "A", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "M", "F", "S", "T", "W",
    "Y", "V"
  };
  int aa_size = 17;

  int attempts = 0;
  // loop: copy the original peptide, attempt to shuffle it and check whether difference is large enough
  while (MRMDecoy::AASequenceIdentity(peptide.sequence, shuffled.sequence) > identity_threshold &&
         attempts < max_attempts)
  {
    shuffled = peptide;
    std::vector<Size> peptide_index;
    for (Size i = 0; i < peptide.sequence.size(); i++)
    {
      peptide_index.push_back(i);
    }

    // we erase the indices where K/P/R are (from the back / in reverse order
    // to not delete indices we access later)
    for (IndexType::reverse_iterator it = idx.rbegin(); it != idx.rend(); ++it)
    {
      peptide_index.erase(peptide_index.begin() + it->first);
    }

    // shuffle the peptide index (without the K/P/R which we leave in place)
    // one could also use std::random_shuffle here but then the code becomes
    // untestable since the implementation of std::random_shuffle differs
    // between libc++ (llvm/mac-osx) and libstdc++ (gcc) and VS
    // see also https://code.google.com/p/chromium/issues/detail?id=358564
    // the actual code here for the shuffling is based on the implementation of
    // std::random_shuffle in libstdc++
    if (peptide_index.begin() != peptide_index.end())
    {
      for (std::vector<Size>::iterator pI_it = peptide_index.begin() + 1; pI_it != peptide_index.end(); ++pI_it)
      {
        // swap current position with random element from vector
        // swapping positions are random in range [0, current_position + 1)
        // which can be at most [0, n)
        std::iter_swap(pI_it, peptide_index.begin() + pseudoRNG((pI_it - peptide_index.begin()) + 1));
      }
    }

    // re-insert the missing K/P/R at the appropriate places
    for (IndexType::iterator it = idx.begin(); it != idx.end(); ++it)
    {
      peptide_index.insert(peptide_index.begin() + it->first, it->first);
    }

    // use the shuffled index to create the new peptide sequence and
    // then to place the modifications at their appropriate places (at the
    // same, shuffled AA where they were before).
    for (Size i = 0; i < peptide_index.size(); i++)
    {
      shuffled.sequence[i] = peptide.sequence[peptide_index[i]];
    }
    for (Size j = 0; j < shuffled.mods.size(); j++)
    {
      for (Size k = 0; k < peptide_index.size(); k++)
      {
        // C and N terminal mods are implicitly not shuffled because they live at positions -1 and sequence.size()
        if (boost::numeric_cast<int>(peptide_index[k]) == shuffled.mods[j].location)
        {
          shuffled.mods[j].location = boost::numeric_cast<int>(k);
          break;
        }
      }
    }

#ifdef DEBUG_MRMDECOY
    for (Size j = 0; j < shuffled.mods.size(); j++)
    {
      std::cout << " position after shuffling " << shuffled.mods[j].location << " mass difference " << shuffled.mods[j].mono_mass_delta << std::endl;
    }
#endif

    ++attempts;

    // If our attempts have failed so far, we will either replace one AA of
    // the sequence or append two random AA to it and see whether we can
    // achieve sufficient shuffling with the altered sequence.
    if (attempts % 10 == 9)
    {
      if (replace_aa_instead_append)
      {
        OpenMS::AASequence shuffled_sequence = TargetedExperimentHelper::getAASequence(shuffled);
        int res_pos = (pseudoRNG() % aa_size);
        int pep_pos = -1;
        size_t pos_trials = 0;
        // Try (at most sequence-length times) to find a position that may be replaced.
        while (pep_pos < 0 && pos_trials < shuffled_sequence.size())
        {
          pep_pos = (pseudoRNG() % shuffled_sequence.size());
          // Never replace a modified residue, the first residue of an
          // N-terminally modified peptide, or the last residue of a
          // C-terminally modified peptide. (The last-residue check used to
          // test the N-terminal modification, a copy-paste slip.)
          if (shuffled_sequence[pep_pos].isModified() ||
              (shuffled_sequence.hasNTerminalModification() && pep_pos == 0) ||
              (shuffled_sequence.hasCTerminalModification() && pep_pos == static_cast<int>(shuffled_sequence.size() - 1)))
          {
            pep_pos = -1;
          }
          else
          {
            if (pep_pos == 0)
            {
              shuffled_sequence = AASequence::fromString(aa[res_pos]) + shuffled_sequence.getSuffix(shuffled_sequence.size() - pep_pos - 1);
            }
            else if (pep_pos == static_cast<int>(shuffled_sequence.size() - 1))
            {
              shuffled_sequence = shuffled_sequence.getPrefix(pep_pos) + AASequence::fromString(aa[res_pos]);
            }
            else
            {
              shuffled_sequence = shuffled_sequence.getPrefix(pep_pos) + AASequence::fromString(aa[res_pos]) + shuffled_sequence.getSuffix(shuffled_sequence.size() - pep_pos - 1);
            }
          }
          ++pos_trials;
        }
        shuffled.sequence = shuffled_sequence.toUnmodifiedString();
        peptide = shuffled;
      }
      else
      {
        int pos = (pseudoRNG() % aa_size);
        peptide.sequence.append(aa[pos]);
        pos = (pseudoRNG() % aa_size);
        peptide.sequence.append(aa[pos]);
        // now make the shuffled peptide the same length as the new peptide
        shuffled = peptide;
      }
    }
  }

  return shuffled;
}
static void bench() { //std::size_t kv_len = 32; std::size_t kv_len = 20; std::size_t key_len = 20; //std::size_t size = 4 * 1048576; // 4 Mi entries; 128 MiB std::size_t size = 123406; // for SOSP paper //std::size_t kv_per_block_list[] = {1, 4, 16}; std::size_t kv_per_block_list[] = {1}; stopwatch ss; // initialize input data key_array arr(kv_len, size); arr.generate_random_keys(0, size); quick_sort::sort(arr, 0, size); key_array arr2(kv_len, size); arr2.generate_random_keys(0, size, 1); for (std::size_t i = 1; i < size; i++) assert(memcmp(arr[i - 1], arr[i], key_len) < 0); for (std::size_t kv_per_block_i = 0; kv_per_block_i < sizeof(kv_per_block_list) / sizeof(kv_per_block_list[0]); kv_per_block_i++) { std::size_t kv_per_block = kv_per_block_list[kv_per_block_i]; //for (std::size_t group_size = 1; group_size <= 1024; group_size *= 2) //for (std::size_t group_size = 128; group_size <= 1024; group_size *= 2) for (std::size_t group_size = 256; group_size <= 256; group_size *= 2) { std::vector<IndexType*> indexes; // construct ss.start(); for (size_t i = 0; i < 1024; i++) { IndexType* s = new IndexType(key_len, size, group_size, 0, kv_per_block); for (std::size_t kv_i = 0; kv_i < size; kv_i++) s->insert(arr[kv_i]); s->flush(); indexes.push_back(s); } ss.stop(); uint64_t const_time = ss.real_time(); // lookup //std::size_t lookups = 10000000 / group_size; const std::size_t lookups = 10000000; // for SOSP paper uint64_t lookup_time_hit = 1; uint64_t lookup_time_miss = 1; for (std::size_t lookup_mode = 0; lookup_mode < 2; lookup_mode++) { srand(0); ss.start(); for (std::size_t lookup_i = 0; lookup_i < lookups; lookup_i++) { std::size_t i = static_cast<std::size_t>(rand()) % indexes.size(); std::size_t kv_i = static_cast<std::size_t>(rand()) % size; if (lookup_mode == 0) { std::size_t idx = indexes[i]->locate(arr[kv_i]); assert(kv_i / kv_per_block == idx / kv_per_block); (void)idx; } else { std::size_t idx = indexes[i]->locate(arr2[kv_i]); (void)idx; } } ss.stop(); if 
(lookup_mode == 0) lookup_time_hit = ss.real_time(); else lookup_time_miss = ss.real_time(); } printf("kv_per_block: %lu\n", kv_per_block); printf("group_size: %lu\n", group_size); printf("bits_per_key: %lf\n", static_cast<double>(indexes[0]->bit_size()) / static_cast<double>(size)); printf("bits_per_key_trie_only: %lf\n", static_cast<double>(indexes[0]->bit_size_trie_only()) / static_cast<double>(size)); printf("const_time_us: %lf\n", static_cast<double>(const_time) / static_cast<double>(indexes.size() * size) / 1000.); printf("lookup_time_us_hit: %lf\n", static_cast<double>(lookup_time_hit) / static_cast<double>(lookups) / 1000.); printf("lookup_time_us_miss: %lf\n", static_cast<double>(lookup_time_miss) / static_cast<double>(lookups) / 1000.); printf("\n"); for (size_t i = 0; i < indexes.size(); i++) delete indexes[i]; indexes.clear(); } } trie_stats::print(); }