Esempio n. 1
0
    void profile_decoding(const char* index_filename,
                          double p)
    {
        std::default_random_engine rng(1729);
        std::uniform_real_distribution<double> dist01(0.0, 1.0);

        IndexType index;
        logger() << "Loading index from " << index_filename << std::endl;
        boost::iostreams::mapped_file_source m(index_filename);
        succinct::mapper::map(index, m);

        std::vector<uint32_t> values;

        for (size_t l = 0; l < index.size(); ++l) {
            if (l % 1000000 == 0) {
                logger() << l << " lists processed" << std::endl;
            }

            auto blocks = index[l].get_blocks();
            for (auto const& block: blocks) {
                // only measure full blocks
                if (block.size == mixed_block::block_size && dist01(rng) < p) {
                    block.decode_doc_gaps(values);
                    profile_block(values, block.doc_gaps_universe);
                    block.decode_freqs(values);
                    profile_block(values, uint32_t(-1));
                }
            }
        }

        logger() << index.size() << " lists processed" << std::endl;
    }
Esempio n. 2
0
/**
 * Reads packages of reads from opt.read_file and, for every read, asks
 * `index` for the next k-mers, which are gathered in a KmerCollector.
 *
 * The collector is created for k-mers of length opt.kmer_k + opt.step + 1
 * with opt.output_prefix, and FlushToFile() is called on it once the reader
 * returns an empty package. Reads within one package are processed by an
 * OpenMP parallel loop; a read counts as "aligned" when
 * FindNextKmersFromRead() returns a value greater than zero.
 *
 * @param opt   options supplying read_file, kmer_k, step and output_prefix.
 * @param index index queried through FindNextKmersFromRead().
 * @return false (without reading anything) when KmerType::max_size() is
 *         smaller than opt.kmer_k + opt.step + 1, true otherwise.
 */
static bool ReadReadsAndProcessKernel(const Option &opt, IndexType &index) {
  if (KmerType::max_size() < static_cast<unsigned>(opt.kmer_k + opt.step + 1)) {
    return false;
  }
  // sizeof yields std::size_t, so it must be printed with %zu (not %u).
  xinfo("Selected kmer type size for next k: %zu\n", sizeof(KmerType));
  AsyncReadReader reader(opt.read_file);
  KmerCollector<KmerType> collector(opt.kmer_k + opt.step + 1, opt.output_prefix);
  int64_t num_aligned_reads = 0;
  int64_t num_total_reads = 0;

  while (true) {
    auto read_pkg = reader.Next();
    if (read_pkg.size() == 0) {
      break;
    }
#pragma omp parallel for reduction(+: num_aligned_reads)
    for (unsigned i = 0; i < read_pkg.size(); ++i) {
      num_aligned_reads += index.FindNextKmersFromRead(read_pkg, i, &collector) > 0;
    }
    num_total_reads += read_pkg.size();
    // int64_t and container sizes are not guaranteed to be (unsigned) long
    // long, so cast explicitly to the types %lld / %llu expect.
    xinfo("Processed: %lld, aligned: %lld. Iterative edges: %llu\n",
          static_cast<long long>(num_total_reads),
          static_cast<long long>(num_aligned_reads),
          static_cast<unsigned long long>(collector.collection().size()));
  }
  collector.FlushToFile();
  xinfo("Total: %lld, aligned: %lld. Iterative edges: %llu\n",
        static_cast<long long>(num_total_reads),
        static_cast<long long>(num_aligned_reads),
        static_cast<unsigned long long>(collector.collection().size()));
  return true;
}
// Builds a multidimensional loop whose number of dimensions is the size of
// `to`, then moves it to its start via goToBegin().
//
// `to` is copied into m_to; m_from is constructed from (m_dimension, 0) and
// m_position from (m_dimension). Presumably m_from holds an all-zero lower
// bound and `to` the per-dimension upper bound — TODO confirm against the
// class declaration.
//
// NOTE(review): m_from and m_position read m_dimension in their initializers.
// Members are initialized in declaration order, so this relies on m_dimension
// being declared before them in the class — confirm.
MultidimensionalFor::MultidimensionalFor(const IndexType &to):
	m_dimension(to.size()),
	m_from(m_dimension, 0),
	m_to(to),
	m_position(m_dimension)
{
	goToBegin();
}
Esempio n. 4
0
    /**
     * Re-creates an index from a file previously written to disk.
     *
     * The header is read first to learn the stored data type and index
     * type; an index of that type is created over `dataset`, the file is
     * rewound and the index loads itself from it.
     *
     * The file handle is closed on every path. If anything throws after the
     * file was opened, the handle is closed and any index created so far is
     * deleted before the exception is re-thrown, so nothing leaks.
     *
     * @param dataset  data the index is created over.
     * @param filename path of the saved index file.
     * @param distance distance functor forwarded to the index factory.
     * @return pointer to the index returned by create_index_by_type(), or
     *         NULL when `filename` cannot be opened for reading.
     * @throws FLANNException when the element type stored in the header
     *         differs from ElementType; exceptions raised by the helpers
     *         called here are propagated unchanged.
     */
    IndexType* load_saved_index(const Matrix<ElementType>& dataset, const std::string& filename, Distance distance)
    {
        FILE* fin = fopen(filename.c_str(), "rb");
        if (fin == NULL) {
            return NULL;
        }

        IndexType* nnIndex = NULL;
        try {
            IndexHeader header = load_header(fin);
            if (header.data_type != flann_datatype_value<ElementType>::value) {
                throw FLANNException("Datatype of saved index is different than of the one to be created.");
            }

            IndexParams params;
            params["algorithm"] = header.index_type;
            nnIndex = create_index_by_type<Distance>(header.index_type, dataset, params, distance);
            // loadIndex() is handed the file from its beginning, so undo
            // the header read above.
            rewind(fin);
            nnIndex->loadIndex(fin);
        }
        catch (...) {
            // Release everything acquired so far, then let the caller see
            // the original exception.
            fclose(fin);
            delete nnIndex;
            throw;
        }
        fclose(fin);

        return nnIndex;
    }
Esempio n. 5
0
//
// Fills fsBA with one Box per entry of grids, describing the region next to
// face _face of that grid.
//
// Normal to the face the box is built with BoxLib::adjCell() when _typ is
// CELL in that direction and with BoxLib::bdryNode() otherwise:
//   * _out_rad > 0 : built with length _out_rad, then grown by _in_rad
//                    towards _face.flip() when _in_rad > 0;
//   * otherwise, _in_rad > 0 : built with length _in_rad, then shifted along
//                    the normal direction by +_in_rad on a low face and
//                    -_in_rad on a high face;
//   * neither      : BoxLib::Error() is called.
// In every other direction the box is converted with surroundingNodes() when
// _typ is NODE there and grown by _extent_rad when _extent_rad > 0.
//
void
BndryRegister::defineDoit (Orientation _face,
                           IndexType   _typ,
                           int         _in_rad,
                           int         _out_rad,
                           int         _extent_rad,
                           BoxArray&   fsBA)
{
    BL_PROFILE("BndryRegister::defineDoit()");

    BL_ASSERT(grids.size() > 0);

    const int normal_dir = _face.coordDir();
    const int on_low     = _face.isLow();
    const int num_boxes  = grids.size();

    // Centering of the register in the direction normal to the face; it does
    // not depend on the grid, so it is evaluated once.
    const bool cell_in_normal = (_typ.ixType(normal_dir) == IndexType::CELL);

    //
    // One slot per grid; each loop iteration writes only its own slot.
    //
    fsBA.resize(num_boxes);

#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (int i = 0; i < num_boxes; ++i)
    {
        Box bx;
        //
        // Extent in the direction normal to the face.
        //
        if (_out_rad > 0)
        {
            bx = cell_in_normal ? BoxLib::adjCell(grids[i], _face, _out_rad)
                                : BoxLib::bdryNode(grids[i], _face, _out_rad);

            if (_in_rad > 0)
            {
                bx.grow(_face.flip(), _in_rad);
            }
        }
        else if (_in_rad > 0)
        {
            bx = cell_in_normal ? BoxLib::adjCell(grids[i], _face, _in_rad)
                                : BoxLib::bdryNode(grids[i], _face, _in_rad);

            const int shift_by = on_low ? _in_rad : -_in_rad;
            bx.shift(normal_dir, shift_by);
        }
        else
        {
            BoxLib::Error("BndryRegister::define(): strange values for in_rad, out_rad");
        }
        //
        // Extent in the remaining (tangential) directions.
        //
        for (int d = 0; d < BL_SPACEDIM; d++)
        {
            if (d != normal_dir)
            {
                if (_typ.ixType(d) == IndexType::NODE)
                {
                    bx.surroundingNodes(d);
                }
                if (_extent_rad > 0)
                {
                    bx.grow(d, _extent_rad);
                }
            }
        }

        BL_ASSERT(bx.ok());

        fsBA.set(i, bx);
    }

    BL_ASSERT(fsBA.ok());
}
Esempio n. 6
0
  /**
   * Produces a shuffled version of `peptide`.
   *
   * The positions reported by MRMDecoy::find_all_tryptic() (the K/P/R
   * residues, per the comments below) stay where they are; all other
   * positions are permuted with a Mersenne-Twister generator seeded with
   * `seed`. Modification locations in `mods` are remapped to follow their
   * residue. Shuffling is repeated while
   * AASequenceIdentity(peptide.sequence, shuffled.sequence) is greater than
   * `identity_threshold` and fewer than `max_attempts` attempts were made.
   *
   * On attempts 9, 19, 29, ... (attempts % 10 == 9 after the increment) the
   * working peptide is altered to make a sufficiently different shuffle
   * possible: either one residue is replaced by a random amino acid
   * (`replace_aa_instead_append` true) or two random amino acids are
   * appended to the sequence (false).
   *
   * @param peptide                   peptide to shuffle; taken by value, the
   *                                  local copy is modified as described above.
   * @param identity_threshold        shuffling stops once the identity is not
   *                                  above this value.
   * @param seed                      seed for the generator; -1 selects
   *                                  time(0). With DEBUG_MRMDECOY defined the
   *                                  seed is forced to 41.
   * @param max_attempts              upper bound on the number of attempts.
   * @param replace_aa_instead_append selects replacing over appending.
   * @return the last shuffled peptide (equal to the input peptide when the
   *         loop body never runs).
   */
  OpenMS::TargetedExperiment::Peptide MRMDecoy::shufflePeptide(
    OpenMS::TargetedExperiment::Peptide peptide, double identity_threshold, int seed,
    int max_attempts, bool replace_aa_instead_append)
  {
#ifdef DEBUG_MRMDECOY
    std::cout << " shuffle peptide " << peptide.sequence << std::endl;
    seed = 41;
#endif
    if (seed == -1)
    {
      seed = time(0);
    }
    OpenMS::TargetedExperiment::Peptide shuffled = peptide;

    boost::mt19937 generator(seed);
    boost::uniform_int<> uni_dist;
    boost::variate_generator<boost::mt19937&, boost::uniform_int<> > pseudoRNG(generator, uni_dist);

    // (position, residue) pairs that are kept in place while shuffling
    typedef std::vector<std::pair<std::string::size_type, std::string> > IndexType;
    IndexType idx = MRMDecoy::find_all_tryptic(peptide.sequence);
    // amino acids that may be appended / substituted when shuffling alone
    // does not get below the identity threshold
    std::string aa[] =
    {
      "A", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "M", "F", "S", "T", "W",
      "Y", "V"
    };
    int aa_size = 17;

    int attempts = 0;
    // loop: copy the original peptide, attempt to shuffle it and check whether difference is large enough
    while (MRMDecoy::AASequenceIdentity(peptide.sequence, shuffled.sequence) > identity_threshold &&
           attempts < max_attempts)
    {
      shuffled = peptide;
      std::vector<Size> peptide_index;
      for (Size i = 0; i < peptide.sequence.size(); i++)
      {
        peptide_index.push_back(i);
      }

      // we erase the indices where K/P/R are (from the back / in reverse order
      // to not delete indices we access later)
      for (IndexType::reverse_iterator it = idx.rbegin(); it != idx.rend(); ++it)
      {
        peptide_index.erase(peptide_index.begin() + it->first);
      }

      // shuffle the peptide index (without the K/P/R which we leave in place)
      // one could also use std::random_shuffle here but then the code becomes
      // untestable since the implementation of std::random_shuffle differs
      // between libc++ (llvm/mac-osx) and libstdc++ (gcc) and VS
      // see also https://code.google.com/p/chromium/issues/detail?id=358564
      // the actual code here for the shuffling is based on the implementation of
      // std::random_shuffle in libstdc++
      if (peptide_index.begin() != peptide_index.end())
      {
        for (std::vector<Size>::iterator pI_it = peptide_index.begin() + 1; pI_it != peptide_index.end(); ++pI_it)
        {
          // swap current position with random element from vector
          // swapping positions are random in range [0, current_position + 1)
          // which can be at most [0, n)
          std::iter_swap(pI_it, peptide_index.begin() + pseudoRNG((pI_it - peptide_index.begin()) + 1));
        }
      }

      // re-insert the missing K/P/R at the appropriate places
      for (IndexType::iterator it = idx.begin(); it != idx.end(); ++it)
      {
        peptide_index.insert(peptide_index.begin() + it->first, it->first);
      }

      // use the shuffled index to create the new peptide sequence and
      // then to place the modifications at their appropriate places (at the
      // same, shuffled AA where they were before).
      for (Size i = 0; i < peptide_index.size(); i++)
      {
        shuffled.sequence[i] = peptide.sequence[peptide_index[i]];
      }
      for (Size j = 0; j < shuffled.mods.size(); j++)
      {
        for (Size k = 0; k < peptide_index.size(); k++)
        {
          // C and N terminal mods are implicitly not shuffled because they live at positions -1 and sequence.size()
          if (boost::numeric_cast<int>(peptide_index[k]) == shuffled.mods[j].location)
          {
            shuffled.mods[j].location = boost::numeric_cast<int>(k);
            break;
          }
        }
      }

#ifdef DEBUG_MRMDECOY
      for (Size j = 0; j < shuffled.mods.size(); j++)
      {
        std::cout << " position after shuffling " << shuffled.mods[j].location << " mass difference " << shuffled.mods[j].mono_mass_delta << std::endl;
      }
#endif

      ++attempts;

      // If our attempts have failed so far, we will either replace one
      // residue by a random AA or append two random AA to the sequence and
      // see whether we can achieve sufficient shuffling with the altered
      // sequence.
      if (attempts % 10 == 9)
      {
        if (replace_aa_instead_append)
        {
          OpenMS::AASequence shuffled_sequence = TargetedExperimentHelper::getAASequence(shuffled);
          int res_pos = (pseudoRNG() % aa_size);
          int pep_pos = -1;
          size_t pos_trials = 0;
          // draw positions until an admissible one is found, giving up after
          // as many draws as the sequence has residues
          while (pep_pos < 0 && pos_trials < shuffled_sequence.size())
          {
            pep_pos = (pseudoRNG() % shuffled_sequence.size());
            // Never replace a modified residue, the first residue when the
            // peptide carries an N-terminal modification, or the last
            // residue when it carries a C-terminal modification. (The last
            // case used to test the N-terminal modification a second time.)
            if (shuffled_sequence[pep_pos].isModified() || (shuffled_sequence.hasNTerminalModification() && pep_pos == 0) || (shuffled_sequence.hasCTerminalModification() && pep_pos == (int)(shuffled_sequence.size() - 1)))
            {
              pep_pos = -1;
            }
            else
            {
              if (pep_pos == 0)
              {
                shuffled_sequence = AASequence::fromString(aa[res_pos]) + shuffled_sequence.getSuffix(shuffled_sequence.size() - pep_pos - 1);
              }
              else if (pep_pos == (int)(shuffled_sequence.size() - 1))
              {
                shuffled_sequence = shuffled_sequence.getPrefix(pep_pos) + AASequence::fromString(aa[res_pos]);
              }
              else
              {
                shuffled_sequence = shuffled_sequence.getPrefix(pep_pos) + AASequence::fromString(aa[res_pos]) + shuffled_sequence.getSuffix(shuffled_sequence.size() - pep_pos - 1);
              }
            }
            ++pos_trials;
          }
          shuffled.sequence = shuffled_sequence.toUnmodifiedString();
          // continue shuffling from the sequence with the replaced residue
          peptide = shuffled;
        }
        else
        {
          int pos = (pseudoRNG() % aa_size);
          peptide.sequence.append(aa[pos]);
          pos = (pseudoRNG() % aa_size);
          peptide.sequence.append(aa[pos]);
          // now make the shuffled peptide the same length as the new peptide
          shuffled = peptide;
        }
      }
    }

    return shuffled;
  }
Esempio n. 7
0
// Benchmarks construction and lookup of IndexType and prints the results
// to stdout.
//
// Input: `size` random keys of kv_len bytes, sorted with quick_sort (arr),
// plus a second random key set generated with an extra argument of 1 (arr2;
// presumably a different seed so that its keys are absent from arr — TODO
// confirm). For every (kv_per_block, group_size) combination:
//   * 1024 indexes are built, each by inserting all keys of arr in order and
//     calling flush(); the whole construction phase is timed;
//   * 10,000,000 lookups are timed twice, with rand() re-seeded to 0 each
//     time so both passes visit the same (index, key) pairs: pass 0 looks up
//     keys of arr and asserts the located block, pass 1 looks up keys of arr2;
//   * size and timing figures are printed, then all indexes are deleted.
// Finally trie_stats::print() is called.
static
void
bench()
{
	//std::size_t kv_len = 32;
	std::size_t kv_len = 20;
	std::size_t key_len = 20;
	//std::size_t size = 4 * 1048576;	// 4 Mi entries; 128 MiB
	std::size_t size = 123406;	// for SOSP paper

	//std::size_t kv_per_block_list[] = {1, 4, 16};
	std::size_t kv_per_block_list[] = {1};

	stopwatch ss;

	// initialize input data
	key_array arr(kv_len, size);
	arr.generate_random_keys(0, size);
	quick_sort::sort(arr, 0, size);

	key_array arr2(kv_len, size);
	arr2.generate_random_keys(0, size, 1);

	// the sorted keys must be strictly increasing (i.e. free of duplicates)
	for (std::size_t i = 1; i < size; i++)
		assert(memcmp(arr[i - 1], arr[i], key_len) < 0);

	for (std::size_t kv_per_block_i = 0; kv_per_block_i < sizeof(kv_per_block_list) / sizeof(kv_per_block_list[0]); kv_per_block_i++)
	{
		std::size_t kv_per_block = kv_per_block_list[kv_per_block_i];

		//for (std::size_t group_size = 1; group_size <= 1024; group_size *= 2)
		//for (std::size_t group_size = 128; group_size <= 1024; group_size *= 2)
		for (std::size_t group_size = 256; group_size <= 256; group_size *= 2)
		{
			std::vector<IndexType*> indexes;

			// construct
			ss.start();
			for (size_t i = 0; i < 1024; i++)
			{
				IndexType* s = new IndexType(key_len, size, group_size, 0, kv_per_block);

				for (std::size_t kv_i = 0; kv_i < size; kv_i++)
					s->insert(arr[kv_i]);
				s->flush();

				indexes.push_back(s);
			}
			ss.stop();
			uint64_t const_time = ss.real_time();

			// lookup
			//std::size_t lookups = 10000000 / group_size;
			const std::size_t lookups = 10000000;	// for SOSP paper
			uint64_t lookup_time_hit = 1;
			uint64_t lookup_time_miss = 1;
			for (std::size_t lookup_mode = 0; lookup_mode < 2; lookup_mode++)
			{
				// same seed for both modes, so both visit the same
				// sequence of (index, key position) pairs
				srand(0);
				ss.start();
				for (std::size_t lookup_i = 0; lookup_i < lookups; lookup_i++)
				{
					std::size_t i = static_cast<std::size_t>(rand()) % indexes.size();
					std::size_t kv_i = static_cast<std::size_t>(rand()) % size;
					if (lookup_mode == 0)
					{
						// key taken from the indexed set: the located
						// position must fall in the key's block
						std::size_t idx = indexes[i]->locate(arr[kv_i]);
						assert(kv_i / kv_per_block == idx / kv_per_block);
						(void)idx;
					}
					else
					{
						// key taken from the second key set; the result is
						// not checked
						std::size_t idx = indexes[i]->locate(arr2[kv_i]);
						(void)idx;
					}
				}
				ss.stop();
				if (lookup_mode == 0)
					lookup_time_hit = ss.real_time();
				else
					lookup_time_miss = ss.real_time();
			}

			// std::size_t is not necessarily unsigned long, so convert
			// explicitly to the type %lu expects.
			printf("kv_per_block: %lu\n", static_cast<unsigned long>(kv_per_block));
			printf("group_size: %lu\n", static_cast<unsigned long>(group_size));
			printf("bits_per_key: %lf\n", static_cast<double>(indexes[0]->bit_size()) / static_cast<double>(size));
			printf("bits_per_key_trie_only: %lf\n", static_cast<double>(indexes[0]->bit_size_trie_only()) / static_cast<double>(size));
			// NOTE(review): the division by 1000 and the "_us" labels suggest
			// real_time() reports nanoseconds — confirm against stopwatch.
			printf("const_time_us: %lf\n", static_cast<double>(const_time) / static_cast<double>(indexes.size() * size) / 1000.);
			printf("lookup_time_us_hit: %lf\n", static_cast<double>(lookup_time_hit) / static_cast<double>(lookups) / 1000.);
			printf("lookup_time_us_miss: %lf\n", static_cast<double>(lookup_time_miss) / static_cast<double>(lookups) / 1000.);
			printf("\n");

			for (size_t i = 0; i < indexes.size(); i++)
				delete indexes[i];
			indexes.clear();
		}
	}

	trie_stats::print();
}