Esempio n. 1
0
void euclid_lsh::neighbor_row_from_hash(
    const bit_vector& bv,
    float norm,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  // This function is not thread safe.
  // Take lock out of this function.
  jubatus::util::lang::shared_ptr<const column_table> table =
    get_const_table();
  const_bit_vector_column& bv_col = lsh_column();
  const_float_column& norm_col = norm_column();
  const float denom = bv.bit_num();
  heap_t heap(ret_num);
  jubatus::util::lang::function<heap_t(size_t, size_t)> f =
    jubatus::util::lang::bind(
      &ranking_hamming_bit_vectors_worker, &bv, &bv_col, &norm_col,
      denom, norm, ret_num,
      jubatus::util::lang::_1, jubatus::util::lang::_2);
  ranking_hamming_bit_vectors_internal(
      f, table->size_nolock(), threads_, heap);

  vector<pair<float, size_t> > sorted;
  heap.get_sorted(sorted);

  ids.clear();
  for (size_t i = 0; i < sorted.size(); ++i) {
    ids.push_back(make_pair(
      table->get_key_nolock(sorted[i].second), sorted[i].first));
  }
}
Esempio n. 2
0
// Scans every row of the table, scores it against the query hash `bv` and
// query norm `norm`, and writes up to `ret_num` (row key, distance) pairs
// into `ids` in the order produced by fixed_size_heap::get_sorted(). Any
// previous content of `ids` is discarded.
//
// For each row the angle is taken as theta = pi * hamming(bv, row) / bit_num
// and the squared distance follows from the law of cosines:
//   dist^2 = norm^2 + row_norm^2 - 2 * norm * row_norm * cos(theta)
// The query-only term norm^2 is the same for every row, so it is left out of
// the score pushed to the heap and added back only for the returned rows.
void euclid_lsh::neighbor_row_from_hash(
    const bit_vector& bv,
    float norm,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  jubatus::util::lang::shared_ptr<const column_table> table = get_const_table();

  // Keeps at most ret_num entries (presumably the smallest scores -- confirm
  // against fixed_size_heap's ordering).
  jubatus::core::storage::fixed_size_heap<pair<float, size_t> > heap(ret_num);
  {
    const_bit_vector_column& bv_col = lsh_column();
    const_float_column& norm_col = norm_column();

    const float denom = bv.bit_num();
    for (size_t i = 0; i < table->size(); ++i) {
      const size_t hamm_dist = bv.calc_hamming_distance(bv_col[i]);
      const float theta = hamm_dist * M_PI / denom;
      // row_norm^2 - 2 * norm * row_norm * cos(theta)
      const float score = norm_col[i] * (norm_col[i] - 2 * norm * cos(theta));
      heap.push(make_pair(score, i));
    }
  }

  vector<pair<float, size_t> > sorted;
  heap.get_sorted(sorted);

  ids.clear();
  ids.reserve(sorted.size());
  const float squared_norm = norm * norm;
  for (size_t i = 0; i < sorted.size(); ++i) {
    // Mathematically the sum is >= (norm - row_norm)^2 >= 0, but float
    // cancellation can leave it slightly negative for (near-)identical rows,
    // which would make sqrt return NaN. Clamp negatives to zero; a NaN sum is
    // deliberately left untouched so invalid input stays visible.
    const float squared_dist = squared_norm + sorted[i].first;
    ids.push_back(make_pair(table->get_key(sorted[i].second),
                            sqrt(squared_dist < 0.0f ? 0.0f : squared_dist)));
  }
}
Esempio n. 3
0
void euclid_lsh::neighbor_row(
    const std::string& query_id,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  const pair<bool, uint64_t> maybe_index =
      get_const_table()->exact_match(query_id);
  if (!maybe_index.first) {
    ids.clear();
    return;
  }

  const bit_vector bv = lsh_column()[maybe_index.second];
  const float norm = norm_column()[maybe_index.second];
  neighbor_row_from_hash(bv, norm, ids, ret_num);
}
Esempio n. 4
0
void euclid_lsh::neighbor_row(
    const std::string& query_id,
    vector<pair<string, float> >& ids,
    uint64_t ret_num) const {
  util::concurrent::scoped_rlock lk(get_const_table()->get_mutex());

  /* table lock acquired; all subsequent table operations must be nolock */

  const pair<bool, uint64_t> maybe_index =
      get_const_table()->exact_match_nolock(query_id);
  if (!maybe_index.first) {
    ids.clear();
    return;
  }

  const bit_vector bv = lsh_column()[maybe_index.second];
  const float norm = norm_column()[maybe_index.second];
  neighbor_row_from_hash(bv, norm, ids, ret_num);
}