예제 #1
0
// Fills `ret` with a score-weighted average of the stored rows that
// similar_row() reports for `query`.
//
// At most complete_row_similar_num_ neighbours are requested. Each
// neighbour's row is read from orig_, every value is multiplied by the
// neighbour's score and appended to `ret`. The result is then passed through
// common::sort_and_merge() and divided by the number of neighbours whose row
// was non-empty. `ret` stays empty when no neighbour contributed a row.
void recommender_base::complete_row(const common::sfv_t& query,
                                    common::sfv_t& ret) const {
  ret.clear();
  vector<pair<string, float> > neighbors;
  similar_row(query, neighbors, complete_row_similar_num_);
  if (neighbors.empty()) {
    return;
  }

  size_t contributing_rows = 0;
  typedef vector<pair<string, float> >::const_iterator neighbor_iter_t;
  for (neighbor_iter_t nb = neighbors.begin(); nb != neighbors.end(); ++nb) {
    common::sfv_t row;
    orig_.get_row(nb->first, row);
    if (row.empty()) {
      // Neighbours without a stored row do not count towards the average.
      continue;
    }
    ++contributing_rows;

    const float weight = nb->second;
    for (common::sfv_t::const_iterator col = row.begin();
         col != row.end(); ++col) {
      ret.push_back(make_pair(col->first, col->second * weight));
    }
  }

  if (contributing_rows == 0) {
    return;
  }
  common::sort_and_merge(ret);
  for (common::sfv_t::iterator out = ret.begin(); out != ret.end(); ++out) {
    out->second /= contributing_rows;
  }
}
// Builds an eigen_svec_t constructed with d_ and hands every entry of `src`
// to insertc() to populate it.
eigen_svec_t eigen_feature_mapper::convertc(const common::sfv_t& src) const {
  eigen_svec_t mapped(d_);
  for (size_t i = 0; i < src.size(); ++i) {
    insertc(src[i], mapped);
  }
  return mapped;
}
예제 #3
0
// Applies one update step to the stored (v1, v2) pair of every feature in
// `sfv` for `pos_label` and, when `neg_label` is non-empty, for `neg_label`.
//
// The storage write lock is held for the whole call. pos_val / neg_val are
// pre-initialised to (0, 1) before ClassifierUtil::get_two() fills them from
// the stored values. For a feature value x:
//   v1 <- v1 +/- step_width * v2 * x   (+ for pos_label, - for neg_label)
//   v2 <- 1 / (1 / v2 + 2 * step_width * x * x * regularization_weight)
// Finally pos_label is passed to touch().
void confidence_weighted::update(
    const common::sfv_t& sfv,
    float step_width,
    const string& pos_label,
    const string& neg_label) {
  util::concurrent::scoped_wlock lk(storage_->get_lock());
  const bool has_neg_label = (neg_label != "");
  for (size_t i = 0; i < sfv.size(); ++i) {
    const string& feature = sfv[i].first;
    const float x = sfv[i].second;

    storage::feature_val2_t stored;
    storage_->get2_nolock(feature, stored);

    storage::val2_t pos_val(0.f, 1.f);
    storage::val2_t neg_val(0.f, 1.f);
    ClassifierUtil::get_two(stored, pos_label, neg_label, pos_val, neg_val);

    // The same increment is applied to 1 / v2 for both labels.
    const float C = config_.regularization_weight;
    const float covar_step = 2.f * step_width * x * x * C;

    const storage::val2_t updated_pos(
        pos_val.v1 + step_width * pos_val.v2 * x,
        1.f / (1.f / pos_val.v2 + covar_step));
    storage_->set2_nolock(feature, pos_label, updated_pos);

    if (!has_neg_label) {
      continue;
    }
    const storage::val2_t updated_neg(
        neg_val.v1 - step_width * neg_val.v2 * x,
        1.f / (1.f / neg_val.v2 + covar_step));
    storage_->set2_nolock(feature, neg_label, updated_neg);
  }
  touch(pos_label);
}
예제 #4
0
// Returns a copy of `p` in which every value has been multiplied by `s`.
// The product is formed in double and then narrowed to the float value type.
common::sfv_t scalar_dot(const common::sfv_t& p, double s) {
  common::sfv_t scaled;
  for (size_t i = 0; i < p.size(); ++i) {
    const double product = p[i].second * s;
    scaled.push_back(make_pair(p[i].first, product));
  }
  return scaled;
}
예제 #5
0
// Returns the sum of the squared values of `p` (0 for an empty vector).
double sum2(const common::sfv_t& p) {
  double total = 0;
  for (size_t i = 0; i < p.size(); ++i) {
    total += std::pow(p[i].second, 2);
  }
  return total;
}
예제 #6
0
// Computes, per class, the sum over the features of `sfv` of
// (stored v1 for that feature and class) * (feature value), and writes the
// non-zero sums into `ret` keyed by class2id_.get_key(id).
//
// `ret` is cleared first. Features that are absent from tbl_ are skipped.
// The accumulator is indexed by the ids stored in tbl_ and is sized from
// class2id_.size().
void local_storage::inp(const common::sfv_t& sfv, map_feature_val1_t& ret)
    const {
  ret.clear();

  std::vector<float> score_by_id(class2id_.size());
  for (size_t i = 0; i < sfv.size(); ++i) {
    id_features3_t::const_iterator row = tbl_.find(sfv[i].first);
    if (row == tbl_.end()) {
      continue;
    }
    const float val = sfv[i].second;
    const id_feature_val3_t& weights = row->second;
    for (id_feature_val3_t::const_iterator w = weights.begin();
         w != weights.end(); ++w) {
      score_by_id[w->first] += w->second.v1 * val;
    }
  }

  for (size_t id = 0; id < score_by_id.size(); ++id) {
    if (score_by_id[id] != 0.f) {
      ret[class2id_.get_key(id)] = score_by_id[id];
    }
  }
}
예제 #7
0
// Applies one update step to the stored (v1, v2) pair of every feature in
// `sfv` for `pos_label` and, when `neg_label` is non-empty, for `neg_label`.
//
// pos_val / neg_val are pre-initialised to (0, 1) before
// ClassifierUtil::get_two() fills them from the stored values. For a feature
// value x:
//   v1 <- v1 +/- alpha * v2 * x        (+ for pos_label, - for neg_label)
//   v2 <- v2 - beta * v2 * v2 * x * x
void arow::update(
    const common::sfv_t& sfv,
    float alpha,
    float beta,
    const std::string& pos_label,
    const std::string& neg_label) {
  storage::storage_base* sto = get_storage();
  const bool has_neg_label = (neg_label != "");
  for (size_t i = 0; i < sfv.size(); ++i) {
    const string& feature = sfv[i].first;
    const float x = sfv[i].second;

    storage::feature_val2_t stored;
    sto->get2(feature, stored);

    storage::val2_t pos_val(0.f, 1.f);
    storage::val2_t neg_val(0.f, 1.f);
    ClassifierUtil::get_two(stored, pos_label, neg_label, pos_val, neg_val);

    const storage::val2_t updated_pos(
        pos_val.v1 + alpha * pos_val.v2 * x,
        pos_val.v2 - beta * pos_val.v2 * pos_val.v2 * x * x);
    sto->set2(feature, pos_label, updated_pos);

    if (!has_neg_label) {
      continue;
    }
    const storage::val2_t updated_neg(
        neg_val.v1 - alpha * neg_val.v2 * x,
        neg_val.v2 - beta * neg_val.v2 * neg_val.v2 * x * x);
    sto->set2(feature, neg_label, updated_neg);
  }
}
예제 #8
0
// Computes, per class id, the sum over the features of `sfv` of
// (stored v1 for that feature and class) * (feature value), then writes one
// entry into `ret` for every label returned by class2id_.get_all_id2key().
// Labels whose id is NOTFOUND or that received no contribution get 0.0.
//
// `ret` is cleared before the read lock on mutex_ is taken; the lock is held
// for the rest of the call. Features absent from tbl_ are skipped.
void local_storage::inp(const common::sfv_t& sfv, map_feature_val1_t& ret)
    const {
  ret.clear();

  scoped_rlock lk(mutex_);
  // Accumulate by uint64_t id rather than by label string, because hashing
  // strings is slow.
  jubatus::util::data::unordered_map<uint64_t, double> score_by_id;
  for (size_t i = 0; i < sfv.size(); ++i) {
    id_features3_t::const_iterator row = tbl_.find(sfv[i].first);
    if (row == tbl_.end()) {
      continue;
    }
    const double val = sfv[i].second;
    const id_feature_val3_t& weights = row->second;
    for (id_feature_val3_t::const_iterator w = weights.begin();
         w != weights.end(); ++w) {
      score_by_id[w->first] += w->second.v1 * val;
    }
  }

  std::vector<std::string> labels = class2id_.get_all_id2key();
  for (std::vector<std::string>::const_iterator label = labels.begin();
       label != labels.end(); ++label) {
    const uint64_t id = class2id_.get_id_const(*label);
    double score = 0.0;
    if (id != common::key_manager::NOTFOUND && score_by_id.count(id) != 0) {
      score = score_by_id[id];
    }
    ret[*label] = score;
  }
}
예제 #9
0
// Clears `ret`, then for every feature of `sfv` fetches its entries with
// get() and adds (entry value * feature value) to `ret` under the entry's key.
void storage_base::inp(const common::sfv_t& sfv, map_feature_val1_t& ret)
    const {
  ret.clear();
  for (size_t i = 0; i < sfv.size(); ++i) {
    const string& feature = sfv[i].first;
    const float val = sfv[i].second;

    feature_val1_t weights;
    get(feature, weights);
    for (feature_val1_t::const_iterator w = weights.begin();
         w != weights.end(); ++w) {
      ret[w->first] += w->second * val;
    }
  }
}
예제 #10
0
void recommender_base::complete_row(const std::string& id,
                                    common::sfv_t& ret) const {
  ret.clear();
  common::sfv_t sfv;
  orig_.get_row(id, sfv);
  complete_row(sfv, ret);
}
예제 #11
0
// Returns the L2 norm of `query`: the square root of the sum of its squared
// values, accumulated in float (0 for an empty vector).
float recommender_base::calc_l2norm(const common::sfv_t& query) {
  float squared_sum = 0.f;
  for (common::sfv_t::const_iterator it = query.begin();
       it != query.end(); ++it) {
    squared_sum += it->second * it->second;
  }
  return sqrt(squared_sum);
}
// Returns the L2 norm of `sfv`: the square root of the sum of its squared
// values, accumulated in float (0 for an empty vector).
float inverted_index_storage::calc_l2norm(const common::sfv_t& sfv) {
  float squared_sum = 0.f;
  for (common::sfv_t::const_iterator it = sfv.begin();
       it != sfv.end(); ++it) {
    squared_sum += it->second * it->second;
  }
  return std::sqrt(squared_sum);
}
// Appends to `scores` at most `ret_num` (column key, score) pairs, highest
// score first.
//
// Per-column values are accumulated by add_inp_scores() for every entry of
// `query`. Each is then divided by calc_columnl2norm() of the column and by
// the L2 norm of `query`; a column whose norm is 0 scores 0. Nothing is
// appended when the query norm is 0. `scores` is not cleared here.
void inverted_index_storage::calc_scores(
    const common::sfv_t& query,
    vector<pair<string, float> >& scores,
    size_t ret_num) const {
  typedef jubatus::util::data::unordered_map<uint64_t, float> score_map_t;

  const float query_norm = calc_l2norm(query);
  if (query_norm == 0.f) {
    return;
  }

  score_map_t raw_scores;
  for (common::sfv_t::const_iterator q = query.begin();
       q != query.end(); ++q) {
    const string& fid = q->first;
    float val = q->second;
    add_inp_scores(fid, val, raw_scores);
  }

  vector<pair<float, uint64_t> > ranked;
  for (score_map_t::const_iterator it = raw_scores.begin();
       it != raw_scores.end(); ++it) {
    const float norm = calc_columnl2norm(it->first);
    float normed_score = 0.f;
    if (norm != 0.f) {
      normed_score = it->second / norm / query_norm;
    }
    ranked.push_back(make_pair(normed_score, it->first));
  }
  // Sorting through reverse iterators yields descending (score, id) order.
  sort(ranked.rbegin(), ranked.rend());

  size_t limit = ranked.size();
  if (ret_num < limit) {
    limit = ret_num;
  }
  for (size_t i = 0; i < limit; ++i) {
    scores.push_back(
        make_pair(column2id_.get_key(ranked[i].second), ranked[i].first));
  }
}
예제 #14
0
// Returns the sum of the squared values of `fv`, accumulated in float
// (0 for an empty vector).
static float squared_norm(const common::sfv_t& fv) {
  float total = 0.f;
  for (common::sfv_t::const_iterator it = fv.begin(); it != fv.end(); ++it) {
    total += it->second * it->second;
  }
  return total;
}
예제 #15
0
 // Appends one feature to `ret_fv` whose name is `key`, a "$" separator and
 // the stream-formatted `value`, and whose weight is 1.0.
 void add_feature(const std::string& key,
                  double value,
                  common::sfv_t& ret_fv) const {
   std::stringstream name;
   name << key << "$" << value;
   const float weight = 1.0f;
   ret_fv.push_back(std::make_pair(name.str(), weight));
 }
예제 #16
0
// Returns the sum of the squared values of `fv`, accumulated in float
// (0 for an empty vector).
float classifier_base::squared_norm(const common::sfv_t& fv) {
  float total = 0.f;
  for (common::sfv_t::const_iterator it = fv.begin(); it != fv.end(); ++it) {
    total += it->second * it->second;
  }
  return total;
}
예제 #17
0
// Returns the margin from calc_margin() (which also sets `incorrect_label`)
// and stores in `var` the sum over all features of
//   (v2 stored for `label` + v2 stored for `incorrect_label`) * value^2.
// A label with no entry for a feature contributes a v2 of 1.
float classifier_base::calc_margin_and_variance(
    const common::sfv_t& sfv,
    const string& label,
    string& incorrect_label,
    float& var) const {
  const float margin = calc_margin(sfv, label, incorrect_label);
  var = 0.f;

  for (common::sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it) {
    const string& feature = it->first;
    const float val = it->second;

    feature_val2_t weight_covars;
    storage_->get2(feature, weight_covars);

    float label_covar = 1.f;
    float incorrect_label_covar = 1.f;
    for (size_t j = 0; j < weight_covars.size(); ++j) {
      const string& stored_label = weight_covars[j].first;
      // `label` is tested first, so it wins if both labels are equal.
      if (stored_label == label) {
        label_covar = weight_covars[j].second.v2;
      } else if (stored_label == incorrect_label) {
        incorrect_label_covar = weight_covars[j].second.v2;
      }
    }
    var += (label_covar + incorrect_label_covar) * val * val;
  }
  return margin;
}
예제 #18
0
 // Appends the feature (`key`, log(max(1.0, value))) to `ret_fv`.
 // Clamping to 1.0 first keeps the logarithm's argument at or above 1.
 void add_feature(const std::string& key,
                  double value,
                  common::sfv_t& ret_fv) const {
   const double clamped = std::max(1.0, value);
   const float weight = static_cast<float>(std::log(clamped));
   ret_fv.push_back(std::make_pair(key, weight));
 }
// Appends (rmap_[item.first], item.second) to `dst` when item.first has an
// entry in rmap_; otherwise leaves `dst` untouched.
void eigen_feature_mapper::rinsert(
    const pair<int, float>& item,
    common::sfv_t& dst) const {
  if (rmap_.find(item.first) == rmap_.end()) {
    return;
  }
  dst.push_back(make_pair(rmap_.find(item.first)->second, item.second));
}
예제 #20
0
// Applies one update step to the stored (v1, v2) pair of every feature in
// `sfv` for `pos_label` and, when `neg_label` is non-empty, for `neg_label`.
//
// pos_val / neg_val are pre-initialised to (0, 1) before
// ClassifierUtil::get_two() fills them from the stored values. With feature
// value x and C = config_.C:
//   v1 <- v1 +/- (1 - margin) * (x * v2) / ((x * v2) * x + 1 / C)
//   v2 <- 1 / (1 / v2 + (2 * C + C * C * variance) * x * x)
// where + is used for pos_label and - for neg_label.
void normal_herd::update(
    const common::sfv_t& sfv,
    float margin,
    float variance,
    const string& pos_label,
    const string& neg_label) {
  storage::storage_base* sto = get_storage();
  const bool has_neg_label = (neg_label != "");
  for (size_t i = 0; i < sfv.size(); ++i) {
    const string& feature = sfv[i].first;
    const float val = sfv[i].second;

    storage::feature_val2_t stored;
    sto->get2(feature, stored);

    storage::val2_t pos_val(0.f, 1.f);
    storage::val2_t neg_val(0.f, 1.f);
    ClassifierUtil::get_two(stored, pos_label, neg_label, pos_val, neg_val);

    const float val_covariance_pos = val * pos_val.v2;
    const float val_covariance_neg = val * neg_val.v2;
    const float C = config_.C;

    const storage::val2_t updated_pos(
        pos_val.v1
            + (1.f - margin) * val_covariance_pos
                / (val_covariance_pos * val + 1.f / C),
        1.f
            / ((1.f / pos_val.v2) + (2 * C + C * C * variance)
                * val * val));
    sto->set2(feature, pos_label, updated_pos);

    if (!has_neg_label) {
      continue;
    }
    const storage::val2_t updated_neg(
        neg_val.v1
            - (1.f - margin) * val_covariance_neg
                / (val_covariance_neg * val + 1.f / C),
        1.f
            / ((1.f / neg_val.v2) + (2 * C + C * C * variance)
                * val * val));
    sto->set2(feature, neg_label, updated_neg);
  }
}
예제 #21
0
// In-place `right += s * left` for sparse vectors walked in key order.
//
// Keys found in both vectors have `s * left value` added to the existing
// entry of `right`. Keys found only in `left` are inserted into `right` at
// the current position, or appended once `right` is exhausted, with value
// `s * left value`.
void scalar_mul_and_add(
    const common::sfv_t& left,
    float s,
    common::sfv_t& right) {
  common::sfv_t::const_iterator src = left.begin();
  common::sfv_t::iterator dst = right.begin();
  while (src != left.end() && dst != right.end()) {
    if (dst->first < src->first) {
      // `right` is behind: move on to its next key.
      ++dst;
      continue;
    }
    if (src->first < dst->first) {
      // Key missing from `right`: insert it. `dst` is left pointing at the
      // new element, which the next iteration compares against.
      const std::pair<std::string, float> scaled(src->first, src->second * s);
      dst = right.insert(dst, scaled);
      ++src;
    } else {
      dst->second += src->second * s;
      ++src;
      ++dst;
    }
  }
  while (src != left.end()) {
    const std::pair<std::string, float> scaled(src->first, src->second * s);
    right.push_back(scaled);
    ++src;
  }
}
예제 #22
0
// Projects `sfv` onto `hash_num` outputs.
//
// For each feature an mtrand generator is seeded with the hash of the feature
// name, and its successive next_gaussian() draws, multiplied by the feature
// value, are added to outputs 0 .. hash_num - 1 in order.
vector<float> random_projection(const common::sfv_t& sfv, uint32_t hash_num) {
  vector<float> proj(hash_num);
  for (common::sfv_t::const_iterator f = sfv.begin(); f != sfv.end(); ++f) {
    const uint32_t seed = common::hash_util::calc_string_hash(f->first);
    jubatus::util::math::random::mtrand rnd(seed);
    for (vector<float>::iterator out = proj.begin();
         out != proj.end(); ++out) {
      *out += f->second * rnd.next_gaussian();
    }
  }
  return proj;
}
예제 #23
0
// Converts the entries of `fv` back into datum values.
//
// Each entry is first offered to revert_num_value(); on success the result is
// appended to data.num_values_. Otherwise it is offered to
// revert_string_value() and, on success, appended to data.string_values_.
// Entries that neither helper accepts are dropped.
void revert_feature(const common::sfv_t& fv, fv_converter::datum& data) {
  for (common::sfv_t::const_iterator it = fv.begin(); it != fv.end(); ++it) {
    std::pair<std::string, double> num_value;
    std::pair<std::string, std::string> string_value;
    if (revert_num_value(*it, num_value)) {
      data.num_values_.push_back(num_value);
      continue;
    }
    if (revert_string_value(*it, string_value)) {
      data.string_values_.push_back(string_value);
    }
  }
}
예제 #24
0
// Returns the element-wise sum of `p1` and `p2`, merging the two vectors in
// a single pass by key order. Keys present in both have their values added;
// keys present in only one are copied unchanged.
common::sfv_t add(const common::sfv_t& p1, const common::sfv_t& p2) {
  common::sfv_t sum;
  common::sfv_t::const_iterator a = p1.begin();
  common::sfv_t::const_iterator b = p2.begin();
  while (a != p1.end() && b != p2.end()) {
    if (a->first < b->first) {
      sum.push_back(*a);
      ++a;
    } else if (a->first > b->first) {
      sum.push_back(*b);
      ++b;
    } else {
      sum.push_back(make_pair(a->first, a->second + b->second));
      ++a;
      ++b;
    }
  }
  // At most one of the two inputs still has entries; copy them unchanged.
  sum.insert(sum.end(), a, p1.end());
  sum.insert(sum.end(), b, p2.end());

  return sum;
}
예제 #25
0
// Returns the Euclidean distance between the sparse vectors `p1` and `p2`.
//
// Both inputs are walked in one merge pass by key, so this assumes each is
// sorted by key in std::string order. A key present in only one vector
// contributes that value squared, i.e. it is treated as 0 in the other.
//
// Keys are ordered with std::string::compare() rather than strcmp() on
// c_str(): it is the same ordering operator< uses and it does not stop at an
// embedded '\0'. Every square is taken in double, so all terms reach the
// double accumulator with the same precision.
double dist(const common::sfv_t& p1, const common::sfv_t& p2) {
  double squared = 0;
  common::sfv_t::const_iterator it1 = p1.begin();
  common::sfv_t::const_iterator it2 = p2.begin();
  while (it1 != p1.end() && it2 != p2.end()) {
    const int cmp = it1->first.compare(it2->first);
    if (cmp < 0) {
      const double v = it1->second;
      squared += v * v;
      ++it1;
    } else if (cmp > 0) {
      const double v = it2->second;
      squared += v * v;
      ++it2;
    } else {
      const double diff = static_cast<double>(it1->second) - it2->second;
      squared += diff * diff;
      ++it1;
      ++it2;
    }
  }
  // Whatever is left in either input has no counterpart in the other.
  for (; it1 != p1.end(); ++it1) {
    const double v = it1->second;
    squared += v * v;
  }
  for (; it2 != p2.end(); ++it2) {
    const double v = it2->second;
    squared += v * v;
  }
  return std::sqrt(squared);
}
예제 #26
0
// Returns the dot product of `q1` and `q2` divided by both L2 norms (their
// cosine similarity), or 0 when either norm is 0.
//
// Side effect: unless it returns early because of a zero norm, both
// arguments are sorted in place so that matching keys can be found in one
// merge pass.
float recommender_base::calc_similality(common::sfv_t& q1, common::sfv_t& q2) {
  const float q1_norm = calc_l2norm(q1);
  const float q2_norm = calc_l2norm(q2);
  if (q1_norm == 0.f || q2_norm == 0.f) {
    return 0.f;
  }
  sort(q1.begin(), q1.end());
  sort(q2.begin(), q2.end());

  float dot = 0.f;
  common::sfv_t::iterator a = q1.begin();
  common::sfv_t::iterator b = q2.begin();
  while (a != q1.end() && b != q2.end()) {
    if (a->first < b->first) {
      ++a;
    } else if (a->first > b->first) {
      ++b;
    } else {
      dot += a->second * b->second;
      ++a;
      ++b;
    }
  }

  return dot / q1_norm / q2_norm;
}
예제 #27
0
// Multiplies every value of `fv` by get_global_weight() of its key (the
// product is formed in double and narrowed to float), then erases the
// entries matched by the is_zero predicate.
void weight_manager::get_weight(common::sfv_t& fv) const {
  for (size_t i = 0; i < fv.size(); ++i) {
    const double global_weight = get_global_weight(fv[i].first);
    fv[i].second = static_cast<float>(fv[i].second * global_weight);
  }
  fv.erase(remove_if(fv.begin(), fv.end(), is_zero()), fv.end());
}
예제 #28
0
// Applies one update step to the value stored under the "+" label for every
// feature in `sfv`, holding the storage write lock for the whole call.
//
// The current (v1, v2) is the first entry returned by get2_nolock(), or
// (0, 1) when nothing is returned. For a feature value x:
//   v1 <- v1 + alpha * v2 * x
//   v2 <- v2 - beta * v2 * v2 * x * x
void arow::update(
    const common::sfv_t& sfv,
    double alpha,
    double beta) {
  util::concurrent::scoped_wlock lk(storage_->get_lock());
  for (size_t i = 0; i < sfv.size(); ++i) {
    const std::string& feature = sfv[i].first;
    const double x = sfv[i].second;

    storage::feature_val2_t stored;
    storage_->get2_nolock(feature, stored);

    storage::val2_t current_val(0.0, 1.0);
    if (!stored.empty()) {
      current_val = stored[0].second;
    }

    const storage::val2_t updated(
        current_val.v1 + alpha * current_val.v2 * x,
        current_val.v2 - beta * current_val.v2 * current_val.v2 * x * x);
    storage_->set2_nolock(feature, "+", updated);
  }
}
예제 #29
0
// Computes the hash vector of `query`.
//
// The result has all_lsh_num() components. For every feature, the projection
// returned by get_projection() for the hash of the feature name is scaled by
// the feature value and added component-wise. Every component is finally
// divided by bin_width_.
vector<float> euclid_lsh::calculate_lsh(const common::sfv_t& query) const {
  vector<float> hash(mixable_storage_->get_model()->all_lsh_num());
  for (common::sfv_t::const_iterator f = query.begin();
       f != query.end(); ++f) {
    const uint32_t seed = common::hash_util::calc_string_hash(f->first);
    const vector<float> proj = get_projection(seed);
    for (size_t j = 0; j < hash.size(); ++j) {
      hash[j] += f->second * proj[j];
    }
  }
  for (vector<float>::iterator h = hash.begin(); h != hash.end(); ++h) {
    *h /= bin_width_;
  }
  return hash;
}
예제 #30
0
// For every feature of `sfv`, adds (feature value * step_width) to the v1
// stored for `inc_class` and, when `dec_class` is non-empty, subtracts the
// same amount from the v1 stored for `dec_class`.
//
// Class ids come from class2id_.get_id(); the id of `dec_class` is only
// looked up when `dec_class` is non-empty. Rows and entries are accessed with
// operator[] on tbl_ and on the row.
void local_storage::bulk_update(
    const common::sfv_t& sfv,
    float step_width,
    const string& inc_class,
    const string& dec_class) {
  const uint64_t inc_id = class2id_.get_id(inc_class);
  const bool has_dec_class = (dec_class != "");
  uint64_t dec_id = 0;  // Only meaningful when has_dec_class is true.
  if (has_dec_class) {
    dec_id = class2id_.get_id(dec_class);
  }

  for (size_t i = 0; i < sfv.size(); ++i) {
    const float delta = sfv[i].second * step_width;
    id_feature_val3_t& feature_row = tbl_[sfv[i].first];
    feature_row[inc_id].v1 += delta;
    if (has_dec_class) {
      feature_row[dec_id].v1 -= delta;
    }
  }
}