Example #1
0
// Returns the Euclidean (L2) distance between two sparse feature vectors.
//
// Both vectors are consumed in a single merge pass, so the result is only
// meaningful when p1 and p2 are each ordered by ascending key.  A key that
// appears in only one vector contributes the square of its value; a key that
// appears in both contributes the squared difference.
//
// Keys are ordered with std::string::compare rather than strcmp on c_str(),
// so keys containing '\0' are not truncated and the ordering agrees with
// std::string's own comparison operators.  Every value is widened to double
// before it is squared, so the merge loop and the tail loops accumulate with
// the same precision and a large value cannot overflow in a narrower
// intermediate product.
double dist(const common::sfv_t& p1, const common::sfv_t& p2) {
  double sum_sq = 0;
  common::sfv_t::const_iterator it1 = p1.begin();
  common::sfv_t::const_iterator it2 = p2.begin();
  while (it1 != p1.end() && it2 != p2.end()) {
    const int cmp = it1->first.compare(it2->first);
    if (cmp < 0) {
      // Key only in p1 (so far): the other side is implicitly 0.
      const double v = it1->second;
      sum_sq += v * v;
      ++it1;
    } else if (cmp > 0) {
      // Key only in p2 (so far).
      const double v = it2->second;
      sum_sq += v * v;
      ++it2;
    } else {
      // Same key on both sides.
      const double diff =
          static_cast<double>(it1->second) - static_cast<double>(it2->second);
      sum_sq += diff * diff;
      ++it1;
      ++it2;
    }
  }
  // At most one of the two tails below is non-empty.
  for (; it1 != p1.end(); ++it1) {
    const double v = it1->second;
    sum_sq += v * v;
  }
  for (; it2 != p2.end(); ++it2) {
    const double v = it2->second;
    sum_sq += v * v;
  }
  return std::sqrt(sum_sq);
}
Example #2
0
// Returns the dot product of q1 and q2 divided by calc_l2norm(q1) and by
// calc_l2norm(q2); this is the cosine similarity provided calc_l2norm yields
// the L2 norm (helper not visible here -- confirm).
//
// Returns 0 when either norm is exactly 0.  Otherwise both arguments are
// sorted in place before the merge, so callers observe q1 and q2 reordered.
float recommender_base::calc_similality(common::sfv_t& q1, common::sfv_t& q2) {
  const float norm1 = calc_l2norm(q1);
  const float norm2 = calc_l2norm(q2);
  if (norm1 == 0.f || norm2 == 0.f) {
    return 0.f;
  }

  // The merge below needs both vectors in the same key order.
  sort(q1.begin(), q1.end());
  sort(q2.begin(), q2.end());

  float dot = 0.f;
  size_t a = 0;
  size_t b = 0;
  while (a < q1.size() && b < q2.size()) {
    const int order = q1[a].first.compare(q2[b].first);
    if (order == 0) {
      // Only keys present on both sides contribute to the dot product.
      dot += q1[a].second * q2[b].second;
      ++a;
      ++b;
    } else if (order < 0) {
      ++a;
    } else {
      ++b;
    }
  }

  return dot / norm1 / norm2;
}
Example #3
0
// Computes right += s * left for sparse feature vectors.
//
// Assumes both vectors are ordered by ascending key with no duplicate keys;
// the merge walk relies on that ordering.  Keys found only in `left` are
// added to `right` with value left_value * s, keys found only in `right` are
// kept unchanged, and keys found in both have left_value * s added to the
// existing value.
//
// The merged result is built in a scratch vector and swapped into `right`
// at the end.  Inserting into the middle of `right` during the walk would
// shift the tail on every insertion, making the routine
// O(|left| * |right|); the scratch-vector merge is O(|left| + |right|).
// It is also safe when `left` and `right` refer to the same object, because
// `right` is not modified until the walk is finished.
void scalar_mul_and_add(
    const common::sfv_t& left,
    float s,
    common::sfv_t& right) {
  const common::sfv_t& rhs = right;

  common::sfv_t merged;
  merged.reserve(left.size() + rhs.size());

  common::sfv_t::const_iterator l = left.begin();
  common::sfv_t::const_iterator r = rhs.begin();
  while (l != left.end() && r != rhs.end()) {
    const int cmp = l->first.compare(r->first);
    if (cmp < 0) {
      // Key only in left: add the scaled entry.
      std::pair<std::string, float> p = *l;
      p.second *= s;
      merged.push_back(p);
      ++l;
    } else if (cmp > 0) {
      // Key only in right: carry it over untouched.
      merged.push_back(*r);
      ++r;
    } else {
      // Key in both: accumulate into the existing right-hand value.
      merged.push_back(*r);
      merged.back().second += l->second * s;
      ++l;
      ++r;
    }
  }
  // Remaining right-hand entries are carried over unchanged.
  for (; r != rhs.end(); ++r) {
    merged.push_back(*r);
  }
  // Remaining left-hand entries are appended scaled.
  for (; l != left.end(); ++l) {
    std::pair<std::string, float> p = *l;
    p.second *= s;
    merged.push_back(p);
  }

  right.swap(merged);
}
Example #4
0
// Returns the sum of two sparse feature vectors as a new vector.
//
// The inputs are merged in one pass in key order (the walk assumes each
// input is already ordered by ascending key).  Entries whose key exists in
// only one input are copied as-is; entries sharing a key are emitted once
// with the two values added.
common::sfv_t add(const common::sfv_t& p1, const common::sfv_t& p2) {
  common::sfv_t sum;
  common::sfv_t::const_iterator lhs = p1.begin();
  common::sfv_t::const_iterator rhs = p2.begin();
  const common::sfv_t::const_iterator lhs_end = p1.end();
  const common::sfv_t::const_iterator rhs_end = p2.end();

  while (lhs != lhs_end && rhs != rhs_end) {
    const int order = lhs->first.compare(rhs->first);
    if (order == 0) {
      sum.push_back(make_pair(lhs->first, lhs->second + rhs->second));
      ++lhs;
      ++rhs;
    } else if (order < 0) {
      sum.push_back(*lhs);
      ++lhs;
    } else {
      sum.push_back(*rhs);
      ++rhs;
    }
  }

  // Whatever is left over on either side has no counterpart; append it.
  sum.insert(sum.end(), lhs, lhs_end);
  sum.insert(sum.end(), rhs, rhs_end);

  return sum;
}
Example #5
0
// Multiplies every value in fv by get_global_weight(key), in place, then
// removes every entry for which the is_zero predicate returns true.
void weight_manager::get_weight(common::sfv_t& fv) const {
  common::sfv_t::iterator cur = fv.begin();
  while (cur != fv.end()) {
    const double scale = get_global_weight(cur->first);
    cur->second = static_cast<float>(cur->second * scale);
    ++cur;
  }

  // Erase-remove: compact the surviving entries, then cut off the tail.
  common::sfv_t::iterator new_end = remove_if(fv.begin(), fv.end(), is_zero());
  fv.erase(new_end, fv.end());
}
Example #6
0
// Applies one update step to the stored (v1, v2) pair of every feature in
// sfv, for pos_label and, when neg_label is non-empty, for neg_label.
//
// For a feature value x and stored pair (v1, v2):
//   pos_label: v1 += alpha * v2 * x      v2 -= beta * v2 * v2 * x * x
//   neg_label: v1 -= alpha * v2 * x      v2 -= beta * v2 * v2 * x * x
// Both labels start from (0, 1) before ClassifierUtil::get_two fills them
// in from the values read out of storage.
void arow::update(
    const common::sfv_t& sfv,
    float alpha,
    float beta,
    const std::string& pos_label,
    const std::string& neg_label) {
  storage::storage_base* const store = get_storage();
  common::sfv_t::const_iterator cur = sfv.begin();
  for (; cur != sfv.end(); ++cur) {
    const string& key = cur->first;
    const float x = cur->second;

    storage::feature_val2_t stored;
    store->get2(key, stored);

    storage::val2_t pos(0.f, 1.f);
    storage::val2_t neg(0.f, 1.f);
    ClassifierUtil::get_two(stored, pos_label, neg_label, pos, neg);

    const storage::val2_t new_pos(
        pos.v1 + alpha * pos.v2 * x,
        pos.v2 - beta * pos.v2 * pos.v2 * x * x);
    store->set2(key, pos_label, new_pos);

    if (neg_label.empty()) {
      // No negative label supplied: only the positive side is updated.
      continue;
    }
    const storage::val2_t new_neg(
        neg.v1 - alpha * neg.v2 * x,
        neg.v2 - beta * neg.v2 * neg.v2 * x * x);
    store->set2(key, neg_label, new_neg);
  }
}
Example #7
0
// Fills `ret` with one score per label known to class2id_: the sum, over the
// features of sfv that exist in tbl_, of (stored v1 for that label) * (feature
// value).  Labels that received no contribution are reported as 0.0.
// `ret` is cleared first; the table is read under a scoped_rlock on mutex_.
void local_storage::inp(const common::sfv_t& sfv, map_feature_val1_t& ret)
    const {
  ret.clear();

  scoped_rlock lk(mutex_);
  // Accumulate by numeric class id rather than by label string: hashing a
  // string key is slow compared with hashing a uint64_t.
  jubatus::util::data::unordered_map<uint64_t, double> score_by_id;
  common::sfv_t::const_iterator fv = sfv.begin();
  for (; fv != sfv.end(); ++fv) {
    id_features3_t::const_iterator row = tbl_.find(fv->first);
    if (row != tbl_.end()) {
      const double x = fv->second;
      const id_feature_val3_t& weights = row->second;
      id_feature_val3_t::const_iterator w = weights.begin();
      for (; w != weights.end(); ++w) {
        score_by_id[w->first] += w->second.v1 * x;
      }
    }
  }

  // Translate ids back to labels, emitting an explicit 0.0 for labels that
  // never appeared in the accumulated scores.
  const std::vector<std::string> labels = class2id_.get_all_id2key();
  for (size_t i = 0; i < labels.size(); ++i) {
    const std::string& label = labels[i];
    const uint64_t id = class2id_.get_id_const(label);
    const bool scored =
        id != common::key_manager::NOTFOUND && score_by_id.count(id) != 0;
    if (scored) {
      ret[label] = score_by_id[id];
    } else {
      ret[label] = 0.0;
    }
  }
}
Example #8
0
// Fills `ret` with per-label scores: for every feature of sfv found in tbl_,
// (stored v1 for a class id) * (feature value) is added to that id's slot.
// `ret` is cleared first, and only ids whose accumulated score is non-zero
// are written to it, keyed by class2id_.get_key(id).
//
// The accumulator has class2id_.size() slots and is indexed directly by the
// ids stored in tbl_ (presumably every stored id is below that size --
// not checked here).
void local_storage::inp(const common::sfv_t& sfv, map_feature_val1_t& ret)
    const {
  ret.clear();

  std::vector<float> scores(class2id_.size());
  common::sfv_t::const_iterator fv = sfv.begin();
  for (; fv != sfv.end(); ++fv) {
    id_features3_t::const_iterator row = tbl_.find(fv->first);
    if (row != tbl_.end()) {
      const float x = fv->second;
      const id_feature_val3_t& weights = row->second;
      id_feature_val3_t::const_iterator w = weights.begin();
      for (; w != weights.end(); ++w) {
        scores[w->first] += w->second.v1 * x;
      }
    }
  }

  for (size_t id = 0; id < scores.size(); ++id) {
    if (scores[id] != 0.f) {
      ret[class2id_.get_key(id)] = scores[id];
    }
  }
}
Example #9
0
// Returns the sum of the squared values of a sparse feature vector (keys are
// ignored).  An empty vector yields 0.
//
// Each value is widened to double and squared with a plain multiplication
// instead of std::pow(x, 2): the multiply is correctly rounded and avoids a
// general-purpose power call per element.
double sum2(const common::sfv_t& p) {
  double total = 0;
  for (common::sfv_t::const_iterator it = p.begin(); it != p.end(); ++it) {
    const double v = it->second;
    total += v * v;
  }
  return total;
}
Example #10
0
// Returns a copy of p in which every value has been multiplied by s; keys
// and their order are unchanged.
common::sfv_t scalar_dot(const common::sfv_t& p, double s) {
  common::sfv_t scaled(p);
  common::sfv_t::iterator cur = scaled.begin();
  while (cur != scaled.end()) {
    // The product is formed in double and then stored back in the element
    // type.
    cur->second *= s;
    ++cur;
  }
  return scaled;
}
// Builds an eigen_svec_t constructed with d_ and passes every entry of src,
// in order, to insertc together with that vector; returns the result.
eigen_svec_t eigen_feature_mapper::convertc(const common::sfv_t& src) const {
  eigen_svec_t vec(d_);
  common::sfv_t::const_iterator cur = src.begin();
  const common::sfv_t::const_iterator last = src.end();
  while (cur != last) {
    insertc(*cur, vec);
    ++cur;
  }
  return vec;
}
// Applies one update step to the stored (v1, v2) pair of every feature in
// sfv, for pos_label and, when neg_label is non-empty, for neg_label, then
// calls touch(pos_label).
//
// For a feature value x, stored pair (v1, v2) and
// C = config_.regularization_weight:
//   pos_label: v1 += step_width * v2 * x
//   neg_label: v1 -= step_width * v2 * x
//   both:      v2  = 1 / (1 / v2 + 2 * step_width * x * x * C)
// The whole loop runs under a scoped write lock obtained from the storage,
// which is why the *_nolock accessors are used.
void confidence_weighted::update(
    const common::sfv_t& sfv,
    float step_width,
    const string& pos_label,
    const string& neg_label) {
  util::concurrent::scoped_wlock lk(storage_->get_lock());
  common::sfv_t::const_iterator cur = sfv.begin();
  for (; cur != sfv.end(); ++cur) {
    const string& key = cur->first;
    const float x = cur->second;

    storage::feature_val2_t stored;
    storage_->get2_nolock(key, stored);

    storage::val2_t pos(0.f, 1.f);
    storage::val2_t neg(0.f, 1.f);
    ClassifierUtil::get_two(stored, pos_label, neg_label, pos, neg);

    const float C = config_.regularization_weight;
    // The same increment is applied to the inverse of v2 for both labels.
    const float covar_step = 2.f * step_width * x * x * C;

    const storage::val2_t new_pos(
        pos.v1 + step_width * pos.v2 * x,
        1.f / (1.f / pos.v2 + covar_step));
    storage_->set2_nolock(key, pos_label, new_pos);

    if (!neg_label.empty()) {
      const storage::val2_t new_neg(
          neg.v1 - step_width * neg.v2 * x,
          1.f / (1.f / neg.v2 + covar_step));
      storage_->set2_nolock(key, neg_label, new_neg);
    }
  }
  touch(pos_label);
}
Example #13
0
// Clears `ret`, then for every feature in sfv fetches its per-label values
// with get() and adds (stored value) * (feature value) to ret[label].
void storage_base::inp(const common::sfv_t& sfv, map_feature_val1_t& ret)
    const {
  ret.clear();
  common::sfv_t::const_iterator fv = sfv.begin();
  while (fv != sfv.end()) {
    const string& key = fv->first;
    const float x = fv->second;

    feature_val1_t per_label;
    get(key, per_label);

    feature_val1_t::const_iterator entry = per_label.begin();
    for (; entry != per_label.end(); ++entry) {
      ret[entry->first] += entry->second * x;
    }
    ++fv;
  }
}
// For every feature in sfv, adds (value * step_width) to the v1 of inc_class
// in that feature's row of tbl_ and, when dec_class is non-empty, subtracts
// the same amount from the v1 of dec_class.
//
// Class ids come from class2id_.get_id(); the id for dec_class is only
// requested when dec_class is non-empty.  Rows and cells are reached through
// operator[], exactly as written below.
void local_storage::bulk_update(
    const common::sfv_t& sfv,
    float step_width,
    const string& inc_class,
    const string& dec_class) {
  const uint64_t inc_id = class2id_.get_id(inc_class);

  const bool has_dec = dec_class != "";
  uint64_t dec_id = 0;  // only meaningful when has_dec is true
  if (has_dec) {
    dec_id = class2id_.get_id(dec_class);
  }

  typedef common::sfv_t::const_iterator iter_t;
  for (iter_t it = sfv.begin(); it != sfv.end(); ++it) {
    const float delta = it->second * step_width;
    id_feature_val3_t& row = tbl_[it->first];
    row[inc_id].v1 += delta;
    if (has_dec) {
      row[dec_id].v1 -= delta;
    }
  }
}
Example #15
0
// Applies one update step to the stored (v1, v2) pair of every feature in
// sfv, for pos_label and, when neg_label is non-empty, for neg_label.
//
// For a feature value x, stored pair (v1, v2) and C = config_.C:
//   step = (1 - margin) * (x * v2) / ((x * v2) * x + 1 / C)
//   pos_label: v1 += step          neg_label: v1 -= step
//   both:      v2  = 1 / (1 / v2 + (2 * C + C * C * variance) * x * x)
// Both labels start from (0, 1) before ClassifierUtil::get_two fills them
// in from the values read out of storage.
void normal_herd::update(
    const common::sfv_t& sfv,
    float margin,
    float variance,
    const string& pos_label,
    const string& neg_label) {
  storage::storage_base* const store = get_storage();
  common::sfv_t::const_iterator cur = sfv.begin();
  for (; cur != sfv.end(); ++cur) {
    const string& key = cur->first;
    const float x = cur->second;

    storage::feature_val2_t stored;
    store->get2(key, stored);

    storage::val2_t pos(0.f, 1.f);
    storage::val2_t neg(0.f, 1.f);
    ClassifierUtil::get_two(stored, pos_label, neg_label, pos, neg);

    // Feature value multiplied by each label's current v2.
    const float pos_cov_x = x * pos.v2;
    const float neg_cov_x = x * neg.v2;

    const float C = config_.C;

    const storage::val2_t new_pos(
        pos.v1 + (1.f - margin) * pos_cov_x / (pos_cov_x * x + 1.f / C),
        1.f / ((1.f / pos.v2) + (2 * C + C * C * variance) * x * x));
    store->set2(key, pos_label, new_pos);

    if (neg_label.empty()) {
      // No negative label supplied: only the positive side is updated.
      continue;
    }
    const storage::val2_t new_neg(
        neg.v1 - (1.f - margin) * neg_cov_x / (neg_cov_x * x + 1.f / C),
        1.f / ((1.f / neg.v2) + (2 * C + C * C * variance) * x * x));
    store->set2(key, neg_label, new_neg);
  }
}
Example #16
0
// Applies one update step to the (v1, v2) pair stored under the fixed label
// "+" for every feature in sfv.
//
// The current pair is the first entry returned by get2_nolock, or (0, 1)
// when nothing is stored.  For a feature value x:
//   v1 += alpha * v2 * x
//   v2 -= beta * v2 * v2 * x * x
// The whole loop runs under a scoped write lock obtained from the storage.
void arow::update(
    const common::sfv_t& sfv,
    double alpha,
    double beta) {
  util::concurrent::scoped_wlock lk(storage_->get_lock());
  common::sfv_t::const_iterator cur = sfv.begin();
  while (cur != sfv.end()) {
    const std::string& key = cur->first;
    const double x = cur->second;

    storage::feature_val2_t stored;
    storage_->get2_nolock(key, stored);

    storage::val2_t w(0.0, 1.0);
    if (stored.size() != 0) {
      w = stored[0].second;
    }

    const storage::val2_t updated(
        w.v1 + alpha * w.v2 * x,
        w.v2 - beta * w.v2 * w.v2 * x * x);
    storage_->set2_nolock(key, "+", updated);

    ++cur;
  }
}
Example #17
0
// Generic bulk update built on the single-feature accessors.
//
// For every feature in sfv with value x:
//   - when dec_class is non-empty, delegates to
//     update(feature, inc_class, dec_class, step_width * x);
//   - otherwise reads the feature's values with get(), takes the value
//     stored for inc_class (0 when absent) and writes back that value plus
//     step_width * x with set().
void storage_base::bulk_update(
    const common::sfv_t& sfv,
    float step_width,
    const std::string& inc_class,
    const std::string& dec_class) {
  const bool has_dec = dec_class != "";
  common::sfv_t::const_iterator cur = sfv.begin();
  for (; cur != sfv.end(); ++cur) {
    const string& key = cur->first;
    const float x = cur->second;

    if (has_dec) {
      update(key, inc_class, dec_class, step_width * x);
      continue;
    }

    feature_val1_t current;
    get(key, current);

    // Linear search for the first entry belonging to inc_class.
    size_t idx = 0;
    while (idx < current.size() && !(current[idx].first == inc_class)) {
      ++idx;
    }
    float base = 0.f;
    if (idx < current.size()) {
      base = current[idx].second;
    }
    set(key, inc_class, base + step_width * x);
  }
}