void perceptron::train(const sfv_t& sfv, const std::string& label) {
  std::string predicted_label = classify(sfv);
  if (label == predicted_label) {
    return;
  }
  update_weight(sfv, 1.f, label, predicted_label);
}
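/*
 * For context, a minimal sketch of what a multiclass-perceptron
 * update_weight() typically does on a mistake: add the feature values to
 * the row of the true label and subtract them from the row of the
 * prediction. The storage API below is hypothetical, not the one used by
 * the train() functions in this listing.
 */
#include <map>
#include <string>
#include <utility>
#include <vector>

typedef std::vector<std::pair<std::string, float> > sketch_sfv_t;

void update_weight_sketch(
    std::map<std::string, std::map<std::string, float> >& w,
    const sketch_sfv_t& sfv, float step,
    const std::string& pos_label, const std::string& neg_label) {
  for (size_t i = 0; i < sfv.size(); ++i) {
    w[pos_label][sfv[i].first] += step * sfv[i].second;  // reinforce the true label
    w[neg_label][sfv[i].first] -= step * sfv[i].second;  // penalize the prediction
  }
}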
/**
 * Perform one more iteration of training (separable case).
 */
bool MarginActiveLearning::build_model_separable_iter(std::vector<DataPoint> &data_vec) {
  if (this->k >= n_iteration)
    return false;
  std::vector<int> indexVec;
  for (int i = 0; i < data_vec.size(); i++)
    indexVec.push_back(i);
  std::random_shuffle(indexVec.begin(), indexVec.end());
  double d = (double) this->dimension;
  int m = (int)(C * sqrt(d) * (d * log(d) + log(this->k / this->delta)));
  double b = M_PI / pow(2.0, this->k - 1);
  int n_labeled = 0;
  for (int i = 0; i < data_vec.size(); i++) {
    /*
     * Try to add a DataPoint. If its margin is less than b, include it in
     * the working set and ask for a label; otherwise drop it.
     */
    DataPoint dp = data_vec[indexVec[i]];
    if (this->margin(dp) < b) {
      this->working_set.push_back(dp);
      n_labeled++;
      n_label++;
    }
    if (n_labeled > m)
      break;
  }
  this->k += 1;
  update_weight(true);
  return true;
}
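/*
 * The per-round schedule read off build_model_separable_iter(), pulled out
 * as standalone helpers (names hypothetical): the query margin b_k halves
 * every round, while the label budget m_k grows only logarithmically in the
 * round index k, which is what makes the margin-based strategy label-efficient.
 */
#include <cmath>

double margin_threshold(int k) {
  return M_PI / std::pow(2.0, k - 1);                 // b = pi / 2^(k-1)
}

double label_budget(double C, double d, int k, double delta) {
  return C * std::sqrt(d) * (d * std::log(d) + std::log(k / delta));
}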
void passive_aggressive_2::train(const common::sfv_t& sfv, const string& label) {
  check_touchable(label);
  labels_.get_model()->increment(label);
  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;
  if (loss < 0.f) {
    storage_->register_label(label);
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    storage_->register_label(label);
    return;
  }
  update_weight(
      sfv,
      loss / (2 * sfv_norm + 1 / (2 * config_.regularization_weight)),
      label,
      incorrect_label);
  touch(label);
}
/*--------------------------------------------------------*/
void AzOptOnTree_TreeReg::update_with_features(double nlam, double nsig, double py_avg,
                                               AzRgf_forDelta *for_delta) /* updated */
{
  int tree_num = ens->size();
  int tx;
  for (tx = 0; tx < tree_num; ++tx) {
    ens->tree_u(tx)->restoreDataIndexes();
    AzReg_TreeReg *reg = reg_arr->reg(tx);
    reg->clearFocusNode();
    AzIIarr iia_nx_fx;
    tree_feat->featIds(tx, &iia_nx_fx);
    int num = iia_nx_fx.size();
    AzIIFarr iifa_nx_fx_delta;
    int ix;
    for (ix = 0; ix < num; ++ix) {
      int nx, fx;
      iia_nx_fx.get(ix, &nx, &fx);
      double delta = bestDelta(nx, fx, reg, nlam, nsig, py_avg, for_delta);
      update_weight(nx, fx, delta, reg);
    }
    ens->tree_u(tx)->releaseDataIndexes();
  }
}
void perceptron::train(const common::sfv_t& sfv, const std::string& label) {
  check_touchable(label);
  labels_.get_model()->increment(label);
  std::string predicted_label = classify(sfv);
  if (label == predicted_label) {
    return;
  }
  update_weight(sfv, 1.0, label, predicted_label);
  touch(label);
}
void PA::train(const sfv_t& sfv, const string& label) {
  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;
  if (loss < 0.f) {
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    return;
  }
  update_weight(sfv, loss / sfv_norm, label, incorrect_label);
}
/**
 * Perform one more iteration of training (unseparable case).
 */
bool MarginActiveLearning::build_model_unseparable_iter(std::vector<DataPoint> &data_vec,
                                                        double alpha, double beta) {
  if (this->k > n_iteration)
    return false;
  std::vector<int> indexVec;
  for (int i = 0; i < data_vec.size(); i++)
    indexVec.push_back(i);
  std::random_shuffle(indexVec.begin(), indexVec.end());
  double d = (double) this->dimension;
  double b = pow(2.0, (alpha - 1) * k) * M_PI * pow(d, -0.5)
      * sqrt(5 + alpha * k * log(beta) + log(2.0 + k));
  double e = pow(2.0, alpha * (1 - k) - 4) * beta
      / sqrt(5 + alpha * k * log(2.0) - log(beta) + log(1.0 + k));
  double m = C * pow(e, -2.0) * (d + log(k / delta));
  std::cout << "k: " << k << ", d: " << d << ", b: " << b
            << ", e: " << e << ", m: " << m << std::endl;
  int n_labeled = 0;
  for (int i = 0; i < data_vec.size(); i++) {
    /*
     * Try to add a DataPoint. If its margin is less than b, include it in
     * the working set and ask for a label. Otherwise, include it in the
     * working set with an automatic label from the current classifier.
     */
    DataPoint dp = data_vec[indexVec[i]];
    if (k == 1) {
      this->working_set.push_back(dp);
      n_labeled++;
      n_label++;
    } else if (this->margin(dp) < b) {
      this->working_set.push_back(dp);
      n_labeled++;
      n_label++;
    } else {
      dp.label = this->classify(dp);
      this->working_set.push_back(dp);
    }
    if (n_labeled >= m)
      break;
  }
  this->k += 1;
  update_weight(false);
  this->working_set.clear();
  return true;
}
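/*
 * The corresponding round-k schedule for the unseparable case, again as
 * standalone helpers with hypothetical names. Unlike the separable variant,
 * confident points are not dropped but self-labeled by the current
 * classifier, so the working set keeps covering the full data distribution.
 */
#include <cmath>

double agnostic_margin_threshold(double alpha, int k, double d, double beta) {
  return std::pow(2.0, (alpha - 1) * k) * M_PI * std::pow(d, -0.5)
      * std::sqrt(5 + alpha * k * std::log(beta) + std::log(2.0 + k));
}

double agnostic_label_budget(double C, double e, double d, int k, double delta) {
  return C * std::pow(e, -2.0) * (d + std::log(k / delta));
}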
static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples,
                              uint32_t num_samples, struct decorr_pass *dpp, int dir)
{
    int32_t cont_samples = 0;
    int m = 0, i;

#ifdef PACK_DECORR_MONO_PASS_CONT
    if (num_samples > 16 && dir > 0) {
        int32_t pre_samples = (dpp->term > MAX_TERM) ? 2 : dpp->term;
        cont_samples = num_samples - pre_samples;
        num_samples = pre_samples;
    }
#endif

    dpp->sum_A = 0;

    if (dir < 0) {
        out_samples += (num_samples + cont_samples - 1);
        in_samples += (num_samples + cont_samples - 1);
        dir = -1;
    }
    else
        dir = 1;

    dpp->weight_A = restore_weight (store_weight (dpp->weight_A));

    for (i = 0; i < 8; ++i)
        dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i]));

    if (dpp->term > MAX_TERM) {
        while (num_samples--) {
            int32_t left, sam_A;

            if (dpp->term & 1)
                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
            else
                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

            dpp->samples_A [1] = dpp->samples_A [0];
            dpp->samples_A [0] = left = in_samples [0];
            left -= apply_weight (dpp->weight_A, sam_A);
            update_weight (dpp->weight_A, dpp->delta, sam_A, left);
            dpp->sum_A += dpp->weight_A;
            out_samples [0] = left;
            in_samples += dir;
            out_samples += dir;
        }
    }
    else if (dpp->term > 0) {
void passive_aggressive_2::train(const common::sfv_t& sfv, const string& label) {
  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;
  if (loss < 0.f) {
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    return;
  }
  update_weight(
      sfv,
      loss / (2 * sfv_norm + 1 / (2 * config_.C)),
      label,
      incorrect_label);
}
void passive_aggressive_1::train(const common::sfv_t& sfv, const string& label) {
  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;
  if (loss < 0.f) {
    storage_->register_label(label);
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    storage_->register_label(label);
    return;
  }
  update_weight(
      sfv,
      min(config_.C, loss / (2 * sfv_norm)),
      label,
      incorrect_label);
  touch(label);
}
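/*
 * To contrast the three train() variants above: they differ only in the step
 * size handed to update_weight(). The sketch below states the standard
 * passive-aggressive step sizes of Crammer et al. (2006). The snippets scale
 * the norm by 2, presumably because update_weight() adjusts both the correct
 * and the incorrect label rows, doubling the effective squared norm.
 */
#include <algorithm>

inline float pa_step(float loss, float sfv_norm) {
  return loss / sfv_norm;                        // PA: unbounded step
}

inline float pa1_step(float loss, float sfv_norm, float C) {
  return std::min(C, loss / sfv_norm);           // PA-I: step clipped at C
}

inline float pa2_step(float loss, float sfv_norm, float C) {
  return loss / (sfv_norm + 1 / (2 * C));        // PA-II: step smoothed by 1/(2C)
}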
static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples,
                              uint32_t num_samples, struct decorr_pass *dpp, int dir)
{
    int m = 0;

    dpp->sum_A = 0;

#ifdef MINMAX_WEIGHTS
    dpp->min = dpp->max = 0;
#endif

    if (dir < 0) {
        out_samples += (num_samples - 1);
        in_samples += (num_samples - 1);
        dir = -1;
    }
    else
        dir = 1;

    if (dpp->term > MAX_TERM) {
        while (num_samples--) {
            int32_t left, sam_A;

            if (dpp->term & 1)
                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
            else
                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

            dpp->samples_A [1] = dpp->samples_A [0];
            dpp->samples_A [0] = left = in_samples [0];
            left -= apply_weight (dpp->weight_A, sam_A);
            update_weight (dpp->weight_A, dpp->delta, sam_A, left);
            dpp->sum_A += dpp->weight_A;

#ifdef MINMAX_WEIGHTS
            if (dpp->weight_A > dpp->max) dpp->max = dpp->weight_A;
            if (dpp->weight_A < dpp->min) dpp->min = dpp->weight_A;
#endif

            out_samples [0] = left;
            in_samples += dir;
            out_samples += dir;
        }
    }
    else if (dpp->term > 0) {
/*!
 * \internal
 *
 * Add \a event with \a time of occurrence to update statistics.
 * Complexity: O(log N) on average, where N is the number of stored events.
 */
void add_event(const E &event, time_t time)
{
  std::unique_lock<std::mutex> locker(m_lock);
  auto it = m_treap.find(event.get_key());
  if (it) {
    update_weight(*it, time, m_period, event.get_weight());
    update_frequency(*it, time, m_period, 1.);
    it->set_time(time);
    m_treap.decrease_key(it);
  } else {
    if (m_num_events < m_max_events) {
      m_treap.insert(new E(event));
      ++m_num_events;
    } else {
      auto t = m_treap.top();
      m_treap.erase(t);
      *t = event;
      m_treap.insert(t);
    }
  }
}
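/*
 * A plausible (unverified) shape for the update_weight() used here: decay
 * the stored weight exponentially by the time elapsed since the event was
 * last seen, then add the new observation. All names below are assumptions
 * made for illustration; the real helper may differ.
 */
#include <cmath>
#include <ctime>

template <typename Event>
void update_weight_guess(Event &event, time_t now, time_t period, double add) {
  double decay = std::exp(-double(now - event.get_time()) / double(period));
  event.set_weight(event.get_weight() * decay + add);  // fade, then accumulate
}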
static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples,
                              uint32_t num_samples, struct decorr_pass *dpp, int dir)
{
    int m = 0, i;

    dpp->sum_A = 0;

    if (dir < 0) {
        out_samples += (num_samples - 1);
        in_samples += (num_samples - 1);
        dir = -1;
    }
    else
        dir = 1;

    dpp->weight_A = restore_weight (store_weight (dpp->weight_A));

    for (i = 0; i < 8; ++i)
        dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i]));

    if (dpp->term > MAX_TERM) {
        while (num_samples--) {
            int32_t left, sam_A;

            if (dpp->term & 1)
                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
            else
                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

            dpp->samples_A [1] = dpp->samples_A [0];
            dpp->samples_A [0] = left = in_samples [0];
            left -= apply_weight (dpp->weight_A, sam_A);
            update_weight (dpp->weight_A, dpp->delta, sam_A, left);
            dpp->sum_A += dpp->weight_A;
            out_samples [0] = left;
            in_samples += dir;
            out_samples += dir;
        }
    }
    else if (dpp->term > 0) {
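/*
 * All three decorr_mono_pass() variants adapt weight_A with the same
 * sign-sign LMS rule: nudge the weight by +/- delta depending on whether the
 * predictor sample and the resulting residual agree in sign. The function
 * below illustrates that rule; it is not WavPack's actual update_weight
 * macro.
 */
#include <stdint.h>

static void lms_update_sketch (int32_t *weight, int32_t delta,
                               int32_t source, int32_t result)
{
    if (source && result)                               /* zeros carry no sign information */
        *weight += ((source ^ result) < 0) ? -delta : delta;
}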
// ---------------------------------------------------------------------------------
// Functions dealing with two 1D distributions
// ---------------------------------------------------------------------------------
// The function solves the following problem:
// Given two sets of samples of two classes, a positive one and a negative one,
// a threshold-based classifier classifies a value x into a positive or a negative
// class: sign(x - \theta). The optimal \theta is chosen based on different criteria:
//   - Minimize the classification error: \lambda * p(pos)*FRR + p(neg)*FAR
//   - Minimize the error without prior: \lambda * FRR + FAR
//   - Minimize FAR with constraint FRR <= maxFRR
//   - Minimize FRR with constraint FAR <= maxFAR
// ---------------------------------------------------------------------------------
// nspc[2]   : number of samples per class
// input[j]  : array of input samples of class j
// sort_id   : array of (j, index) pairs sorting the joint samples in ascending order
// weight[j] : array of weights for the input samples of class j
//
// result = an array of 2 doubles representing:
//   result[0]: the threshold
//   result[1]: the optimized function value at that threshold
void sdTSolve(int criterion, double param1, int *nspc, double **input,
              int *sort_id, double *result, double **weight)
{
    int i, j, N = nspc[0] + nspc[1];
    double we[2], tw[2];
    double val, bval, bthresh, pos1, pos2;

    // get total weights
    if (weight) {
        tw[0] = cblas_dsum(nspc[0], weight[0], 1);
        tw[1] = cblas_dsum(nspc[1], weight[1], 1);
    } else {
        tw[0] = nspc[0];
        tw[1] = nspc[1];
    }

    // initialize everything from the leftmost position
    we[0] = tw[0];
    we[1] = 0;
    j = sort_id[0];
    i = sort_id[1];
    pos2 = input[j][i];
    bthresh = pos2 - 1;
    bval = get_value(criterion, param1, we, tw);

    // sweep to the rightmost position
    while (--N > 0) {
        update_weight(j, i, we, weight);
        val = get_value(criterion, param1, we, tw);

        // move to the right
        pos1 = pos2;
        j = *(sort_id += 2);
        i = sort_id[1];
        pos2 = input[j][i];

        // check if better
        if (val < bval) {
            bval = val;
            bthresh = 0.5 * (pos1 + pos2);
        }
    }

    update_weight(j, i, we, weight);
    val = get_value(criterion, param1, we, tw);

    // check if better
    if (val < bval) {
        bval = val;
        bthresh = pos2 + 1;
    }

    result[0] = bthresh;
    result[1] = bval;
}
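// A plausible (unverified) reading of the sweep state: we[0] holds the
// positive-class weight still classified correctly (right of the threshold)
// and we[1] the negative-class weight classified correctly (left of it), so
// update_weight() would move one sample's weight across the threshold as the
// sweep passes it. This helper is an illustration only; the real
// update_weight may differ.
static void update_weight_sketch(int j, int i, double *we, double **weight)
{
    double w = weight ? weight[j][i] : 1.0;

    if (j == 0)
        we[0] -= w;     // a positive sample drops below the threshold
    else
        we[1] += w;     // a negative sample drops below the threshold
}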