Example #1
void perceptron::train(const sfv_t& sfv, const std::string& label) {
  std::string predicted_label = classify(sfv);
  if (label == predicted_label) {
    return;
  }
  update_weight(sfv, 1.f, label, predicted_label);
}
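This is the textbook multiclass perceptron: on a mistake, reward the true label's weights by the feature vector and penalize the mispredicted label's by the same amount. A minimal sketch of what such an update_weight could look like, assuming a simple map-based weight store (the storage layout is an assumption, not the project's actual storage class):

#include <map>
#include <string>
#include <utility>
#include <vector>

typedef std::vector<std::pair<std::string, float> > sfv_t;
typedef std::map<std::string, std::map<std::string, float> > weights_t;

// Sketch only: add the scaled features to the true label's weights and
// subtract them from the mispredicted label's, the classic perceptron step.
void update_weight_sketch(weights_t& w, const sfv_t& sfv, float step,
                          const std::string& pos_label,
                          const std::string& neg_label) {
  for (size_t i = 0; i < sfv.size(); ++i) {
    w[pos_label][sfv[i].first] += step * sfv[i].second;
    w[neg_label][sfv[i].first] -= step * sfv[i].second;
  }
}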
/**
 * This function will perform one more iteration of training
 */
bool MarginActiveLearning::build_model_separable_iter(std::vector<DataPoint> &data_vec)
{
    if (this->k >= n_iteration)
        return false;

    // Visit the data in random order.
    std::vector<int> indexVec;
    for (size_t i = 0; i < data_vec.size(); i++)
        indexVec.push_back(i);
    std::random_shuffle(indexVec.begin(), indexVec.end());

    double d = (double) this->dimension;
    int m = (int)(C * sqrt(d) * (d * log(d) + log(this->k / this->delta)));
    double b = M_PI / pow(2.0, this->k - 1);

    int n_labeled = 0;
    for (size_t i = 0; i < data_vec.size(); i++) {
        /**
         * Try to add a DataPoint. If the margin of the point is less than b,
         * include the point in working_set and ask for a label. Otherwise,
         * drop the data point.
         */
        DataPoint dp = data_vec[indexVec[i]];
        if (this->margin(dp) < b) {
            this->working_set.push_back(dp);
            n_labeled++;
            n_label++;
        }
        if (n_labeled > m)
            break;
    }
    this->k += 1;
    update_weight(true);

    return true;
}
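The threshold b = M_PI / 2^(k-1) halves every round, so each call queries labels only for points ever closer to the current hyperplane. The margin test itself is just the absolute projection of a point onto the weight vector; a minimal sketch, assuming unit-normalized dense vectors (the real DataPoint and weight representations may differ):

#include <cmath>
#include <vector>

// Sketch: |w . x| for unit-length w and x, the quantity compared against
// the shrinking threshold b above.
double margin_sketch(const std::vector<double>& w,
                     const std::vector<double>& x) {
  double dot = 0.0;
  for (size_t i = 0; i < w.size(); ++i)
    dot += w[i] * x[i];
  return std::fabs(dot);
}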
Example #3
void passive_aggressive_2::train(const common::sfv_t& sfv,
                                 const string& label) {
  check_touchable(label);

  labels_.get_model()->increment(label);

  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;

  if (loss < 0.f) {
    storage_->register_label(label);
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    storage_->register_label(label);
    return;
  }
  update_weight(
      sfv,
      loss / (2 * sfv_norm + 1 / (2 * config_.regularization_weight)),
      label,
      incorrect_label);
  touch(label);
}
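For loss = 1 + margin to be the multiclass hinge loss, calc_margin has to return score(best wrong label) - score(correct label); the update then fires only when the correct label fails to beat the runner-up by at least 1. A sketch of that margin computation, assuming per-label scores are already available (the map-based interface is mine, not the project's):

#include <map>
#include <string>

// Sketch: returns s(best wrong label) - s(correct label) and reports that
// wrong label, so loss = 1 + margin above is the multiclass hinge loss.
float calc_margin_sketch(const std::map<std::string, float>& scores,
                         const std::string& label,
                         std::string& incorrect_label) {
  float correct = scores.find(label)->second;
  float best_wrong = -1e30f;
  for (std::map<std::string, float>::const_iterator it = scores.begin();
       it != scores.end(); ++it) {
    if (it->first != label && it->second > best_wrong) {
      best_wrong = it->second;
      incorrect_label = it->first;
    }
  }
  return best_wrong - correct;
}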
/*--------------------------------------------------------*/
void AzOptOnTree_TreeReg::update_with_features(
                      double nlam, 
                      double nsig, 
                      double py_avg, 
                      AzRgf_forDelta *for_delta) /* updated */
{
  int tree_num = ens->size();
  int tx; 
  for (tx = 0; tx < tree_num; ++tx) {
    ens->tree_u(tx)->restoreDataIndexes(); 
    AzReg_TreeReg *reg = reg_arr->reg(tx); 
    reg->clearFocusNode(); 

    AzIIarr iia_nx_fx; 
    tree_feat->featIds(tx, &iia_nx_fx); 
    int num = iia_nx_fx.size(); 
    AzIIFarr iifa_nx_fx_delta; 
    int ix; 
    for (ix = 0; ix < num; ++ix) {
      int nx, fx; 
      iia_nx_fx.get(ix, &nx, &fx); 

      double delta = bestDelta(nx, fx, reg, nlam, nsig, py_avg, for_delta); 
      update_weight(nx, fx, delta, reg);
    }
    ens->tree_u(tx)->releaseDataIndexes(); 
  }
}
Example #5
void perceptron::train(const common::sfv_t& sfv, const std::string& label) {
  check_touchable(label);

  labels_.get_model()->increment(label);

  std::string predicted_label = classify(sfv);
  if (label == predicted_label) {
    return;
  }
  update_weight(sfv, 1.0, label, predicted_label);
  touch(label);
}
Example #6
void PA::train(const sfv_t& sfv, const string& label){
  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;
  if (loss < 0.f){
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    return;
  }
  update_weight(sfv, loss / sfv_norm, label, incorrect_label);
}
/**
 * This function will perform one more iteration of training
 */
bool MarginActiveLearning::build_model_unseparable_iter(std::vector<DataPoint> &data_vec, double alpha, double beta)
{
    if (this->k > n_iteration)
        return false;

    // Visit the data in random order.
    std::vector<int> indexVec;
    for (size_t i = 0; i < data_vec.size(); i++)
        indexVec.push_back(i);
    std::random_shuffle(indexVec.begin(), indexVec.end());

    double d = (double) this->dimension;
    double b = pow(2.0, (alpha - 1) * k) * M_PI * pow(d, -0.5) * sqrt(5 + alpha * k * log(beta) + log(2.0 + k));
    double e = pow(2.0, alpha * (1 - k) - 4) * beta / sqrt(5 + alpha * k * log(2.0) - log(beta) + log(1.0 + k));
    double m = C * pow(e, -2.0) * (d + log(k / delta));

    std::cout << "k: " << k << ", d: " << d << ", b: " << b << ", e: " << e << ", m: " << m << std::endl;

    int n_labeled = 0;
    for (size_t i = 0; i < data_vec.size(); i++) {
        /**
         * Try to add a DataPoint. If the margin of the point is less than b,
         * include the point in working_set and ask for a label. Otherwise,
         * include the point in working_set with an automatic label.
         */
        DataPoint dp = data_vec[indexVec[i]];
        if (k == 1) {
            // First iteration: label everything.
            this->working_set.push_back(dp);
            n_labeled++;
            n_label++;
        } else if (this->margin(dp) < b) {
            this->working_set.push_back(dp);
            n_labeled++;
            n_label++;
        } else {
            dp.label = this->classify(dp);
            this->working_set.push_back(dp);
        }

        if (n_labeled >= m)
            break;
    }
    this->k += 1;
    update_weight(false);
    this->working_set.clear();
    return true;
}
Example #8
static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_t num_samples, struct decorr_pass *dpp, int dir)
{
    int32_t cont_samples = 0;
    int m = 0, i;

#ifdef PACK_DECORR_MONO_PASS_CONT
    if (num_samples > 16 && dir > 0) {
        int32_t pre_samples = (dpp->term > MAX_TERM) ? 2 : dpp->term;
        cont_samples = num_samples - pre_samples;
        num_samples = pre_samples;
    }
#endif

    dpp->sum_A = 0;

    if (dir < 0) {
        out_samples += (num_samples + cont_samples - 1);
        in_samples += (num_samples + cont_samples - 1);
        dir = -1;
    }
    else
        dir = 1;

    dpp->weight_A = restore_weight (store_weight (dpp->weight_A));

    for (i = 0; i < 8; ++i)
        dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i]));

    if (dpp->term > MAX_TERM) {
        while (num_samples--) {
            int32_t left, sam_A;

            if (dpp->term & 1)
                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
            else
                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

            dpp->samples_A [1] = dpp->samples_A [0];
            dpp->samples_A [0] = left = in_samples [0];

            left -= apply_weight (dpp->weight_A, sam_A);
            update_weight (dpp->weight_A, dpp->delta, sam_A, left);
            dpp->sum_A += dpp->weight_A;
            out_samples [0] = left;
            in_samples += dir;
            out_samples += dir;
        }
    }
    else if (dpp->term > 0) {
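Examples 8, 11, and 13 are WavPack-style audio decorrelation passes: sam_A is a short linear prediction of the next sample, apply_weight scales it by an adaptive fixed-point weight, and update_weight nudges that weight by delta using only the signs of the prediction and the residual. A hedged sketch of that sign rule (the codec's real macro adds fixed-point rounding and weight clamping omitted here):

#include <stdint.h>

/* Sketch: if prediction (source) and residual (result) share a sign, the
 * weight was too small, so grow it by delta; opposite signs shrink it. */
static void update_weight_sketch (int *weight, int delta,
                                  int32_t source, int32_t result)
{
    if (source && result)
        *weight += ((source ^ result) < 0) ? -delta : delta;
}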
Example #9
void passive_aggressive_2::train(const common::sfv_t& sfv,
                                 const string& label) {
  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;

  if (loss < 0.f) {
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    return;
  }
  update_weight(
      sfv, loss / (2 * sfv_norm + 1 / (2 * config_.C)), label, incorrect_label);
}
Example #10
void passive_aggressive_1::train(const common::sfv_t& sfv,
                                 const string& label) {
  string incorrect_label;
  float margin = calc_margin(sfv, label, incorrect_label);
  float loss = 1.f + margin;
  if (loss < 0.f) {
    storage_->register_label(label);
    return;
  }
  float sfv_norm = squared_norm(sfv);
  if (sfv_norm == 0.f) {
    storage_->register_label(label);
    return;
  }

  update_weight(
      sfv, min(config_.C, loss / (2 * sfv_norm)), label, incorrect_label);
  touch(label);
}
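Examples 6, 9, and 10 show the three classic passive-aggressive step sizes side by side: plain PA uses loss/||x||^2, PA-I clips that step at C, and PA-II adds a 1/(2C) term to the denominator so the step shrinks smoothly instead of being clipped. A small numeric comparison using the formulas exactly as they appear above (values are illustrative):

#include <algorithm>
#include <cstdio>

int main() {
  // Illustrative values: loss = 1.2, squared norm = 1.0, C = 0.5.
  float loss = 1.2f, sfv_norm = 1.0f, C = 0.5f;
  float pa  = loss / sfv_norm;                      // PA:    1.2
  float pa1 = std::min(C, loss / (2 * sfv_norm));   // PA-I:  0.5 (clipped)
  float pa2 = loss / (2 * sfv_norm + 1 / (2 * C));  // PA-II: 0.4 (smoothed)
  std::printf("PA: %g  PA-I: %g  PA-II: %g\n", pa, pa1, pa2);
  return 0;
}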
Example #11
static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_t num_samples, struct decorr_pass *dpp, int dir)
{
    int m = 0;

    dpp->sum_A = 0;

#ifdef MINMAX_WEIGHTS
    dpp->min = dpp->max = 0;
#endif

    if (dir < 0) {
        out_samples += (num_samples - 1);
        in_samples += (num_samples - 1);
        dir = -1;
    }
    else
        dir = 1;

    if (dpp->term > MAX_TERM) {
        while (num_samples--) {
            int32_t left, sam_A;

            if (dpp->term & 1)
                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
            else
                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

            dpp->samples_A [1] = dpp->samples_A [0];
            dpp->samples_A [0] = left = in_samples [0];

            left -= apply_weight (dpp->weight_A, sam_A);
            update_weight (dpp->weight_A, dpp->delta, sam_A, left);
            dpp->sum_A += dpp->weight_A;
#ifdef MINMAX_WEIGHTS
            if (dpp->weight_A > dpp->max) dpp->max = dpp->weight_A;
            if (dpp->weight_A < dpp->min) dpp->min = dpp->weight_A;
#endif
            out_samples [0] = left;
            in_samples += dir;
            out_samples += dir;
        }
    }
    else if (dpp->term > 0) {
Example #12
	/*!
	 * \internal
	 *
	 * Add \a event occurring at \a time to the statistics.
	 * Complexity: O(log N) on average, where N is the number of stored events.
	 */
	void add_event(const E &event, time_t time)
	{
		std::unique_lock<std::mutex> locker(m_lock);
		auto it = m_treap.find(event.get_key());
		if (it) {
			update_weight(*it, time, m_period, event.get_weight());
			update_frequency(*it, time, m_period, 1.);
			it->set_time(time);
			m_treap.decrease_key(it);
		} else {
			if (m_num_events < m_max_events) {
				m_treap.insert(new E(event));
				++m_num_events;
			} else {
				auto t = m_treap.top();
				m_treap.erase(t);
				*t = event;
				m_treap.insert(t);
			}
		}
	}
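The update_weight and update_frequency helpers here evidently decay the stored statistic by how long the event has sat idle relative to m_period before folding in the new observation. A minimal sketch of such a time-decayed update (the exponential-decay form is an assumption; the real helper may differ):

#include <cmath>
#include <ctime>

// Sketch: decay the stored value by its idle time relative to the
// averaging period, then add the new contribution.
double decayed_update_sketch(double stored, time_t last_time, time_t now,
                             double period, double add) {
    double decay = std::exp(-double(now - last_time) / period);
    return stored * decay + add;
}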
Example #13
static void decorr_mono_pass (int32_t *in_samples, int32_t *out_samples, uint32_t num_samples, struct decorr_pass *dpp, int dir)
{
    int m = 0, i;

    dpp->sum_A = 0;

    if (dir < 0) {
        out_samples += (num_samples - 1);
        in_samples += (num_samples - 1);
        dir = -1;
    }
    else
        dir = 1;

    dpp->weight_A = restore_weight (store_weight (dpp->weight_A));

    for (i = 0; i < 8; ++i)
        dpp->samples_A [i] = exp2s (log2s (dpp->samples_A [i]));

    if (dpp->term > MAX_TERM) {
        while (num_samples--) {
            int32_t left, sam_A;

            if (dpp->term & 1)
                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
            else
                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

            dpp->samples_A [1] = dpp->samples_A [0];
            dpp->samples_A [0] = left = in_samples [0];

            left -= apply_weight (dpp->weight_A, sam_A);
            update_weight (dpp->weight_A, dpp->delta, sam_A, left);
            dpp->sum_A += dpp->weight_A;
            out_samples [0] = left;
            in_samples += dir;
            out_samples += dir;
        }
    }
    else if (dpp->term > 0) {
Example #14
// ---------------------------------------------------------------------------------
// Functions dealing with two 1D distributions
// ---------------------------------------------------------------------------------
// The function solves the following problem:
// Given two sets of samples of two classes, a positive one and a negative one,
// a threshold-based classifier classifies a value x into a positive or a negative
// class: sign(x - \theta). The optimal \theta is chosen based on different criteria:
//  - Minimize the classification error: \lambda * p(pos)*FRR + p(neg)*FAR
//  - Minimize the error without prior: \lambda * FRR + FAR
//  - Minimize FAR with constraint FRR <= maxFRR
//  - Minimize FRR with constraint FAR <= maxFAR
// ---------------------------------------------------------------------------------
// nspc[2]: number of samples per class
// input[j]: array of input samples of class j
// sort_id: array of (j,index) sorting the joint samples in ascending order
// weight[j]: array of weights for the input samples of class j
//
// result = an array of 2 doubles representing:
//  result[0]: the threshold
//  result[1]: the optimized function value at that threshold
void sdTSolve(int criterion, double param1,
    int *nspc, double **input, int *sort_id,
    double *result, double **weight)
{
    int i, j, N = nspc[0] + nspc[1];
    double we[2], tw[2];
    double val, bval, bthresh, pos1, pos2;

    // get total weights
    if(weight)
    {
        tw[0] = cblas_dsum(nspc[0],weight[0],1);
        tw[1] = cblas_dsum(nspc[1],weight[1],1);
    }
    else
    {
        tw[0] = nspc[0];
        tw[1] = nspc[1];
    }

    // initialize everything from the leftmost sample
    we[0] = tw[0]; we[1] = 0;
    j = sort_id[0];
    i = sort_id[1];
    pos2 = input[j][i];
    bthresh = pos2 - 1;
    bval = get_value(criterion,param1,we,tw);

    // scan to the rightmost sample
    while(--N > 0)
    {
        update_weight(j,i,we,weight);
        val = get_value(criterion,param1,we,tw);

        // move to the right
        pos1 = pos2;
        j = *(sort_id += 2);
        i = sort_id[1];
        pos2 = input[j][i];

        // check if better
        if(val < bval)
        {
            bval = val;
            bthresh = 0.5 * (pos1 + pos2);
        }
    }

    update_weight(j,i,we,weight);
    val = get_value(criterion,param1,we,tw);

    // check if better
    if(val < bval)
    {
        bval = val;
        bthresh = pos2 + 1;
    }

    result[0] = bthresh;
    result[1] = bval;
}
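The `sort_id += 2` stride implies sort_id is laid out as interleaved (class, index) pairs in ascending order of sample value. A hypothetical driver that builds that layout for two tiny unweighted classes and calls the solver (the criterion code 0 is a placeholder; the helper is mine, not the library's):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

void sdTSolve_example() {
    double pos[] = {0.9, 1.4, 2.0};            // class 0 samples
    double neg[] = {0.1, 0.8, 1.1};            // class 1 samples
    int nspc[2] = {3, 3};
    double *input[2] = {pos, neg};

    // Sort (class, index) pairs by sample value, then flatten to the
    // interleaved layout sdTSolve walks with sort_id += 2.
    std::vector<std::pair<double, std::pair<int, int> > > order;
    for (int j = 0; j < 2; ++j)
        for (int i = 0; i < nspc[j]; ++i)
            order.push_back(std::make_pair(input[j][i], std::make_pair(j, i)));
    std::sort(order.begin(), order.end());

    std::vector<int> sort_id;
    for (size_t s = 0; s < order.size(); ++s) {
        sort_id.push_back(order[s].second.first);
        sort_id.push_back(order[s].second.second);
    }

    double result[2];
    sdTSolve(0 /* criterion */, 1.0 /* param1 */, nspc, input,
             &sort_id[0], result, NULL /* unweighted */);
    // result[0] = best threshold, result[1] = criterion value there
}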