/********************************************************************************
 Scan a family of parallel lines (all with slope tan(theta_best)) and find the
 intercept that yields the highest F-score.

 The lines are visited in array order, so c_values must already be sorted in
 increasing order by the caller. Crossing a line moves the point(s) lying on it
 into the positive half-plane (the half-plane under the line): a positive point
 turns one false negative into a true positive, a negative point adds one false
 positive. Points sharing the same intercept are moved together before the
 score is evaluated.

 INPUT
   size:           number of lines (= entries of c_values / order_c_values)
   tp:             initial number of true positives
   fp:             initial number of false positives
   tn:             initial number of true negatives (accepted for interface
                   compatibility; the score is computed from tp, fn, fp only)
   fn:             initial number of false negatives
   labels:         labels of the points; a value > 0 means positive
   c_values:       intercepts of the lines, in scanning order
   order_c_values: order_c_values[k] is the index into labels of the point
                   associated with c_values[k]
   max_F:          output; set to the F-score of the initial counts and raised
                   whenever a scanned line scores strictly higher
   c_best:         output; intercept of the best line. Written only when some
                   line improves on the initial score, otherwise left untouched
   theta_best:     angle computed in the previous step (not used here)
********************************************************************************/
void compute_best_c(int size, int tp, int fp, int tn, int fn, int *labels, double *c_values, int *order_c_values, double *max_F, double *c_best, double theta_best)
{
	int i, h;

	(void)tn;
	(void)theta_best;

	*max_F = compute_F(tp, fn, fp);

	for (i = 0; i < size; i++) {
		int cnt = 0, pos_labels = 0, neg_labels = 0;
		double tmp_hmean_under;

		/* Count the further points sharing this intercept. The bound is
		   tested first so c_values[size] is never read. */
		while ((i + cnt + 1) < size && c_values[i] == c_values[i + cnt + 1]) {
			cnt++;
		}

		for (h = 0; h <= cnt; h++) {
			if (labels[order_c_values[i + h]] > 0) {
				pos_labels++;
			} else {
				neg_labels++;
			}
		}

		/* The points on the current line now fall in the positive half-plane. */
		tp += pos_labels;
		fn -= pos_labels;
		fp += neg_labels;

		/* F-score for the current line, positive half-plane under the line. */
		tmp_hmean_under = compute_F(tp, fn, fp);

		if (tmp_hmean_under > *max_F) {
			*max_F = tmp_hmean_under;
			*c_best = c_values[i];
		}

		/* Skip the collinear points already accounted for. */
		i = i + cnt;
	}
}
/*****************************************************************************
 * evolve                                                                    *
 *                                                                           *
 * Python signature: evolve(threads, dt, steps, N, m, r, v, F, Ft)           *
 *                                                                           *
 * Advances the system by `steps` time steps of length `dt`, in place.       *
 * Each step calls compute_F(threads, N, m, r, F, Ft) and then, for every    *
 * body i, updates the velocity from F / m and the position from the new     *
 * velocity. r, v and F are indexed as interleaved pairs: [2*i] and [2*i+1]. *
 *                                                                           *
 * All five arrays must be C-contiguous float64. m needs at least N          *
 * elements; r, v and F need at least 2*N. Ft is only forwarded to           *
 * compute_F, so its required length is not checked here.                    *
 *                                                                           *
 * Returns None; raises TypeError / ValueError on unusable arrays.           *
 *****************************************************************************/
static PyObject *evolve(PyObject *self, PyObject *args) {
    // Variable declarations.
    // "L" stores a C long long; parsing into temporaries of exactly that
    // type keeps the call correct where long is narrower than npy_int64.
    long long threads_in, steps_in, n_in;
    npy_int64 N, threads, steps, step, i, xi, yi;
    npy_float64 dt;
    PyArrayObject *py_m, *py_r, *py_v, *py_F, *py_Ft;
    npy_float64 *m, *r, *v, *F, *Ft;
    int k;

    // Parse variables.
    if (!PyArg_ParseTuple(args, "LdLLO!O!O!O!O!",
                          &threads_in,
                          &dt,
                          &steps_in,
                          &n_in,
                          &PyArray_Type, &py_m,
                          &PyArray_Type, &py_r,
                          &PyArray_Type, &py_v,
                          &PyArray_Type, &py_F,
                          &PyArray_Type, &py_Ft)) {
        return NULL;
    }
    threads = (npy_int64)threads_in;
    steps = (npy_int64)steps_in;
    N = (npy_int64)n_in;

    // The raw data pointers are only meaningful for contiguous float64 data.
    {
        PyArrayObject *arrays[5];
        arrays[0] = py_m;
        arrays[1] = py_r;
        arrays[2] = py_v;
        arrays[3] = py_F;
        arrays[4] = py_Ft;
        for (k = 0; k < 5; ++k) {
            if (PyArray_TYPE(arrays[k]) != NPY_FLOAT64 ||
                !PyArray_IS_C_CONTIGUOUS(arrays[k])) {
                PyErr_SetString(PyExc_TypeError,
                                "evolve: arrays must be C-contiguous float64");
                return NULL;
            }
        }
    }

    // Every index touched by the update loop must be inside its array.
    if (N > 0 &&
        ((npy_int64)PyArray_SIZE(py_m) < N ||
         (npy_int64)PyArray_SIZE(py_r) / 2 < N ||
         (npy_int64)PyArray_SIZE(py_v) / 2 < N ||
         (npy_int64)PyArray_SIZE(py_F) / 2 < N)) {
        PyErr_SetString(PyExc_ValueError,
                        "evolve: m needs N elements; r, v and F need 2*N");
        return NULL;
    }

    omp_set_num_threads((int)threads);

    // Get underlying arrays from numpy arrays.
    m = (npy_float64*)PyArray_DATA(py_m);
    r = (npy_float64*)PyArray_DATA(py_r);
    v = (npy_float64*)PyArray_DATA(py_v);
    F = (npy_float64*)PyArray_DATA(py_F);
    Ft = (npy_float64*)PyArray_DATA(py_Ft);

    // Evolve the world.
    for (step = 0; step < steps; ++step) {
        compute_F(threads, N, m, r, F, Ft);

        // Each iteration touches only body i's own entries, so the
        // iterations are independent of one another.
#pragma omp parallel for private(i, xi, yi)
        for (i = 0; i < N; ++i) {
            xi = 2 * i;
            yi = xi + 1;

            // Velocity first, then position from the updated velocity.
            v[xi] += F[xi] * dt / m[i];
            v[yi] += F[yi] * dt / m[i];

            r[xi] += v[xi] * dt;
            r[yi] += v[yi] * dt;
        }
    }

    Py_RETURN_NONE;
}
/*****************************************************************************
 * evolve                                                                    *
 *                                                                           *
 * Python signature: evolve(threads, dt, steps, N, m, r, v, F)               *
 *                                                                           *
 * Advances the system by `steps` time steps of length `dt`, in place.       *
 * Each step calls compute_F(N, m, r, F) and then, for every body i,         *
 * updates the velocity from F / m and the position from the new velocity.   *
 * r, v and F are indexed as interleaved pairs: [2*i] and [2*i+1].           *
 *                                                                           *
 * `threads` is parsed but not used by this variant.                         *
 *                                                                           *
 * All four arrays must be C-contiguous float64. m needs at least N          *
 * elements; r, v and F need at least 2*N.                                   *
 *                                                                           *
 * Returns None; raises TypeError / ValueError on unusable arrays.           *
 *****************************************************************************/
static PyObject *evolve(PyObject *self, PyObject *args) {
    // Declare variables.
    // "L" stores a C long long; parsing into temporaries of exactly that
    // type keeps the call correct where long is narrower than npy_int64.
    long long threads_in, steps_in, n_in;
    npy_int64 N, threads, steps, step, i, xi, yi;
    npy_float64 dt;
    PyArrayObject *py_m, *py_r, *py_v, *py_F;
    npy_float64 *m, *r, *v, *F;
    int k;

    // Parse arguments.
    if (!PyArg_ParseTuple(args, "LdLLO!O!O!O!",
                          &threads_in,
                          &dt,
                          &steps_in,
                          &n_in,
                          &PyArray_Type, &py_m,
                          &PyArray_Type, &py_r,
                          &PyArray_Type, &py_v,
                          &PyArray_Type, &py_F)) {
        return NULL;
    }
    threads = (npy_int64)threads_in;
    steps = (npy_int64)steps_in;
    N = (npy_int64)n_in;
    (void)threads;

    // The raw data pointers are only meaningful for contiguous float64 data.
    {
        PyArrayObject *arrays[4];
        arrays[0] = py_m;
        arrays[1] = py_r;
        arrays[2] = py_v;
        arrays[3] = py_F;
        for (k = 0; k < 4; ++k) {
            if (PyArray_TYPE(arrays[k]) != NPY_FLOAT64 ||
                !PyArray_IS_C_CONTIGUOUS(arrays[k])) {
                PyErr_SetString(PyExc_TypeError,
                                "evolve: arrays must be C-contiguous float64");
                return NULL;
            }
        }
    }

    // Every index touched by the update loop must be inside its array.
    if (N > 0 &&
        ((npy_int64)PyArray_SIZE(py_m) < N ||
         (npy_int64)PyArray_SIZE(py_r) / 2 < N ||
         (npy_int64)PyArray_SIZE(py_v) / 2 < N ||
         (npy_int64)PyArray_SIZE(py_F) / 2 < N)) {
        PyErr_SetString(PyExc_ValueError,
                        "evolve: m needs N elements; r, v and F need 2*N");
        return NULL;
    }

    // Get underlying arrays from numpy arrays.
    m = (npy_float64*)PyArray_DATA(py_m);
    r = (npy_float64*)PyArray_DATA(py_r);
    v = (npy_float64*)PyArray_DATA(py_v);
    F = (npy_float64*)PyArray_DATA(py_F);

    // Evolve the world.
    for (step = 0; step < steps; ++step) {
        compute_F(N, m, r, F);

        for (i = 0; i < N; ++i) {
            xi = 2 * i;
            yi = xi + 1;

            // Velocity first, then position from the updated velocity.
            v[xi] += F[xi] * dt / m[i];
            v[yi] += F[yi] * dt / m[i];

            r[xi] += v[xi] * dt;
            r[yi] += v[yi] * dt;
        }
    }

    Py_RETURN_NONE;
}
/********************************************************************************
 Find, among the lines passing through the labelled points
 (pos_vect[i], neg_vect[i]), the one giving the highest F-score.

 The search has two steps:
   Step 1) among the lines through the origin and each point, find the angle
           that maximizes the F-score. Both orientations are tracked: positive
           half-plane over the line and positive half-plane under the line.
           check_halfplane then picks the orientation, score and angle.
   Step 2) among the lines with the slope found in step 1 that pass through
           each point, find the intercept that maximizes the F-score
           (see compute_best_c).

 INPUT
   pos_vect: position i holds the weighted sum of positive neighbors of item i
   neg_vect: position i holds the weighted sum of negative neighbors of item i
   labels:   item labels; a value > 0 means positive
   n:        pointer to the number of points. If *n <= 0 the function returns
             immediately and leaves every output untouched.
   theta:    output; angle of the optimal line with the x axis
   c:        output; optimal threshold, c = -q*cos(theta), q being the
             intercept of the optimal line
   opt_hmean: output; F-score of the optimal line
   positive_halfplane: output; orientation chosen by check_halfplane
             (documented by the original author as 1 over, -1 under the line)
********************************************************************************/
void error_minimization(double *pos_vect, double *neg_vect, int *labels, int *n, double *theta, double *c, double *opt_hmean, int *positive_halfplane)
{
	int N_pos = 0, pos_halfplane = 0, N_neg, tp_o, fn_o, fp_o, tp_u, fn_u, fp_u, tn_u, cnt, pos_labels, neg_labels,
			opt_fp_o = 0, opt_fp_u = 0, opt_fn_o = 0, opt_fn_u = 0, opt_tp_o = 0, opt_tp_u = 0;
	int i, h;
	const int n_ = (*n);

	/* A variable-length array must have a strictly positive size. */
	if (n_ <= 0) {
		return;
	}

	int order_thetas[n_], order_c_values[n_];
	double max_F = 0, theta_best = 0, c_best = 0, opt_hmean_over = 0, opt_hmean_under = 0;
	/* Both start at 0 so check_halfplane never receives an indeterminate
	   value when one orientation is never updated. */
	double theta_best_over = 0, theta_best_under = 0, tmp_hmean_under, tmp_hmean_over;
	double thetas[n_], c_values[n_];

	/* finding the number of positive labels */
	N_pos = count_positives(labels, n_);
	N_neg = n_ - N_pos;

	/* initial errors when positive half-plane 'over' the line:
	   every point is predicted positive */
	tp_o = N_pos; fp_o = N_neg; fn_o = 0;
	tmp_hmean_over = compute_F(tp_o, fn_o, fp_o);

	/* initial errors when positive half-plane 'under' the line:
	   every point is predicted negative */
	tp_u = 0; fp_u = 0; fn_u = N_pos;
	tmp_hmean_under = 0.0;

	/* ------- Step 1: computing best angle --------------- */
	/* angle of each line passing through the origin and a point */
	compute_angles(pos_vect, neg_vect, order_thetas, thetas, n_);
	/* sorting angles and their indices */
	quicksort(thetas, order_thetas, 0, (n_) - 1);

	/* scanning ordered angles to find the optimum line */
	for (i = 0; i < n_; i++) {
		/* NOTE(review): 1.57 looks like a truncated pi/2 cut-off -- confirm. */
		if (thetas[i] >= 1.57) break;

		cnt = 0;
		pos_labels = 0;
		neg_labels = 0;

		/* Count the further points with the same angle. The bound is tested
		   first so thetas[n_] is never read. */
		while ((i + cnt + 1) < n_ && thetas[i] == thetas[i + cnt + 1]) {
			cnt++;
		}

		/* Kept from the original: at the last index no point is moved, so
		   the counts stay as they were for the previous line. */
		if (i != (n_ - 1)) {
			for (h = 0; h <= cnt; h++) {
				if (labels[order_thetas[i + h]] > 0) {
					pos_labels++;
				} else {
					neg_labels++;
				}
			}
		}

		/* updating actual errors: the points leave the 'over' half-plane
		   and enter the 'under' half-plane */
		tp_o -= pos_labels; fn_o += pos_labels; fp_o -= neg_labels;
		tp_u += pos_labels; fn_u -= pos_labels; fp_u += neg_labels;

		tmp_hmean_over = compute_F(tp_o, fn_o, fp_o);
		tmp_hmean_under = compute_F(tp_u, fn_u, fp_u);

		/* check whether current F-scores are greater than the current maxima */
		check_update(tmp_hmean_under, &opt_hmean_under, &theta_best_under, thetas[i], &opt_tp_u, &opt_fp_u, &opt_fn_u, tp_u, fp_u, fn_u);
		check_update(tmp_hmean_over, &opt_hmean_over, &theta_best_over, thetas[i], &opt_tp_o, &opt_fp_o, &opt_fn_o, tp_o, fp_o, fn_o);

		/* skip the collinear points already accounted for */
		i = i + cnt;
	}

	/* choosing the optimum half-plane, F-score and angle */
	check_halfplane(opt_hmean_over, opt_hmean_under, theta_best_over, theta_best_under, &pos_halfplane, &max_F, &theta_best);

	/* ------- Step 2: computing best intercept --------------- */
	compute_c(pos_vect, neg_vect, order_c_values, c_values, theta_best, n_);
	/* sorting intercepts and their indices */
	quicksort(c_values, order_c_values, 0, n_ - 1);

	/* NOTE(review): the intercept scan always starts from the 'under'
	   configuration, whichever orientation step 1 selected -- confirm this
	   is intended. */
	tp_u = 0; fp_u = 0; tn_u = N_neg; fn_u = N_pos;
	compute_best_c(n_, tp_u, fp_u, tn_u, fn_u, labels, c_values, order_c_values, &max_F, &c_best, theta_best);

	theta_best = theta_best + DBL_MIN;
	*opt_hmean = max_F;
	*theta = theta_best;
	*c = -c_best * cos(*theta);
	*positive_halfplane = (int)pos_halfplane;
}