Exemple #1
0
/*
 * Write a one-line progress report for `g` to stderr.
 *
 * The line always carries t_size(g) and the AUC computed by auc() from
 * y_rowns(g), y_model(g) and y_label(g). When has_test(g) is exactly 1,
 * a second AUC computed from t_rowns(g), t_model(g) and t_label(g) is
 * appended. The line is terminated with a newline in both cases.
 *
 * NOTE(review): judging by the message labels, the y_* accessors describe
 * the training set and the t_* accessors a held-out test set - confirm
 * against the GBDT definition.
 */
void rank_repo(GBDT *g)
{
    FILE *report = stderr;
    const double auc_on_train = auc(y_rowns(g), y_model(g), y_label(g));

    fprintf(report, "current tree size : %4d, train_auc : %.8f", t_size(g), auc_on_train);

    if (1 == has_test(g)) {
        /* The test AUC is only computed when it is going to be reported. */
        fprintf(report, " test_auc : %.8f", auc(t_rowns(g), t_model(g), t_label(g)));
    }

    fputs("\n", report);
}
Exemple #2
0
/*
 * Print a one-line progress report for `g` to stdout.
 *
 * Output: t_size(g) followed by the AUC that auc() returns for
 * y_rowns(g), y_model(g), y_label(g), printed with three decimals.
 * If has_test(g) is exactly 1, the AUC for t_rowns(g), t_model(g),
 * t_label(g) is appended to the same line. A newline always ends it.
 *
 * NOTE(review): the labels in the message suggest y_* is training data
 * and t_* is test data - confirm against the GBDT definition.
 */
void lr_repo(GBDT *g)
{
    const double auc_on_train = auc(y_rowns(g), y_model(g), y_label(g));

    printf("current tree size : %4d, train_auc : %.3f", t_size(g), auc_on_train);

    if (has_test(g) != 1) {
        /* No test figures to add: just finish the line. */
        printf("\n");
        return;
    }

    const double auc_on_test = auc(t_rowns(g), t_model(g), t_label(g));
    printf(" test_auc : %.3f", auc_on_test);
    printf("\n");
}
Exemple #3
0
/* --------------------------------------
 * brief : calculating auc value for LR
 * x     : current theta result; indexed by the feature ids stored
 *         in the dataset (ds->ids)
 * _ds   : dataset used for scores, passed as void* and used as RDS*
 * return: auc(ds->r, scores, ds->y), or -1.0 when the temporary
 *         score buffer cannot be allocated (a value an AUC, which
 *         normally lies in [0, 1], does not take)
 *
 * Layout read from the dataset: row i owns ds->l[i] consecutive
 * entries of ds->ids (and of ds->val when it is non-NULL), rows
 * being stored back to back. The score of row i is the sum of
 * val * x[id] over its entries; when ds->val is NULL each listed
 * feature contributes x[id] alone.
 * -------------------------------------- */
double lr_auc(double *x, void *_ds){
    RDS    *ds  = (RDS*)_ds;
    double *y   = ds->y;
    int     row = ds->r;
    double *val = ds->val;
    int    *id  = ds->ids;
    int    *len = ds->l;
    int offs = 0, i = 0, j = 0;

    /* calloc hands back zeroed scores, so no separate memset is needed.
     * At least one element is always requested so that a NULL result
     * can only mean "out of memory", never "zero-sized request". */
    double *s = (double*)calloc(row > 0 ? (size_t)row : 1, sizeof(double));
    if (s == NULL){
        return -1.0;
    }

    for (i = offs = 0; i < row; i++){
        if (val){
            for (j = 0; j < len[i]; j++){
                s[i] += val[offs + j] * x[id[offs + j]];
            }
        }
        else{
            for (j = 0; j < len[i]; j++){
                s[i] += x[id[offs + j]];
            }
        }
        /* advance to the first entry of the next row */
        offs += len[i];
    }

    double auc_v = auc(row, s, y);
    free(s); s = NULL;
    return auc_v;
}
Exemple #4
0
/*
 * Parse `text` as a non-negative base-10 count.
 * Returns 0 and stores the value in *out on success; returns -1 when the
 * text is empty, has trailing characters, or is negative.
 */
static int parse_count_arg( const char* text, unsigned long* out ) {
	char* end = NULL;
	long value = strtol( text, &end, 10 );
	if( end == text || *end != '\0' || value < 0 ) {
		return -1;
	}
	*out = (unsigned long) value;
	return 0;
}

/*
 * Entry point: accepts an optional -h flag followed by exactly two
 * positional arguments, which are parsed as counts and handed to auc()
 * as (negative, positive).
 *
 * Returns 0 after -h or after a successful run, 1 when the argument
 * count is wrong or a count is not a valid non-negative integer
 * (usage is printed via usage() in both error cases).
 */
int main( int argc, char* argv[] ) {
	/* getopt() returns int and signals the end of options with -1.
	 * Storing the result in a plain char breaks that test on platforms
	 * where char is unsigned (-1 becomes 255 and the loop never ends). */
	int c;
	while( (c = getopt( argc, argv, "+h" )) != -1 ) {
		if( c == 'h' ) {
			usage( argv[0] );
			return 0;
		}
	}
	if( argc != optind + 2 ) {
		usage( argv[0] );
		return 1;
	}

	unsigned long negative = 0;
	unsigned long positive = 0;
	const char* negative_text = argv[optind++];
	const char* positive_text = argv[optind++];
	/* Reject malformed or negative counts instead of silently turning
	 * them into 0 or a huge unsigned value. */
	if( parse_count_arg( negative_text, &negative ) != 0 ||
	    parse_count_arg( positive_text, &positive ) != 0 ) {
		usage( argv[0] );
		return 1;
	}

	auc( negative, positive );
	return 0;
}
Exemple #5
0
/*
 * Read instances from `input` line by line, predict each one with the
 * global `model`, and write the predicted label to `output` ("%g\n").
 *
 * Each line is tokenised as: <label> <index>:<value> <index>:<value> ...
 * Indices must be strictly increasing within a line; any malformed token
 * is reported through exit_input_error() with the 1-based line number.
 *
 * For every instance the decision value from svm_predict_values() and the
 * sign of the target label (+1 if > 0, otherwise -1) are collected; after
 * the last line they are passed to validation_function(), accuracy(),
 * auc() and bac(), in that order.
 *
 * The function exits the process when the model is NU_SVR / EPSILON_SVR
 * or when the feature buffer cannot be grown.
 *
 * NOTE(review): the global `line` buffer allocated here is not released
 * in this function - confirm that the caller frees it.
 */
void binary_class_predict(FILE *input, FILE *output){
	int    total = 0;	// number of input lines processed so far
	int    *labels;
	int    max_nr_attr = 64;
	struct svm_node *x = Malloc(struct svm_node, max_nr_attr);
	dvec_t dec_values;
	ivec_t true_labels;


	int svm_type=svm_get_svm_type(model);
	
	if (svm_type==NU_SVR || svm_type==EPSILON_SVR){
		fprintf(stderr, "wrong svm type.");
		exit(1);
	}

	labels = Malloc(int, svm_get_nr_class(model));
	svm_get_labels(model, labels);
	
	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0

		label = strtok(line," \t");
		if(label == NULL) // empty line: nothing to hand to strtod
			exit_input_error(total+1);
		target_label = strtod(label,&endptr);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr - 2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				// Keep the old pointer until the new one is known to be valid.
				struct svm_node *grown = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
				if(grown == NULL)
				{
					fprintf(stderr, "can't allocate enough memory\n");
					exit(1);
				}
				x = grown;
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			// <ctype.h> functions require a value representable as unsigned char
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace((unsigned char)*endptr)))
				exit_input_error(total+1);

			++i;
		}
		x[i].index = -1;	// terminator entry after the last feature

		predict_label = svm_predict(model,x);
		fprintf(output,"%g\n",predict_label);


		double dec_value;
		svm_predict_values(model, x, &dec_value);
		true_labels.push_back((target_label > 0)? 1: -1);
		// Flip the sign when the model's first label is non-positive
		// (presumably so a positive decision value always means the
		// positive class - verify against the model's label order).
		if(labels[0] <= 0) dec_value *= -1;
		dec_values.push_back(dec_value);

		++total;	// keeps the line number in later error reports accurate
	}	

	validation_function(dec_values, true_labels);
	accuracy(dec_values, true_labels);
	auc(dec_values, true_labels);
	bac(dec_values, true_labels);

	free(labels);
	free(x);
}