Exemplo n.º 1
0
// Prediction driver (binary classification only): reads instances from
// `input`, writes predictions to `output` using the file-level global
// `model_`. NOTE(review): this snippet is truncated — the function body
// continues past the visible lines and is left byte-identical here.
void do_predict(FILE *input, FILE *output)
{
    std::vector<double> pred_values; //store decision values
    std::vector<double> true_values; //store true values

	int total = 0;
	int nr_class = get_nr_class(model_);
	int * labels = Malloc(int, nr_class);
    get_labels(model_, labels);
	double * prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_);
	// n is the index reserved for the bias feature (one past the real features)
	if(model_->bias >=0)
		n = nr_feature+1;
	else
		n = nr_feature;

    // not yet support multiclass
    assert(nr_class==2);

    //print out header...
    // output_option == 2 selects probability output; header lists the class
    // labels in the order the probabilities will be printed.
    if(output_option ==2) {
		prob_estimates = Malloc(double, nr_class);
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
    }
Exemplo n.º 2
0
// Predict the class of one document with the wrapped liblinear model.
//
// The document's features are sorted by id (liblinear requires ascending
// indices), packed into a feature_node array with an optional bias node
// and a -1 sentinel, then scored with ::predict_probability (when the
// model supports probabilities, filling doc.predict_class_probs) or
// plain ::predict. The winning class index is stored on the document.
// Always returns true.
bool QPredictLinearLearner::predict(QPredictDocument &doc)
{
    QPredictFeatureList &features = doc.feature_list;

    // One slot per feature + one for the optional bias node + one for the
    // terminating sentinel.
    const int node_count = static_cast<int>(features.size()) + 2;
    struct feature_node *nodes = new struct feature_node[node_count];

    const int nr_feature = get_nr_feature(m_model);
    int bias_index;
    if (m_model->bias >= 0)
        bias_index = nr_feature + 1;
    else
        bias_index = nr_feature;

    // liblinear expects feature indices in strictly ascending order.
    sort(features.begin(), features.end(), QPredictFeature::feature_compare);

    int pos = 0;
    const QPredictFeatureListIter &end_it = features.end();
    for (QPredictFeatureListIter it = features.begin(); it != end_it; ++it) {
        nodes[pos].index = it->id;
        nodes[pos].value = it->value;
        ++pos;
    }

    // Append the bias term when the model was trained with one.
    if (m_model->bias >= 0) {
        nodes[pos].index = bias_index;
        nodes[pos].value = m_model->bias;
        ++pos;
    }

    // Sentinel: index -1 terminates the sparse vector.
    nodes[pos].index = -1;
    nodes[pos].value = -1;

    if (check_probability_model(m_model)) {
        doc.predict_class_index = static_cast<uint32_t>(
                ::predict_probability(m_model, nodes, doc.predict_class_probs)
                );
    } else {
        doc.predict_class_index = static_cast<uint32_t>(
                ::predict(m_model, nodes)
                );
    }

    delete [] nodes;

    return true;
}
Exemplo n.º 3
0
// Reads instances in LIBSVM format from `input`, predicts each one with the
// file-level global `model_`, and writes predictions to `output`. When
// flag_predict_probability is set, also writes per-class probabilities
// (after a "labels ..." header row). At the end, prints regression metrics
// (MSE, squared correlation) for SVR solvers, otherwise accuracy.
//
// Relies on file-level globals: model_, flag_predict_probability, line,
// max_line_len, max_nr_attr, x, readline(), exit_input_error(), info().
//
// Fix: the optional L2 normalization step now guards against an all-zero
// instance — previously `x[kk].value /= length` with length == 0 filled
// the vector with NaN/inf.
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;  // accumulated squared error (for regression metrics)
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; // for r^2

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	// n is the index used for the bias feature (one past the real features)
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		// Header row: class labels in the order probabilities are printed.
		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);

		// Parse "index:value" pairs into the sparse vector x.
		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			// Indices must be strictly increasing and well-formed.
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1; // sentinel terminating the sparse vector

		// Optionally L2-normalize the instance to unit length.
		if(model_->normal){
			double length = 0;
			for(int kk = 0; x[kk].index != -1; kk++)
				length += x[kk].value * x[kk].value;

			length = sqrt(length);

			// Guard: an all-zero instance has length 0; dividing would
			// produce NaN/inf. Leave it untouched instead.
			if(length > 0)
				for(int kk = 0; x[kk].index != -1; kk++)
					x[kk].value /= length;
		}

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%g",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%g\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}
	// SVR solvers report regression metrics; everything else, accuracy.
	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		info("Mean squared error = %g (regression)\n",error/total);
		info("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
Exemplo n.º 4
0
// MATLAB MEX prediction driver. Inputs: prhs[0] = label vector,
// prhs[1] = sparse instance matrix (orientation controlled by the
// file-level col_format_flag). Outputs: plhs[0] = predicted labels,
// plhs[1] = [accuracy; MSE; squared correlation], plhs[2] = probability
// estimates or decision values depending on predict_probability_flag.
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	int nr_w;
	double *prob_estimates=NULL;

	// Binary non-MCSVM models store a single weight vector; otherwise one
	// decision value per class is produced.
	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
		nr_w=1;
	else
		nr_w=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = get_nr_feature(model_);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	// Validate the label vector shape before doing any work.
	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_label    = mxGetPr(prhs[0]);

	// transpose instance matrix
	// (read_sparse_instance below walks columns, so row-format input must
	// be transposed first)
	if(col_format_flag)
		pplhs[0] = (mxArray *)prhs[1];
	else
	{
		mxArray *pprhs[1];
		pprhs[0] = mxDuplicateArray(prhs[1]);
		if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(plhs);
			return;
		}
	}


	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

	ptr_predict_label = mxGetPr(plhs[0]);
	// Both aliases point at plhs[2]; only one is used per branch below.
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	// +2: one slot for the bias node, one for the -1 sentinel.
	x = Malloc(struct feature_node, feature_number+2);
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target_label, predict_label;

		target_label = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			predict_label = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = predict_label;
			// MATLAB matrices are column-major: stride by instance count.
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			predict_label = predict_values(model_, x, dec_values);
			ptr_predict_label[instance_index] = predict_label;

			for(i=0;i<nr_w;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			free(dec_values);
		}

		// Accumulate statistics for accuracy / regression metrics.
		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;

		++total;
	}
	
	if(model_->param.solver_type==L2R_L2LOSS_SVR || 
           model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 
           model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
        {
                mexPrintf("Mean squared error = %g (regression)\n",error/total);
                mexPrintf("Squared correlation coefficient = %g (regression)\n",
                       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
                       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
                       );
        }
	//else
		//mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);

	// return accuracy, mean squared error, squared correlation coefficient
	plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;
	ptr[1] = error/total;
	ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
				((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
Exemplo n.º 5
0
// Returns the number of features the loaded liblinear model expects.
// Precondition (asserted): a model has been loaded into m_model.
int QPredictLinearLearner::num_of_features()
{
    assert(m_model != NULL);
    const int feature_count = get_nr_feature(m_model);
    return feature_count;
}
Exemplo n.º 6
0
// Reads instances in LIBSVM format from `input`, predicts each with
// `model_`, writes integer predictions (and per-class probabilities when
// flag_predict_probability is set) to `output`, then prints accuracy.
//
// Relies on file-level globals: flag_predict_probability, line,
// max_line_len, max_nr_attr, x, readline(), exit_input_error().
//
// Fix: guard against an empty/blank input line — strtok() returns NULL
// there and the previous code passed NULL straight into strtol()
// (undefined behavior / crash). The sibling variants of this routine
// already perform this check.
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	// n is the index used for the bias feature (one past the real features)
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		// Header row: class labels in the order probabilities are printed.
		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");		
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		int target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t");
		if(label == NULL) // empty line — avoid passing NULL to strtol
			exit_input_error(total+1);
		target_label = (int) strtol(label,&endptr,10);
		if(endptr == label)
			exit_input_error(total+1);

		// Parse "index:value" pairs into the sparse vector x.
		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1; // sentinel terminating the sparse vector

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
Exemplo n.º 7
0
// MATLAB MEX prediction driver (sparse-only variant). Inputs:
// prhs[0] = label vector, prhs[1] = sparse instance matrix (orientation
// controlled by the file-level col_format_flag). Outputs:
// plhs[0] = predicted labels, plhs[1] = accuracy (1x1),
// plhs[2] = probability estimates or decision values.
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
    int label_vector_row_num, label_vector_col_num;
    int feature_number, testing_instance_number;
    int instance_index;
    double *ptr_instance, *ptr_label, *ptr_predict_label;
    double *ptr_prob_estimates, *ptr_dec_values, *ptr;
    struct feature_node *x;
    mxArray *pplhs[1]; // instance sparse matrix in row format

    int correct = 0;
    int total = 0;

    int nr_class=get_nr_class(model_);
    int nr_w;
    double *prob_estimates=NULL;

    // Binary non-MCSVM models produce a single decision value; otherwise
    // one per class.
    if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
        nr_w=1;
    else
        nr_w=nr_class;

    // prhs[1] = testing instance matrix
    feature_number = get_nr_feature(model_);
    testing_instance_number = (int) mxGetM(prhs[1]);
    if(col_format_flag)
    {
        feature_number = (int) mxGetM(prhs[1]);
        testing_instance_number = (int) mxGetN(prhs[1]);
    }

    label_vector_row_num = (int) mxGetM(prhs[0]);
    label_vector_col_num = (int) mxGetN(prhs[0]);

    // Validate the label vector shape before doing any work.
    if(label_vector_row_num!=testing_instance_number)
    {
        mexPrintf("Length of label vector does not match # of instances.\n");
        fake_answer(plhs);
        return;
    }
    if(label_vector_col_num!=1)
    {
        mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
        fake_answer(plhs);
        return;
    }

    ptr_instance = mxGetPr(prhs[1]);
    ptr_label    = mxGetPr(prhs[0]);

    // transpose instance matrix
    // (read_sparse_instance below walks columns, so row-format input must
    // be transposed first)
    if(mxIsSparse(prhs[1]))
    {
        if(col_format_flag)
        {
            pplhs[0] = (mxArray *)prhs[1];
        }
        else
        {
            mxArray *pprhs[1];
            pprhs[0] = mxDuplicateArray(prhs[1]);
            if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
            {
                mexPrintf("Error: cannot transpose testing instance matrix\n");
                fake_answer(plhs);
                return;
            }
        }
    }
    else
        mexPrintf("Testing_instance_matrix must be sparse\n");
        // NOTE(review): execution continues with pplhs[0] uninitialized in
        // the dense case — presumably callers always pass sparse input;
        // verify against the MEX gateway.


    prob_estimates = Malloc(double, nr_class);

    plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
    if(predict_probability_flag)
        plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
    else
        plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

    ptr_predict_label = mxGetPr(plhs[0]);
    // Both aliases point at plhs[2]; only one is used per branch below.
    ptr_prob_estimates = mxGetPr(plhs[2]);
    ptr_dec_values = mxGetPr(plhs[2]);
    // +2: one slot for the bias node, one for the -1 sentinel.
    x = Malloc(struct feature_node, feature_number+2);
    for(instance_index=0; instance_index<testing_instance_number; instance_index++)
    {
        int i;
        double target,v;

        target = ptr_label[instance_index];

        // prhs[1] and prhs[1]^T are sparse
        read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

        if(predict_probability_flag)
        {
            v = predict_probability(model_, x, prob_estimates);
            ptr_predict_label[instance_index] = v;
            // MATLAB matrices are column-major: stride by instance count.
            for(i=0; i<nr_class; i++)
                ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
        }
        else
        {
            double *dec_values = Malloc(double, nr_class);
            v = predict(model_, x);
            ptr_predict_label[instance_index] = v;

            predict_values(model_, x, dec_values);
            for(i=0; i<nr_w; i++)
                ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
            free(dec_values);
        }

        if(v == target)
            ++correct;
        ++total;
    }
    mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);

    // return accuracy, mean squared error, squared correlation coefficient
    plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
    ptr = mxGetPr(plhs[1]);
    ptr[0] = (double) correct/total*100;

    free(x);
    if(prob_estimates != NULL)
        free(prob_estimates);
}
Exemplo n.º 8
0
void binary_class_predict(FILE *input, FILE *output){
	int    total = 0;
	int    *labels;
	int    max_nr_attr = 64;
	struct feature_node *x = Malloc(struct feature_node, max_nr_attr);
	dvec_t dec_values;
	ivec_t true_labels;
	int n;
	if(model_->bias >= 1)
		n = get_nr_feature(model_) + 1;	
	else
		n = get_nr_feature(model_);


	labels = Malloc(int, get_nr_class(model_));
	get_labels(model_, labels);
	
	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0

		label = strtok(line," \t");
		target_label = strtod(label,&endptr);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr - 2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			++i;
		}
		
		if(model_->bias >= 0){
			x[i].index = n;
			x[i].value = model_->bias; 
			++i;	
		}
	
		x[i].index = -1;

		predict_label = predict(model_,x);
		fprintf(output,"%g\n",predict_label);


		double dec_value;
		predict_values(model_, x, &dec_value);
		true_labels.push_back((target_label > 0)? 1: -1);
		if(labels[0] <= 0) dec_value *= -1;
		dec_values.push_back(dec_value);
	}	

	validation_function(dec_values, true_labels);

	free(labels);
	free(x);
}
Exemplo n.º 9
0
// Ranking prediction: reads instances (optionally tagged with "qid:<n>")
// from `input`, predicts a score for each with the global model `model_`,
// writes the scores to `output`, then evaluates pairwise accuracy and
// NDCG via eval_list(). The file is read twice: a first pass counts the
// lines so the score/label/query arrays can be sized exactly.
//
// Relies on file-level globals: model_, line, max_line_len, max_nr_attr,
// x, readline(), exit_input_error(), info(), eval_list().
//
// Fix: the three arrays allocated with new[] were never released —
// delete[] added at the end of the function.
void do_predict(FILE *input, FILE *output)
{
	int total=0;
	int n;
	int nr_feature=get_nr_feature(model_);
	double *dvec_t; // predicted scores, one per instance
	double *ivec_t; // true target labels, one per instance
	int *query;     // query id per instance (0 when absent)
	n=nr_feature;

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	// First pass: count instances so the arrays can be sized exactly.
	while(readline(input) != NULL)
		total++;
	rewind(input);
	dvec_t = new double[total];
	ivec_t = new double[total];
	query = new int[total];
	total = 0;
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		query[total] = 0;
		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);
		ivec_t[total] = target_label;

		// Parse "index:value" pairs (and the optional qid token) into x.
		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;

			// "qid:<n>" tags the instance with its query group.
			if (strcmp(idx,"qid") == 0)
			{
				errno = 0;
				query[total] = (int) strtol(val,&endptr,10);
				if(endptr == val || errno != 0 || *endptr != '\0')
					exit_input_error(i+1);
				continue;
			}
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}
		x[i].index = -1; // sentinel terminating the sparse vector

		predict_label = predict(model_,x);
		fprintf(output,"%.10f\n",predict_label);
		dvec_t[total++] = predict_label;
	}
	double result[3];
	eval_list(ivec_t,dvec_t,query,total,result);
	info("Pairwise Accuracy = %g%%\n",result[0]*100);
	info("MeanNDCG (LETOR) = %g\n",result[1]);
	info("NDCG (YAHOO) = %g\n",result[2]);

	// Release the per-instance arrays (previously leaked).
	delete[] dvec_t;
	delete[] ivec_t;
	delete[] query;
}
Exemplo n.º 10
0
// CTR (click-through-rate) prediction: each input line starts with
// "<clicks> <shows> <qid-token>" followed by LIBSVM features. For each
// valid line the model's probability for the first class is written next
// to the raw click/show counts. Lines with shows <= 0 or clicks > shows
// are skipped.
//
// Relies on file-level globals: model_, line, max_line_len, max_nr_attr,
// x, readline(), exit_input_error(), info().
//
// Fixes: (1) `total` was printed at the end but never incremented — it
// always reported 0; it now counts processed instances. (2) the strtok()
// results for the shows and qid tokens were passed to atoi()/ignored
// without a NULL check, crashing on short lines; malformed lines are now
// rejected via exit_input_error().
void do_predict(FILE *input, FILE *output)
{
    int total = 0;

    int nr_class=get_nr_class(model_);
    double *prob_estimates=NULL;
    int n;
    int nr_feature=get_nr_feature(model_);
    // n is the index used for the bias feature (one past the real features)
    if(model_->bias>=0)
        n=nr_feature+1;
    else
        n=nr_feature;

    if(!check_probability_model(model_))
    {
        fprintf(stderr, "probability output is only supported for logistic regression\n");
        exit(1);
    }

    prob_estimates = (double *) malloc(nr_class*sizeof(double));

    max_line_len = 1024;
    line = (char *)malloc(max_line_len*sizeof(char));
    int clicks = 0;
    int shows = 0;
    while(readline(input) != NULL)
    {
        int i = 0;
        double target_ctr, predict_ctr;
        char *idx, *val, *endptr;

        int inst_max_index = 0; // strtol gives 0 if wrong format

        char *p = strtok(line," \t\n"); //clicks
        if(p == NULL) // empty line
            exit_input_error(total+1);

        clicks = atoi(p);
        p = strtok(NULL," \t"); // shows
        if(p == NULL) // truncated line — shows column missing
            exit_input_error(total+1);
        shows = atoi(p);
        p = strtok(NULL," \t"); // qid:1
        if(p == NULL) // truncated line — qid column missing
            exit_input_error(total+1);

        // Skip impossible count combinations instead of predicting on them.
        if (shows <=0 || clicks > shows) {
            continue;
        }

        target_ctr = (double)clicks / shows;

        // Parse "index:value" pairs into the sparse vector x.
        while(1)
        {
            if(i>=max_nr_attr-2)	// need one more for index = -1
            {
                max_nr_attr *= 2;
                x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
            }

            idx = strtok(NULL,":");
            val = strtok(NULL," \t");

            if(val == NULL)
                break;
            errno = 0;
            x[i].index = (int) strtol(idx,&endptr,10);
            if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
                exit_input_error(total+1);
            else
                inst_max_index = x[i].index;

            errno = 0;
            x[i].value = strtod(val,&endptr);
            if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
                exit_input_error(total+1);

            // feature indices larger than those in training are not used
            if(x[i].index <= nr_feature)
                ++i;
        }

        if(model_->bias>=0)
        {
            x[i].index = n;
            x[i].value = model_->bias;
            i++;
        }
        x[i].index = -1; // sentinel terminating the sparse vector

        predict_probability(model_,x,prob_estimates);
        fprintf(output,"%d %d ", clicks, shows);
        // prob_estimates[0] is the probability of the model's first label.
        predict_ctr = prob_estimates[0];
        fprintf(output," %g\n", predict_ctr);

        ++total; // count processed (non-skipped) instances
    }

    info("total:%d\n",total);

    free(prob_estimates);
}
Exemplo n.º 11
0
// Multi-model prediction variant: model_ is an array of models and each
// input line is scored by sum_pro models in turn (globals sum_pro,
// p_label and model_[] are defined elsewhere — TODO confirm). Labels are
// letters A-D mapped to 0/1 rather than numeric. Most of the accuracy /
// voting bookkeeping has been commented out; the function currently only
// writes per-model predictions to `output`.
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_[0]);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_[0]);
	// n is the index used for the bias feature (one past the real features)
	if(model_[0]->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_[0]))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		// Header row: class labels in the order probabilities are printed.
		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_[0],labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		// target_label = strtod(label,&endptr);
		// Letter labels: 'A' is the negative class, 'B'-'D' positive.
		// NOTE(review): no default case — target_label stays uninitialized
		// for any other first character; confirm the input alphabet.
		switch (label[0]) {
			case 'A': target_label = 0; break;
			case 'B': target_label = 1; break;
			case 'C': target_label = 1; break;
			case 'D': target_label = 1; break;
		}
		// if(endptr == label || *endptr != '\0')
		// 	exit_input_error(total+1);
		// Score the same token stream with each of the sum_pro models.
		for (int pid = 0; pid < sum_pro; pid++) {
			while(1)
			{
				if(i>=max_nr_attr-2)	// need one more for index = -1
				{
					max_nr_attr *= 2;
					x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
				}

				idx = strtok(NULL,":");
				val = strtok(NULL," \t");

				if(val == NULL)
					break;
				errno = 0;
				x[i].index = (int) strtol(idx,&endptr,10);
				if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
					exit_input_error(total+1);
				else
					inst_max_index = x[i].index;

				errno = 0;
				x[i].value = strtod(val,&endptr);
				if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
					exit_input_error(total+1);

				// feature indices larger than those in training are not used
				if(x[i].index <= nr_feature)
					++i;
			}

			if(model_[pid]->bias>=0)
			{
				x[i].index = n;
				x[i].value = model_[pid]->bias;
				i++;
			}
			x[i].index = -1;

			if(flag_predict_probability)
			{
				int j;
				predict_label = predict_probability(model_[pid],x,prob_estimates);
				fprintf(output,"%g",predict_label);
				for(j=0;j<model_[pid]->nr_class;j++)
					fprintf(output," %g",prob_estimates[j]);
				fprintf(output,"\n");
			}
			else
			{
				p_label[pid] = predict(model_[pid],x);
				fprintf(output,"%g", p_label[pid]);

				// printf("pid%dhas done\n",pid );
			}
			fprintf(output, "\n" );
		}
		// Voting/aggregation over the per-model predictions — currently
		// disabled (all commented out below).
		int count = 0;
		predict_label = 0;
		// for ( int l = 0; l < BLOCK ; l++) {
		// 	for (int m = 0;m < BLOCK * N; m++) {
		// 		// printf("%f\t", p_label[l * BLOCK + m]);
		// 		if ( p_label[l * BLOCK + m] == 1) {
		// 			// p_label[l] = 1;
		// 			// break;
		// 			p_label[l]++;
		// 			// count++;* 4
		// 		}
		// 	}
		// 	if (p_label[l] < 4) {
		// 		count++;
		// 	}
		// 	// if ( p_label[l] == 1) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// 	// if (count >0) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// }

		// if (count > 0 ) {
		// 	predict_label = 0;
		// 	}
		// else {
		// 	predict_label = 1;
		// }
		// /printf("\n");
		// fprintf(output,"%g\n",predict_label);

		// if(predict_label == target_label)
		// 	++correct;
		// error += (predict_label-target_label)*(predict_label-target_label);
		// sump += predict_label;
		// sumt += target_label;
		// sumpp += predict_label*predict_label;
		// sumtt += target_label*target_label;
		// sumpt += predict_label*target_label;
		// ++total;
	}
	// if(check_regression_model(model_[0]))
	// {
	// 	info("Mean squared error = %g (regression)\n",error/total);
	// 	info("Squared correlation coefficient = %g (regression)\n",
	// 		((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
	// 		((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
	// 		);
	// }
	// else
	// 	info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	// if(flag_predict_probability)
	// 	free(prob_estimates);
}
Exemplo n.º 12
0
// Old-style prediction driver: reads instances with fscanf/getc (instead
// of the readline/strtok parser used by the other variants), predicts
// each with `model_`, writes integer predictions (and per-class
// probabilities when flag_predict_probability is set) to `output`, and
// prints accuracy at the end.
//
// Relies on file-level globals: flag_predict_probability, max_nr_attr, x.
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	// n is the index used for the bias feature (one past the real features)
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		// Older API: probability output restricted to the L2_LR solver.
		if(model_->param.solver_type!=L2_LR)
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			return;
		}

		// Header row: class labels in the order probabilities are printed.
		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");		
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}
	// Outer loop: one iteration per input instance; exits on EOF.
	while(1)
	{
		int i = 0;
		int c;
		double target;
		int target_label, predict_label;

		if (fscanf(input,"%lf",&target)==EOF)
			break;
		target_label=(int)target;

		// Inner loop: read "index:value" pairs until end of line / EOF.
		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			// Skip whitespace; newline or EOF ends this instance.
			do {
				c = getc(input);
				if(c=='\n' || c==EOF) goto out2;
			} while(isspace(c));
			ungetc(c,input);
			if (fscanf(input,"%d:%lf",&x[i].index,&x[i].value) < 2)
			{
				fprintf(stderr,"Wrong input format at line %d\n", total+1);
				exit(1);
			}
			// feature indices larger than those in training are not used
			if(x[i].index<=nr_feature)
				++i;
		}

out2:
		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1; // sentinel terminating the sparse vector

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d ",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output,"%g ",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}