// return the classification error rate and the mean squared error between predicted and true sentiment
std::pair<double, double> do_predict(const struct problem *test_prob, struct model* model_)
{
  double acc = 0;
  double clse=0;
  int total = 0;
  double *prob_estimates=NULL;
  int *labels=NULL;
  int nr_class=get_nr_class(model_);
  if(flag_predict_probability)
    {
      if(!check_probability_model(model_))
	{
	  fprintf(stderr, "probability output is only supported for logistic regression\n");
	  exit(1);
	}
      
      labels=(int *) malloc(nr_class*sizeof(int));
      get_labels(model_,labels);
      prob_estimates = (double *) malloc(nr_class*sizeof(double));
    }

  int l = test_prob->l;
  int i = 0;
  for(i=0; i<l; i++)
    {
      int predict_label = 0;
      int target_label = (int) test_prob->y[i]; // liblinear stores labels as double
      feature_node *xi = test_prob->x[i];
      if(flag_predict_probability)
	{
	  int j;
	  predict_label = (int) predict_probability(model_,xi,prob_estimates);
	  double predict_score=0;
	  for(j=0;j<model_->nr_class;j++)
	    predict_score+=prob_estimates[j]*labels[j];
	  //double acc_max= fabs(target_label-3)+2;
	  //acc+=(acc_max-sqrt((predict_score - target_label)*(predict_score - target_label)))/acc_max;
	  acc += (predict_score - target_label) * (predict_score - target_label);
	  if (predict_label!=target_label)
	    clse++;
	}
      else
	{
	  predict_label = (int) predict(model_,xi);
	  //double acc_max= fabs(target_label-3)+2;
	  //acc+=(acc_max-sqrt((predict_label - target_label)*(predict_label - target_label)))/acc_max;
          acc += (predict_label - target_label) * (predict_label - target_label);
          if (predict_label!=target_label)
	    clse++;
	}
      ++total;
    }
  if(flag_predict_probability)
    {
      free(prob_estimates);
      free(labels);
    }
  //printf("Error = %g%% (%d/%d)\n",(double) (total-correct)/total*100,total-correct,total);
  return std::make_pair(clse / total, acc / total);
}
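For context, here is a minimal caller sketch for this variant. It assumes liblinear's standard public API (load_model, get_nr_feature, free_and_destroy_model) and that the file-scope flag_predict_probability used above is already configured; the model file name and the two hand-built test instances are made up.

#include <cstdio>
#include <utility>
#include "linear.h"

int main()
{
    struct model *model_ = load_model("sentiment.model"); // hypothetical file
    if (model_ == NULL) {
        fprintf(stderr, "can't open model file\n");
        return 1;
    }

    // Two hand-built instances; every liblinear row ends with index = -1.
    struct feature_node row0[] = { {1, 0.5}, {3, 1.0}, {-1, 0} };
    struct feature_node row1[] = { {2, 2.0}, {-1, 0} };
    struct feature_node *rows[] = { row0, row1 };
    double y[] = { 3, 1 }; // true sentiment labels

    struct problem test_prob;
    test_prob.l = 2;                      // number of instances
    test_prob.n = get_nr_feature(model_); // feature dimension
    test_prob.y = y;
    test_prob.x = rows;
    test_prob.bias = -1;

    std::pair<double, double> err = do_predict(&test_prob, model_);
    printf("classification error = %g, MSE = %g\n", err.first, err.second);

    free_and_destroy_model(&model_);
    return 0;
}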
Example No. 2
bool QPredictLinearLearner::predict(QPredictDocument &doc)
{
    QPredictFeatureList &feature_list = doc.feature_list;

    // one extra slot for the bias term and one for the index = -1 sentinel
    int num_space = feature_list.size() + 2;

    struct feature_node *x_space = new struct feature_node[num_space];

    int nr_feature = get_nr_feature(m_model);
    int n;
    if (m_model->bias >= 0)
        n = nr_feature + 1;
    else
        n = nr_feature;

    sort(feature_list.begin(), feature_list.end(), QPredictFeature::feature_compare);
    const QPredictFeatureListIter &feature_end_it = feature_list.end();
    int j = 0;
    for (QPredictFeatureListIter feature_it = feature_list.begin(); feature_it != feature_end_it; ++feature_it) {
        x_space[j].index = feature_it->id;
        x_space[j].value = feature_it->value;
        ++j;
    }

    if(m_model->bias >= 0) {
        x_space[j].index = n;
        x_space[j].value = m_model->bias;
        ++j;
    }

    x_space[j].index = -1;
    x_space[j].value = -1;

    if (check_probability_model(m_model)) {
        doc.predict_class_index = static_cast<uint32_t>(
                ::predict_probability(m_model, x_space, doc.predict_class_probs)
                );
    } else {
        doc.predict_class_index = static_cast<uint32_t>(
                ::predict(m_model, x_space)
                );
    }

    delete []x_space;

    return true;
}
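The packing logic above follows liblinear's input convention: feature indices sorted in ascending order, an optional bias node at index nr_feature + 1, and a terminating node with index = -1. A standalone sketch of the same convention, assuming only linear.h (pack_row is a hypothetical helper, not part of QPredict):

#include <algorithm>
#include <vector>
#include "linear.h"

// Pack (index, value) pairs into a liblinear row: sort by index,
// append the optional bias node, then the index = -1 sentinel.
static std::vector<feature_node> pack_row(std::vector<feature_node> feats,
                                          const model *m)
{
    std::sort(feats.begin(), feats.end(),
              [](const feature_node &a, const feature_node &b)
              { return a.index < b.index; });

    if (m->bias >= 0) {
        feature_node b;
        b.index = get_nr_feature(m) + 1; // bias lives one past the last feature
        b.value = m->bias;
        feats.push_back(b);
    }

    feature_node end;
    end.index = -1; // sentinel: the value here is ignored
    end.value = 0;
    feats.push_back(end);
    return feats;
}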
Example No. 3
double LVlinear_predict_probability(lvError *lvErr, const LVlinear_model  *model_in, const LVArray_Hdl<LVlinear_node> x_in, LVArray_Hdl<double> prob_estimates_out){
	try{
		// Input validation: Uninitialized model
		if (model_in == nullptr || model_in->w == nullptr || (*model_in->w)->dimSize == 0)
			throw LVException(__FILE__, __LINE__, "Uninitialized model passed to liblinear_predict_probability.");

		// Input validation: Empty feature vector
		if (x_in == nullptr || (*x_in)->dimSize == 0)
			throw LVException(__FILE__, __LINE__, "Empty feature vector passed to liblinear_predict_probability.");

		// Input validation: Final index -1?
		if ((*x_in)->elt[(*x_in)->dimSize - 1].index != -1)
			throw LVException(__FILE__, __LINE__, "The index of the last element of the feature vector needs to be -1 (liblinear_predict_probability).");

		// Convert LVsvm_model to svm_model
		auto mdl = std::make_unique<model>();
		LVConvertModel(*model_in, *mdl);

		// Check probability model
		int valid_probability = check_probability_model(mdl.get());
		if (!valid_probability)
			throw LVException(__FILE__, __LINE__, "The selected solver type does not support probability output.");

		// Allocate room for probability estimates
		LVResizeNumericArrayHandle(prob_estimates_out, mdl->nr_class);
		(*prob_estimates_out)->dimSize = mdl->nr_class;

		double highest_prob_label = predict_probability(mdl.get(), reinterpret_cast<feature_node*>((*x_in)->elt), (*prob_estimates_out)->elt);

		return highest_prob_label;
	}
	catch (LVException &ex) {
		ex.returnError(lvErr);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}

	catch (std::exception &ex) {
		LVException::returnStdException(lvErr, __FILE__, __LINE__, ex);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}
	catch (...) {
		LVException ex(__FILE__, __LINE__, "Unknown exception has occurred");
		ex.returnError(lvErr);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}
}
Example No. 4
double LVlinear_predict_probability(lvError *lvErr, const LVlinear_model  *model_in, const LVArray_Hdl<LVlinear_node> x_in, LVArray_Hdl<double> prob_estimates_out){
	try{
		// Convert LVsvm_model to svm_model
		std::unique_ptr<model> mdl(new model()); // naming the variable "model" would shadow the type in its own initializer
		LVConvertModel(model_in, mdl.get());

		// Check probability model
		int valid_probability = check_probability_model(mdl.get());
		if (!valid_probability)
			throw LVException(__FILE__, __LINE__, "The model does not support probability output.");

		// Allocate room for probability estimates
		LVResizeNumericArrayHandle(prob_estimates_out, mdl->nr_class);
		(*prob_estimates_out)->dimSize = mdl->nr_class;

		double highest_prob_label = predict_probability(mdl.get(), reinterpret_cast<feature_node*>((*x_in)->elt), (*prob_estimates_out)->elt);

		return highest_prob_label;
	}
	catch (LVException &ex) {
		ex.returnError(lvErr);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}

	catch (std::exception &ex) {
		LVException::returnStdException(lvErr, __FILE__, __LINE__, ex);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}
	catch (...) {
		LVException ex(__FILE__, __LINE__, "Unknown exception has occurred");
		ex.returnError(lvErr);
		(*prob_estimates_out)->dimSize = 0;
		return std::nan("");
	}
}
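Stripped of the LabVIEW plumbing (handles, exceptions, error clusters), the core of both wrappers reduces to a few liblinear calls. A minimal sketch using only the public API; the model file name is made up:

#include <stdio.h>
#include <stdlib.h>
#include "linear.h"

int main()
{
    struct model *mdl = load_model("train.model"); // hypothetical file
    if (mdl == NULL)
        return 1;

    if (!check_probability_model(mdl)) {
        fprintf(stderr, "the selected solver does not support probability output\n");
        return 1;
    }

    // one test row, terminated by the index = -1 sentinel
    struct feature_node row[] = { {1, 0.5}, {2, 1.0}, {-1, 0} };

    // predict_probability fills one probability per class, ordered like get_labels()
    double *prob = (double *) malloc(get_nr_class(mdl) * sizeof(double));
    double label = predict_probability(mdl, row, prob);

    printf("predicted label %g, first class probability %g\n", label, prob[0]);

    free(prob);
    free_and_destroy_model(&mdl);
    return 0;
}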
Example No. 5
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(model_->normal){
			double length = 0;
			for(int kk = 0; x[kk].index != -1; kk++)
				length += x[kk].value * x[kk].value;

			length = sqrt(length);

			if(length > 0) // avoid dividing an all-zero row by zero
				for(int kk = 0; x[kk].index != -1; kk++)
					x[kk].value /= length;
		}

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%g",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%g\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}
	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		info("Mean squared error = %g (regression)\n",error/total);
		info("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
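The regression branch reports Pearson's squared correlation computed from the five running sums, i.e. r^2 = (n*sum(pt) - sum(p)*sum(t))^2 / ((n*sum(pp) - sum(p)^2) * (n*sum(tt) - sum(t)^2)). A small reference helper with the same arithmetic (squared_corr is a hypothetical name, not part of the example):

// Pearson's squared correlation coefficient from running sums.
static double squared_corr(int n, double sump, double sumt,
                           double sumpp, double sumtt, double sumpt)
{
    double num = n * sumpt - sump * sumt;
    double den = (n * sumpp - sump * sump) * (n * sumtt - sumt * sumt);
    return (num * num) / den; // caller must ensure den != 0
}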
Example No. 6
void mexFunction( int nlhs, mxArray *plhs[],
		int nrhs, const mxArray *prhs[] )
{
	int prob_estimate_flag = 0;
	struct model *model_;
	char cmd[CMD_LEN];
	col_format_flag = 0;

	if(nrhs > 5 || nrhs < 3)
	{
		exit_with_help();
		fake_answer(plhs);
		return;
	}
	if(nrhs == 5)
	{
		mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1);
		if(strcmp(cmd, "col") == 0)
		{			
			col_format_flag = 1;
		}
	}

	if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) {
		mexPrintf("Error: label vector and instance matrix must be double\n");
		fake_answer(plhs);
		return;
	}

	if(mxIsStruct(prhs[2]))
	{
		const char *error_msg;

		// parse options
		if(nrhs>=4)
		{
			int i, argc = 1;
			char *argv[CMD_LEN/2];

			// put options in argv[]
			mxGetString(prhs[3], cmd,  mxGetN(prhs[3]) + 1);
			if((argv[argc] = strtok(cmd, " ")) != NULL)
				while((argv[++argc] = strtok(NULL, " ")) != NULL)
					;

			for(i=1;i<argc;i++)
			{
				if(argv[i][0] != '-') break;
				if(++i>=argc)
				{
					exit_with_help();
					fake_answer(plhs);
					return;
				}
				switch(argv[i-1][1])
				{
					case 'b':
						prob_estimate_flag = atoi(argv[i]);
						break;
					default:
						mexPrintf("unknown option\n");
						exit_with_help();
						fake_answer(plhs);
						return;
				}
			}
		}

		model_ = Malloc(struct model, 1);
		error_msg = matlab_matrix_to_model(model_, prhs[2]);
		if(error_msg)
		{
			mexPrintf("Error: can't read model: %s\n", error_msg);
			free_and_destroy_model(&model_);
			fake_answer(plhs);
			return;
		}

		if(prob_estimate_flag)
		{
			if(!check_probability_model(model_))
			{
				mexPrintf("probability output is only supported for logistic regression\n");
				prob_estimate_flag=0;
			}
		}

		if(mxIsSparse(prhs[1]))
			do_predict(plhs, prhs, model_, prob_estimate_flag);
		else
		{
			mexPrintf("Testing_instance_matrix must be sparse; "
				"use sparse(Testing_instance_matrix) first\n");
			fake_answer(plhs);
		}

		// destroy model_
		free_and_destroy_model(&model_);
	}
	else
	{
		mexPrintf("model file should be a struct array\n");
		fake_answer(plhs);
	}

	return;
}
Example No. 7
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");		
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		int target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line: strtol would otherwise dereference NULL
			exit_input_error(total+1);

		target_label = (int) strtol(label,&endptr,10);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
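A minimal driver sketch for this reader, assuming the file-scope globals it relies on (x, line, max_nr_attr, flag_predict_probability) are declared as in liblinear's predict.c; the two data lines are fabricated:

#include <stdio.h>
#include <stdlib.h>
#include "linear.h"

int main()
{
    struct model *model_ = load_model("train.model"); // hypothetical file
    if (model_ == NULL)
        return 1;

    // predict.c normally allocates the shared feature buffer in main()
    x = (struct feature_node *) malloc(max_nr_attr * sizeof(struct feature_node));

    FILE *in = tmpfile(); // fabricated two-line test set
    fputs("1 1:0.5 3:1.0\n", in);
    fputs("2 2:2.0 4:0.3\n", in);
    rewind(in);

    do_predict(in, stdout, model_); // one predicted label per input line

    fclose(in);
    free(x);
    free_and_destroy_model(&model_);
    return 0;
}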
Example No. 8
void do_predict(FILE *input, FILE *output)
{
    int total = 0;

    int nr_class=get_nr_class(model_);
    double *prob_estimates=NULL;
    int n;
    int nr_feature=get_nr_feature(model_);
    if(model_->bias>=0)
        n=nr_feature+1;
    else
        n=nr_feature;

    if(!check_probability_model(model_))
    {
        fprintf(stderr, "probability output is only supported for logistic regression\n");
        exit(1);
    }

    prob_estimates = (double *) malloc(nr_class*sizeof(double));

    max_line_len = 1024;
    line = (char *)malloc(max_line_len*sizeof(char));
    int clicks = 0;
    int shows = 0;
    while(readline(input) != NULL)
    {
        int i = 0;
        double target_ctr, predict_ctr;
        char *idx, *val, *endptr;

        int inst_max_index = 0; // strtol gives 0 if wrong format

        char *p = strtok(line," \t\n"); //clicks
        if(p == NULL) // empty line
            exit_input_error(total+1);

        clicks = atoi(p);
        p = strtok(NULL," \t"); // shows
        if(p == NULL) // malformed line: atoi would otherwise dereference NULL
            exit_input_error(total+1);
        shows = atoi(p);
        p = strtok(NULL," \t"); // qid:1

        if (shows <=0 || clicks > shows) {
            continue;
        }

        target_ctr = (double)clicks / shows;

        while(1)
        {
            if(i>=max_nr_attr-2)	// need one more for index = -1
            {
                max_nr_attr *= 2;
                x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
            }

            idx = strtok(NULL,":");
            val = strtok(NULL," \t");

            if(val == NULL)
                break;
            errno = 0;
            x[i].index = (int) strtol(idx,&endptr,10);
            if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
                exit_input_error(total+1);
            else
                inst_max_index = x[i].index;

            errno = 0;
            x[i].value = strtod(val,&endptr);
            if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
                exit_input_error(total+1);

            // feature indices larger than those in training are not used
            if(x[i].index <= nr_feature)
                ++i;
        }

        if(model_->bias>=0)
        {
            x[i].index = n;
            x[i].value = model_->bias;
            i++;
        }
        x[i].index = -1;

        predict_probability(model_,x,prob_estimates);
        fprintf(output,"%d %d ", clicks, shows);
        predict_ctr = prob_estimates[0];
        fprintf(output," %g\n", predict_ctr);
        ++total; // otherwise the total reported below stays at zero
    }

    info("total:%d\n",total);

    free(prob_estimates);
}
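Note that predict_ctr is taken from prob_estimates[0], which silently assumes the class of interest comes first in the model's internal label ordering. A hedged helper that instead looks the class up via get_labels (prob_of_label and the wanted label value are hypothetical):

#include <stdlib.h>
#include "linear.h"

// Return the probability assigned to a specific class label, or -1 if the
// label is not in the model; prob_estimates is ordered like get_labels().
static double prob_of_label(const struct model *m,
                            const double *prob_estimates, int wanted)
{
    int nr_class = get_nr_class(m);
    int *labels = (int *) malloc(nr_class * sizeof(int));
    get_labels(m, labels);

    double p = -1;
    for (int k = 0; k < nr_class; k++)
        if (labels[k] == wanted)
            p = prob_estimates[k];

    free(labels);
    return p;
}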
Example No. 9
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_[0]);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_[0]);
	if(model_[0]->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_[0]))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_[0],labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		// target_label = strtod(label,&endptr);
		switch (label[0]) {
			case 'A': target_label = 0; break;
			case 'B':
			case 'C':
			case 'D': target_label = 1; break;
			default: exit_input_error(total+1); // otherwise target_label stays uninitialized
		}
		// if(endptr == label || *endptr != '\0')
		// 	exit_input_error(total+1);
		for (int pid = 0; pid < sum_pro; pid++) {
			while(1)
			{
				if(i>=max_nr_attr-2)	// need one more for index = -1
				{
					max_nr_attr *= 2;
					x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
				}

				idx = strtok(NULL,":");
				val = strtok(NULL," \t");

				if(val == NULL)
					break;
				errno = 0;
				x[i].index = (int) strtol(idx,&endptr,10);
				if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
					exit_input_error(total+1);
				else
					inst_max_index = x[i].index;

				errno = 0;
				x[i].value = strtod(val,&endptr);
				if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
					exit_input_error(total+1);

				// feature indices larger than those in training are not used
				if(x[i].index <= nr_feature)
					++i;
			}

			if(model_[pid]->bias>=0)
			{
				x[i].index = n;
				x[i].value = model_[pid]->bias;
				i++;
			}
			x[i].index = -1;

			if(flag_predict_probability)
			{
				int j;
				predict_label = predict_probability(model_[pid],x,prob_estimates);
				fprintf(output,"%g",predict_label);
				for(j=0;j<model_[pid]->nr_class;j++)
					fprintf(output," %g",prob_estimates[j]);
				fprintf(output,"\n");
			}
			else
			{
				p_label[pid] = predict(model_[pid],x);
				fprintf(output,"%g", p_label[pid]);

				// printf("pid%dhas done\n",pid );
			}
			fprintf(output, "\n" );
		}
		int count = 0;
		predict_label = 0;
		// for ( int l = 0; l < BLOCK ; l++) {
		// 	for (int m = 0;m < BLOCK * N; m++) {
		// 		// printf("%f\t", p_label[l * BLOCK + m]);
		// 		if ( p_label[l * BLOCK + m] == 1) {
		// 			// p_label[l] = 1;
		// 			// break;
		// 			p_label[l]++;
		// 			// count++;* 4
		// 		}
		// 	}
		// 	if (p_label[l] < 4) {
		// 		count++;
		// 	}
		// 	// if ( p_label[l] == 1) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// 	// if (count >0) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// }

		// if (count > 0 ) {
		// 	predict_label = 0;
		// 	}
		// else {
		// 	predict_label = 1;
		// }
		// /printf("\n");
		// fprintf(output,"%g\n",predict_label);

		// if(predict_label == target_label)
		// 	++correct;
		// error += (predict_label-target_label)*(predict_label-target_label);
		// sump += predict_label;
		// sumt += target_label;
		// sumpp += predict_label*predict_label;
		// sumtt += target_label*target_label;
		// sumpt += predict_label*target_label;
		// ++total;
	}
	// if(check_regression_model(model_[0]))
	// {
	// 	info("Mean squared error = %g (regression)\n",error/total);
	// 	info("Squared correlation coefficient = %g (regression)\n",
	// 		((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
	// 		((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
	// 		);
	// }
	// else
	// 	info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	// if(flag_predict_probability)
	// 	free(prob_estimates);
}