Example #1
void do_predict(FILE *input, FILE *output)
{
	std::vector<double> pred_values; // store decision values
	std::vector<double> true_values; // store true values

	int total = 0;
	int nr_class = get_nr_class(model_);
	int *labels = Malloc(int, nr_class);
	get_labels(model_, labels);
	double *prob_estimates = NULL;
	int j, n;
	int nr_feature = get_nr_feature(model_);
	if(model_->bias >= 0)
		n = nr_feature+1;
	else
		n = nr_feature;

	// multiclass is not supported yet
	assert(nr_class == 2);

	// print the header line
	if(output_option == 2)
	{
		prob_estimates = Malloc(double, nr_class);
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
	}
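The example is truncated here, before its reading loop. A minimal sketch of the per-instance step it sets up, assuming an instance already parsed into a feature_node array x (terminated by index = -1) with true label target; for a two-class model, predict_values yields a single decision value:

	// Hedged sketch, not part of the original example: record one instance.
	double dec_value;
	predict_values(model_, x, &dec_value); // one decision value when nr_class == 2
	pred_values.push_back(dec_value);
	true_values.push_back(target);
	++total;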
Example #2
// return classification error and the normalized difference between predicted and true sentiment
std::pair<double, double> do_predict(const struct problem *test_prob, struct model* model_)
{
  double acc = 0;
  double clse=0;
  int total = 0;
  double *prob_estimates=NULL;
  int *labels=NULL;
  int nr_class=get_nr_class(model_);
  if(flag_predict_probability)
    {
      if(!check_probability_model(model_))
	{
	  fprintf(stderr, "probability output is only supported for logistic regression\n");
	  exit(1);
	}
      
      labels=(int *) malloc(nr_class*sizeof(int));
      get_labels(model_,labels);
      prob_estimates = (double *) malloc(nr_class*sizeof(double));
    }

  int l = test_prob->l;
  int i = 0;
  for(i=0; i<l; i++)
    {
      int predict_label = 0;
      int target_label=test_prob->y[i];
      feature_node *xi = test_prob->x[i];
      if(flag_predict_probability)
	{
	  int j;
	  predict_label = predict_probability(model_,xi,prob_estimates);
	  double predict_score=0;
	  for(j=0;j<model_->nr_class;j++)
	    predict_score+=prob_estimates[j]*labels[j];
	  //double acc_max= fabs(target_label-3)+2;
	  //acc+=(acc_max-sqrt((predict_score - target_label)*(predict_score - target_label)))/acc_max;
	  acc += (predict_score - target_label) * (predict_score - target_label);
	  if (predict_label!=target_label)
	    clse++;
	}
      else
	{
	  predict_label = predict(model_,xi);
	  //double acc_max= fabs(target_label-3)+2;
	  //acc+=(acc_max-sqrt((predict_label - target_label)*(predict_label - target_label)))/acc_max;
	  acc += (predict_label - target_label) * (predict_label - target_label);
	  if (predict_label!=target_label)
	    clse++;
	}
      ++total;
    }
  if(flag_predict_probability)
    {
      free(prob_estimates);
      free(labels);
    }
  //printf("Error = %g%% (%d/%d)\n",(double) (total-correct)/total*100,total-correct,total);
  return std::make_pair(clse/total, acc/total);
}
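A hedged usage sketch for this variant; test_prob and model_ are assumed to have been prepared with the usual LIBLINEAR calls (read_problem, train, or load_model):

// Hypothetical caller, not in the original source.
std::pair<double, double> err = do_predict(&test_prob, model_);
printf("classification error = %g, mean squared difference = %g\n",
       err.first, err.second);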
Example #3
double SVMLinear::predictModel(vector<double> features)
{

    if(modelLinearSVM==NULL)
    {
        fprintf(stderr,"Error: train a model first\n");
        return 0.0;
    }
    int nr_class=get_nr_class(modelLinearSVM);
    int bias = (modelLinearSVM->bias >= 0) ? 1 : 0; // bias term is present only when the model's bias >= 0

    int sparsity = 0;
    for (int i = 0; i < (int)features.size(); i++)
        if(features[i] != 0.0)
            sparsity++;

    feature_node *x=Malloc(struct feature_node,sparsity+bias+1); //bias and -1 index

    int cnt=0;
    for (int i = 0; i < (int)features.size(); i++)
    {
        if(features[i]!=0.0)
        {
            x[cnt].index=i+1;
            x[cnt].value=features[i];
            cnt++;
        }
    }
    if(bias)
    {
        x[cnt].index = modelLinearSVM->nr_feature+1;
        x[cnt].value = 1;
        cnt++;
    }
    x[cnt].index=-1;

    double val=0;
    predict_values(modelLinearSVM,x,&val);
    return val;

}
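A hedged usage sketch; the SVMLinear wrapper's training step is assumed to have happened already, and the feature values are made up:

// Hypothetical caller, not from the original source.
SVMLinear svm;                          // assumes a trained modelLinearSVM inside
std::vector<double> feat(100, 0.0);     // mostly-zero (sparse) input vector
feat[0] = 0.7;
feat[5] = 1.3;
double score = svm.predictModel(feat);  // signed decision value from predict_values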
Example #4
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = strtod(label,&endptr);
		if(endptr == label || *endptr != '\0')
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(model_->normal){
			double length = 0;
			for(int kk = 0; x[kk].index != -1; kk++)
				length += x[kk].value * x[kk].value;

			length = sqrt(length);

			if(length > 0) // guard against an all-zero instance
				for(int kk = 0; x[kk].index != -1; kk++)
					x[kk].value /= length;
		}

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%g",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%g\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;
		++total;
	}
	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		info("Mean squared error = %g (regression)\n",error/total);
		info("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	else
		info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
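All of these parsing loops build the same LIBLINEAR input: a sparse feature_node array with 1-based indices, an optional bias term at index n, and a sentinel index of -1. A minimal hand-built instance, with made-up values:

// Sketch: the instance "1:0.5 3:1.2" for a model with nr_feature >= 3 and
// bias disabled (model_->bias < 0). The -1 index marks the end of the array.
struct feature_node inst[3];
inst[0].index = 1; inst[0].value = 0.5;
inst[1].index = 3; inst[1].value = 1.2;
inst[2].index = -1;
double label = predict(model_, inst);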
Example #5
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_);
	int nr_w;
	double *prob_estimates=NULL;

	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
		nr_w=1;
	else
		nr_w=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = get_nr_feature(model_);
	testing_instance_number = (int) mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = (int) mxGetM(prhs[1]);
		testing_instance_number = (int) mxGetN(prhs[1]);
	}

	label_vector_row_num = (int) mxGetM(prhs[0]);
	label_vector_col_num = (int) mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_label    = mxGetPr(prhs[0]);

	// transpose instance matrix
	if(col_format_flag)
		pplhs[0] = (mxArray *)prhs[1];
	else
	{
		mxArray *pprhs[1];
		pprhs[0] = mxDuplicateArray(prhs[1]);
		if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
		{
			mexPrintf("Error: cannot transpose testing instance matrix\n");
			fake_answer(plhs);
			return;
		}
	}


	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);

	ptr_predict_label = mxGetPr(plhs[0]);
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, feature_number+2);
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target_label, predict_label;

		target_label = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			predict_label = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = predict_label;
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			predict_label = predict_values(model_, x, dec_values);
			ptr_predict_label[instance_index] = predict_label;

			for(i=0;i<nr_w;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
			free(dec_values);
		}

		if(predict_label == target_label)
			++correct;
		error += (predict_label-target_label)*(predict_label-target_label);
		sump += predict_label;
		sumt += target_label;
		sumpp += predict_label*predict_label;
		sumtt += target_label*target_label;
		sumpt += predict_label*target_label;

		++total;
	}
	
	if(model_->param.solver_type==L2R_L2LOSS_SVR ||
	   model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
	   model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
	{
		mexPrintf("Mean squared error = %g (regression)\n",error/total);
		mexPrintf("Squared correlation coefficient = %g (regression)\n",
			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
			);
	}
	//else
		//mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);

	// return accuracy, mean squared error, squared correlation coefficient
	plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;
	ptr[1] = error/total;
	ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
				((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
Example #6
void *predictModelWholeGenome(void *arg) {
  thread_data_t *data = (thread_data_t *) arg;

  printf("data->trainedModel is %s\n", data->trainedModel);
  printf("data->coverageFileList is %s\n", data->coverageFileList);
  printf("data->trainFile %s\n", data->trainFile);
  printf("data->paramFile %s\n", data->paramFile);
  printf("data->chr is %d\n", data->chr);

  char *trainedModel = data->trainedModel;
  char *coverageFileList = data->coverageFileList;
  // char *trainFile = data->trainFile;
  char *paramFile = data->paramFile;
  int chr = data->chr;

  // utility var
  int i,j,k;
  
  // trainedModel
  struct model *mymodel;
  if( (mymodel = load_model(trainedModel)) == 0) {
    printf("cannot load model from file %s\n", trainedModel);
    return NULL; // a pthread start routine returns void *, not an exit code
  }

  // coverageFileList
  int totalCoverageFiles;
  FILE *coverageFileListFp = NULL;
  if( (coverageFileListFp = fopen(coverageFileList, "r") ) == NULL) {
    printf("Cannot open file %s\n", coverageFileList);
    return NULL;
  }
  char **coverageFiles = (char **)calloc(MAX_BAM_FILES,sizeof(char *));
  for(i = 0; i < MAX_BAM_FILES; i++) {
    coverageFiles[i] = (char *)calloc(MAX_DIR_LEN, sizeof(char));
  }
  
  i = 0;
  while (!feof(coverageFileListFp)) {
    if (i >= MAX_BAM_FILES) {
      printf("Error: the number of input coverage files exceeds the limit %d\n", i);
      return NULL;
    }
    if( ( fscanf(coverageFileListFp, "%s\n", coverageFiles[i]) ) != 1) {
      printf("Error: reading entry %d from %s\n", i, coverageFileList);
      return NULL;
    }
    i++;
  }
  totalCoverageFiles = i;
  fclose(coverageFileListFp);

  // open coverage Files
  FILE *coverageFps[totalCoverageFiles];
  for(i = 0; i < totalCoverageFiles; i++) {
    if( (coverageFps[i] = fopen(coverageFiles[i], "rb")) == NULL ) {
      printf("Error opening coverage file %s\n", coverageFiles[i]);
      return NULL;
    }
  }

  // paramFile
  struct extractFeatureParam *param = (struct extractFeatureParam *)calloc(1, sizeof(struct extractFeatureParam));
  parseParam(paramFile, param);

  // predict model: by default: predict probability
  int nr_class = get_nr_class(mymodel);
  double *prob_estimates = (double *)calloc(nr_class, sizeof(double));

  // predResult for storing results
  int totalBins = 0;
  int cumBins[NUM_SEQ];
  for (i = 0; i < NUM_SEQ; i++) {
    totalBins += (int)(chrlen[i] / param->resolution) + 1;
    cumBins[i] = totalBins;
  }

  // allocate memory for result based on thread data chr
  // as we are using one thread for each chr
  float *predResult = (float *)calloc( (int)(chrlen[chr] / param->resolution) + 1, sizeof(float));

  // read in feature for each bin and do prediction
  for(j = 0; j < (int)(chrlen[chr] / param->resolution) + 1; j++) {
    if(j % 100000 == 0) {
      printf("Predicting chr%d:%dth bin\n", chr,j);
      fflush(stdout);
    }
    int max_nr_feature = 100;
    struct feature_node *myX = (struct feature_node *)calloc(max_nr_feature, sizeof(struct feature_node));
    int idx = 0;
    for(k = 0; k < totalCoverageFiles; k++) {
      float *buffer = (float *)calloc( param->windowSize/param->resolution,sizeof(float));
      int offset = j;
      offset += -(int)((float)(param->windowSize / 2) / (float)param->resolution + 0.5);
      if(offset < 0 || offset + (int)((float)(param->windowSize) / (float)param->resolution + 0.5) > (int)(chrlen[chr] / param->resolution) + 1) { // chrlen[chr]: i is a stale counter at this point
        // printf("offset is %d\n", offset);
        free(buffer);
        continue;
      }
      if(chr != 0) offset += cumBins[chr-1];
      // printf("offset is %d\n", offset);
      fseek(coverageFps[k], offset*sizeof(float), SEEK_SET);
      fread(buffer, sizeof(float), param->windowSize/param->resolution, coverageFps[k]);
      int l;
      // printf("buffer[%d] is:",l);
      for(l = 0; l < param->windowSize/param->resolution; l++) {
        // if(j == 289540) printf("%f,",buffer[l]);
        if(buffer[l] != 0) {
          myX[idx].index = k*(param->windowSize/param->resolution) + l + 1;
          myX[idx].value = buffer[l];
          idx++;
        }
        if(idx >= max_nr_feature -2) { // feature_node is not long enough
          max_nr_feature *= 2;
          myX = (struct feature_node *)realloc(myX, max_nr_feature*sizeof(struct feature_node));
        }
      }
      free(buffer);
    } // end of loop through coverageFiles
    // printf("\n");
    myX[idx].index = -1; // a flag for end of features
    if(idx == 0) {
      // printf("idx is %d\n",idx);
      predResult[j] = 0.0;
      free(myX);
      continue;
    }
    // printf("nr_feature is %d\n", idx);
    predict_probability(mymodel, myX, prob_estimates);
    // printf("num of feature is %d\n", get_nr_feature(mymodel));
    // printf("num of class is %d\n", get_nr_class(mymodel));
    int *mylabel = (int *)calloc(nr_class, sizeof(int)); // one slot per class, not a fixed 10
    // query the label order so prob_estimates can be mapped to the right class
    get_labels(mymodel, mylabel);
    if(mylabel[0] == 1) {
      predResult[j] = prob_estimates[0];
    } else {
      predResult[j] = prob_estimates[1];
    }
 
    free(myX);
    free(mylabel);
  }


  for(i = 0; i < totalCoverageFiles; i++) {
    fclose(coverageFps[i]);
  }
  // free pointers
  for(i = 0; i < MAX_BAM_FILES; i++) {
    free(coverageFiles[i]);
  }
  free(coverageFiles);
  free(param);
  free(prob_estimates);
  // give address of pointer to this function, so that the function can free the pointer.
  free_and_destroy_model(&mymodel); 
  pthread_exit((void *) predResult);
}
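A hedged sketch of how this start routine is presumably driven, one thread per chromosome; the thread_data_t fields are taken from the prints at the top of the function, and the file names are invented:

// Hypothetical driver, not in the original source.
thread_data_t td;
td.trainedModel     = (char *)"model.liblinear";
td.coverageFileList = (char *)"coverage.list";
td.trainFile        = (char *)"train.svm";
td.paramFile        = (char *)"extract.param";
td.chr = 0;                                   // one thread per chromosome
pthread_t tid;
pthread_create(&tid, NULL, predictModelWholeGenome, &td);
void *ret = NULL;
pthread_join(tid, &ret);                      // ret is the calloc'd float array
float *pred = (float *)ret;
// ... consume per-bin probabilities, then free(pred) ...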
Example #7
void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
{
	int label_vector_row_num, label_vector_col_num;
	int feature_number, testing_instance_number;
	int instance_index;
	double *ptr_instance, *ptr_label, *ptr_predict_label;
	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
	struct feature_node *x;
	mxArray *pplhs[1]; // instance sparse matrix in row format

	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	int nr_classifier;
	double *prob_estimates=NULL;

	if(nr_class==2)
		nr_classifier=1;
	else
		nr_classifier=nr_class;

	// prhs[1] = testing instance matrix
	feature_number = mxGetN(prhs[1]);
	testing_instance_number = mxGetM(prhs[1]);
	if(col_format_flag)
	{
		feature_number = mxGetM(prhs[1]);
		testing_instance_number = mxGetN(prhs[1]);
	}

	label_vector_row_num = mxGetM(prhs[0]);
	label_vector_col_num = mxGetN(prhs[0]);

	if(label_vector_row_num!=testing_instance_number)
	{
		mexPrintf("Length of label vector does not match # of instances.\n");
		fake_answer(plhs);
		return;
	}
	if(label_vector_col_num!=1)
	{
		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
		fake_answer(plhs);
		return;
	}

	ptr_instance = mxGetPr(prhs[1]);
	ptr_label    = mxGetPr(prhs[0]);

	// transpose instance matrix
	if(mxIsSparse(prhs[1]))
	{
		if(col_format_flag)
		{
			pplhs[0] = (mxArray *)prhs[1];
		}
		else
		{
			mxArray *pprhs[1];
			pprhs[0] = mxDuplicateArray(prhs[1]);
			if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
			{
				mexPrintf("Error: cannot transpose testing instance matrix\n");
				fake_answer(plhs);
				return;
			}
		}
	}
	else
	{
		mexPrintf("Testing_instance_matrix must be sparse\n");
		fake_answer(plhs);
		return;
	}


	prob_estimates = Malloc(double, nr_class);

	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
	if(predict_probability_flag)
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
	else
		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_classifier, mxREAL);

	ptr_predict_label = mxGetPr(plhs[0]);
	ptr_prob_estimates = mxGetPr(plhs[2]);
	ptr_dec_values = mxGetPr(plhs[2]);
	x = Malloc(struct feature_node, feature_number+2);
	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
	{
		int i;
		double target,v;

		target = ptr_label[instance_index];

		// prhs[1] and prhs[1]^T are sparse
		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);

		if(predict_probability_flag)
		{
			v = predict_probability(model_, x, prob_estimates);
			ptr_predict_label[instance_index] = v;
			for(i=0;i<nr_class;i++)
				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
		}
		else
		{
			double *dec_values = Malloc(double, nr_class);
			v = predict(model_, x);
			ptr_predict_label[instance_index] = v;

			predict_values(model_, x, dec_values);
			for(i=0;i<nr_classifier;i++)
				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
		}

		if(v == target)
			++correct;
		++total;
	}
	mexPrintf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100,correct,total);

	// return accuracy, mean squared error, squared correlation coefficient
	plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
	ptr = mxGetPr(plhs[1]);
	ptr[0] = (double)correct/total*100;

	free(x);
	if(prob_estimates != NULL)
		free(prob_estimates);
}
Example #8
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");		
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0;
		int target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		target_label = (int) strtol(label,&endptr,10);
		if(endptr == label)
			exit_input_error(total+1);

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			idx = strtok(NULL,":");
			val = strtok(NULL," \t");

			if(val == NULL)
				break;
			errno = 0;
			x[i].index = (int) strtol(idx,&endptr,10);
			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
				exit_input_error(total+1);
			else
				inst_max_index = x[i].index;

			errno = 0;
			x[i].value = strtod(val,&endptr);
			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
				exit_input_error(total+1);

			// feature indices larger than those in training are not used
			if(x[i].index <= nr_feature)
				++i;
		}

		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output," %g",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}
Example #9
File: eval.cpp Project: Joelone/MLEA
double binary_class_cross_validation(const problem *prob, const parameter *param, int nr_fold)
{
	dvec_t dec_values;
	ivec_t ty;
	int *labels;

	if (nr_fold > 1)
	{
		int i;
		int *fold_start = Malloc(int,nr_fold+1);
		int l = prob->l;
		int *perm = Malloc(int,l);

		for(i=0;i<l;i++) perm[i]=i;
		for(i=0;i<l;i++)
		{
			int j = i+rand()%(l-i);
			std::swap(perm[i],perm[j]);
		}
		for(i=0;i<=nr_fold;i++)
			fold_start[i]=i*l/nr_fold;

		for(i=0;i<nr_fold;i++)
		{
			int                begin   = fold_start[i];
			int                end     = fold_start[i+1];
			int                j,k;
			struct problem subprob;

			subprob.l = l-(end-begin);
			subprob.x = Malloc(struct feature_node*,subprob.l);
			subprob.y = Malloc(int,subprob.l);

			k=0;
			for(j=0;j<begin;j++)
			{
				subprob.x[k] = prob->x[perm[j]];
				subprob.y[k] = prob->y[perm[j]];
				++k;
			}
			for(j=end;j<l;j++)
			{
				subprob.x[k] = prob->x[perm[j]];
				subprob.y[k] = prob->y[perm[j]];
				++k;
			}
			struct model *submodel = train(&subprob,param);
			//int svm_type = get_svm_type(submodel);
	
			//if(svm_type == NU_SVR || svm_type == EPSILON_SVR){
			//	fprintf(stderr, "wrong svm type");
			//	exit(1);
			//}

			labels = Malloc(int, get_nr_class(submodel));
			get_labels(submodel, labels);

			if(get_nr_class(submodel) > 2) 
			{
				fprintf(stderr,"Error: the number of class is not equal to 2\n");
				exit(-1);
			}

			dec_values.resize(end);
			ty.resize(end);

			for(j=begin;j<end;j++) {
				predict_values(submodel,prob->x[perm[j]], &dec_values[j]);
				ty[j] = (prob->y[perm[j]] > 0)? 1: -1;
			}


			if(labels[0] <= 0) {
				for(j=begin;j<end;j++)
					dec_values[j] *= -1;
			}
	
			free_and_destroy_model(&submodel);
			free(subprob.x);
			free(subprob.y);
			free(labels);
		}

		free(perm);
		free(fold_start);
	}
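The snippet is cut off here. In the eval.cpp this example appears to come from, the pooled decision values and binarized labels are then handed to a configurable scorer; a hedged sketch of that ending, with validation_function assumed rather than shown in the snippet:

	// Hedged completion, not in the snippet: score the pooled predictions
	// with the configured criterion (e.g. AUC, F-score).
	return validation_function(dec_values, ty);
}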
Example #10
void do_predict(FILE *input, FILE *output)
{
    int total = 0;

    int nr_class=get_nr_class(model_);
    double *prob_estimates=NULL;
    int n;
    int nr_feature=get_nr_feature(model_);
    if(model_->bias>=0)
        n=nr_feature+1;
    else
        n=nr_feature;

    if(!check_probability_model(model_))
    {
        fprintf(stderr, "probability output is only supported for logistic regression\n");
        exit(1);
    }

    prob_estimates = (double *) malloc(nr_class*sizeof(double));

    max_line_len = 1024;
    line = (char *)malloc(max_line_len*sizeof(char));
    int clicks = 0;
    int shows = 0;
    while(readline(input) != NULL)
    {
        int i = 0;
        double target_ctr, predict_ctr;
        char *idx, *val, *endptr;

        int inst_max_index = 0; // strtol gives 0 if wrong format

        char *p = strtok(line," \t\n"); //clicks
        if(p == NULL) // empty line
            exit_input_error(total+1);

        clicks = atoi(p);
        p = strtok(NULL," \t"); // shows
        shows = atoi(p);
        p = strtok(NULL," \t"); // qid:1

        if (shows <=0 || clicks > shows) {
            continue;
        }

        target_ctr = (double)clicks / shows;

        while(1)
        {
            if(i>=max_nr_attr-2)	// need one more for index = -1
            {
                max_nr_attr *= 2;
                x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
            }

            idx = strtok(NULL,":");
            val = strtok(NULL," \t");

            if(val == NULL)
                break;
            errno = 0;
            x[i].index = (int) strtol(idx,&endptr,10);
            if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
                exit_input_error(total+1);
            else
                inst_max_index = x[i].index;

            errno = 0;
            x[i].value = strtod(val,&endptr);
            if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
                exit_input_error(total+1);

            // feature indices larger than those in training are not used
            if(x[i].index <= nr_feature)
                ++i;
        }

        if(model_->bias>=0)
        {
            x[i].index = n;
            x[i].value = model_->bias;
            i++;
        }
        x[i].index = -1;

        predict_probability(model_,x,prob_estimates);
        fprintf(output,"%d %d ", clicks, shows);
        predict_ctr = prob_estimates[0];
        fprintf(output," %g\n", predict_ctr);
        ++total; // count instances so the final total report is meaningful
    }

    info("total:%d\n",total);

    free(prob_estimates);
}
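Note that prob_estimates[0] is the probability of the model's first label, i.e. whichever label LIBLINEAR encountered first in training, not necessarily the click class. A hedged sketch of making that mapping explicit, mirroring what Example #6 does:

// Sketch, assuming a two-class model whose positive class is labeled 1.
int labels[2];
get_labels(model_, labels);
double p_click = (labels[0] == 1) ? prob_estimates[0] : prob_estimates[1];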
Example #11
void do_predict(FILE *input, FILE *output)
{
	int correct = 0;
	int total = 0;
	double error = 0;
	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

	int nr_class=get_nr_class(model_[0]);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_[0]);
	if(model_[0]->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(!check_probability_model(model_[0]))
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			exit(1);
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_[0],labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}

	max_line_len = 1024;
	line = (char *)malloc(max_line_len*sizeof(char));
	while(readline(input) != NULL)
	{
		int i = 0, feat_end = 0; // feat_end: where the parsed features stop (reused across models)
		double target_label, predict_label;
		char *idx, *val, *label, *endptr;
		int inst_max_index = 0; // strtol gives 0 if wrong format

		label = strtok(line," \t\n");
		if(label == NULL) // empty line
			exit_input_error(total+1);

		// target_label = strtod(label,&endptr);
		switch (label[0]) {
			case 'A': target_label = 0; break;
			case 'B': target_label = 1; break;
			case 'C': target_label = 1; break;
			case 'D': target_label = 1; break;
			default:  exit_input_error(total+1); // unknown label letter
		}
		// if(endptr == label || *endptr != '\0')
		// 	exit_input_error(total+1);
		for (int pid = 0; pid < sum_pro; pid++) {
			while(1)
			{
				if(i>=max_nr_attr-2)	// need one more for index = -1
				{
					max_nr_attr *= 2;
					x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
				}

				idx = strtok(NULL,":");
				val = strtok(NULL," \t");

				if(val == NULL)
					break;
				errno = 0;
				x[i].index = (int) strtol(idx,&endptr,10);
				if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
					exit_input_error(total+1);
				else
					inst_max_index = x[i].index;

				errno = 0;
				x[i].value = strtod(val,&endptr);
				if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
					exit_input_error(total+1);

				// feature indices larger than those in training are not used
				if(x[i].index <= nr_feature)
					++i;
			}

			if (pid == 0)
				feat_end = i;   // features are only parsed on the first pass
			else
				i = feat_end;   // reuse them; overwrite the previous bias/sentinel

			if(model_[pid]->bias>=0)
			{
				x[i].index = n;
				x[i].value = model_[pid]->bias;
				i++;
			}
			x[i].index = -1;

			if(flag_predict_probability)
			{
				int j;
				predict_label = predict_probability(model_[pid],x,prob_estimates);
				fprintf(output,"%g",predict_label);
				for(j=0;j<model_[pid]->nr_class;j++)
					fprintf(output," %g",prob_estimates[j]);
				fprintf(output,"\n");
			}
			else
			{
				p_label[pid] = predict(model_[pid],x);
				fprintf(output,"%g", p_label[pid]);

				// printf("pid%dhas done\n",pid );
			}
			fprintf(output, "\n" );
		}
		int count = 0;
		predict_label = 0;
		// for ( int l = 0; l < BLOCK ; l++) {
		// 	for (int m = 0;m < BLOCK * N; m++) {
		// 		// printf("%f\t", p_label[l * BLOCK + m]);
		// 		if ( p_label[l * BLOCK + m] == 1) {
		// 			// p_label[l] = 1;
		// 			// break;
		// 			p_label[l]++;
		// 			// count++;* 4
		// 		}
		// 	}
		// 	if (p_label[l] < 4) {
		// 		count++;
		// 	}
		// 	// if ( p_label[l] == 1) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// 	// if (count >0) {
		// 	// 	predict_label = 1;
		// 	// }
		// 	// else {
		// 	// 	predict_label = 0;
		// 	// }
		// }

		// if (count > 0 ) {
		// 	predict_label = 0;
		// 	}
		// else {
		// 	predict_label = 1;
		// }
		// /printf("\n");
		// fprintf(output,"%g\n",predict_label);

		// if(predict_label == target_label)
		// 	++correct;
		// error += (predict_label-target_label)*(predict_label-target_label);
		// sump += predict_label;
		// sumt += target_label;
		// sumpp += predict_label*predict_label;
		// sumtt += target_label*target_label;
		// sumpt += predict_label*target_label;
		// ++total;
	}
	// if(check_regression_model(model_[0]))
	// {
	// 	info("Mean squared error = %g (regression)\n",error/total);
	// 	info("Squared correlation coefficient = %g (regression)\n",
	// 		((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
	// 		((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
	// 		);
	// }
	// else
	// 	info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
	// if(flag_predict_probability)
	// 	free(prob_estimates);
}
Example #12
void do_predict(FILE *input, FILE *output, struct model* model_)
{
	int correct = 0;
	int total = 0;

	int nr_class=get_nr_class(model_);
	double *prob_estimates=NULL;
	int j, n;
	int nr_feature=get_nr_feature(model_);
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;

	if(flag_predict_probability)
	{
		int *labels;

		if(model_->param.solver_type!=L2_LR)
		{
			fprintf(stderr, "probability output is only supported for logistic regression\n");
			return;
		}

		labels=(int *) malloc(nr_class*sizeof(int));
		get_labels(model_,labels);
		prob_estimates = (double *) malloc(nr_class*sizeof(double));
		fprintf(output,"labels");		
		for(j=0;j<nr_class;j++)
			fprintf(output," %d",labels[j]);
		fprintf(output,"\n");
		free(labels);
	}
	while(1)
	{
		int i = 0;
		int c;
		double target;
		int target_label, predict_label;

		if (fscanf(input,"%lf",&target)==EOF)
			break;
		target_label=(int)target;

		while(1)
		{
			if(i>=max_nr_attr-2)	// need one more for index = -1
			{
				max_nr_attr *= 2;
				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
			}

			do {
				c = getc(input);
				if(c=='\n' || c==EOF) goto out2;
			} while(isspace(c));
			ungetc(c,input);
			if (fscanf(input,"%d:%lf",&x[i].index,&x[i].value) < 2)
			{
				fprintf(stderr,"Wrong input format at line %d\n", total+1);
				exit(1);
			}
			// feature indices larger than those in training are not used
			if(x[i].index<=nr_feature)
				++i;
		}

out2:
		if(model_->bias>=0)
		{
			x[i].index = n;
			x[i].value = model_->bias;
			i++;
		}
		x[i].index = -1;

		if(flag_predict_probability)
		{
			int j;
			predict_label = predict_probability(model_,x,prob_estimates);
			fprintf(output,"%d ",predict_label);
			for(j=0;j<model_->nr_class;j++)
				fprintf(output,"%g ",prob_estimates[j]);
			fprintf(output,"\n");
		}
		else
		{
			predict_label = predict(model_,x);
			fprintf(output,"%d\n",predict_label);
		}

		if(predict_label == target_label)
			++correct;
		++total;
	}
	printf("Accuracy = %g%% (%d/%d)\n", (double)correct/total*100,correct,total);
	if(flag_predict_probability)
		free(prob_estimates);
}