Ejemplo n.º 1
0
/*
 * Compute summary statistics of a list of real values.
 *
 * Output layout in features:
 *   [0] mean                      [1] maximum           [2] minimum
 *   [3] range (max - min)         [4] variance          [5] median
 *   [6] 1st quartile              [7] 3rd quartile      [8] interquartile range
 *
 * The variance is the population variance computed as E[x^2] - mean^2.
 * Median and quartiles are picked without interpolation: they are the
 * elements at positions count/2, count/4 and 3*count/4 of the sorted values.
 *
 * features: output array; must have room for STATS entries and for
 *           indices 0..8 (presumably STATS is 9 -- confirm).
 * list:     input values; not modified, a sorted copy is used internally.
 * count:    number of values in list; count <= 0 yields all-zero features.
 *
 * Returns STATS.
 */
int getStatistics(mx_real_t* features, mx_real_t* list,int count) {
  int i;
  mx_real_t *sorted;
  mx_real_t mean=0, max=-MX_REAL_MAX, min=MX_REAL_MAX, variance=0;
  mx_real_t median, quartile_1, quartile_3;

  /* Empty input. A negative count is treated the same way: converted to
     an allocation size below it would wrap to a huge unsigned value. */
  if (count<=0) {
    for (i=0;i<STATS;i++)
      features[i]=0.0;
    return STATS;
  }

  /* Single value: every location statistic is that value, every spread is 0 */
  if (count==1) {
    features[0]=features[1]=features[2]=features[5]=features[6]=features[7]=list[0];
    features[3]=features[4]=features[8]=0.0;
    return STATS;
  }

  /* Work on a copy so the caller's list keeps its order */
  sorted = (mx_real_t *) rs_malloc(count*sizeof(mx_real_t),"statistics list");

  /* One pass: copy, accumulate sum and sum of squares, track extremes */
  for (i=0;i<count;i++) {
    sorted[i]=list[i];
    mean+=sorted[i];
    variance+=mx_sqr(sorted[i]);
    if (sorted[i] <min)
      min=sorted[i];
    if (sorted[i] >max)
      max=sorted[i];
  }

  mean /= count;
  variance = variance/count - mx_sqr(mean);
  /* E[x^2] - mean^2 can round to a tiny negative number for (nearly)
     constant data; a variance is never negative, so clamp it */
  if (variance < 0)
    variance = 0;

  qsort(sorted,count,sizeof(mx_real_t),_cmp_mx_real);

  median = sorted[count/2];
  quartile_1 = sorted[count/4];
  quartile_3 = sorted[3*count/4];

  features[0] = mean;
  features[1] = max;
  features[2] = min;
  features[3] = max - min;
  features[4] = variance;
  features[5] = median;
  features[6] = quartile_1;
  features[7] = quartile_3;
  features[8] = fabs(quartile_3 - quartile_1);

  rs_free(sorted);
  return STATS;
}
Ejemplo n.º 2
0
/*
 * Add one training instance of class class_ind to the classifier.
 *
 * While the classifier is not finished, means[class_ind][] and
 * std_dev[class_ind][] are used as accumulators for the per-feature sum and
 * sum of squares (presumably turned into mean / standard deviation when the
 * classifier is finalized elsewhere -- confirm).
 *
 * Once the classifier is finished, mean and standard deviation of the class
 * are updated incrementally and the class priors are recomputed.
 *
 * n_instances[class_ind] counts the instances of the class;
 * n_instances[n_classes] is incremented on every call (overall total).
 *
 * nB:        classifier to update (modified in place)
 * class_ind: index of the class the instance belongs to
 * features:  feature vector with nB->feature_dim entries
 *
 * Returns nB.
 */
naive_bayes_classifier_t *nB_update_classifier(naive_bayes_classifier_t *nB, int class_ind, mx_real_t *features) {

  int i;
  mx_real_t old_mean;
  double prev_n;    /* instances of this class BEFORE the new one */
  double mean_sq;   /* updated E[x^2] */
  double var;

  /* Remember the old count: the incremental formulas below weight the old
     mean / second moment by the number of samples they were computed from.
     Using the already incremented counter would over-weight the old values. */
  prev_n = nB->n_instances[class_ind];
  nB->n_instances[class_ind]++;
  
  if (!nB->finished) {
    /* accumulate sum and sum of squares */
    for (i=0;i<nB->feature_dim;i++) {
      nB->means[class_ind][i]+=features[i];
      nB->std_dev[class_ind][i]+=features[i]*features[i];
    }
  }
  else {
    for (i=0;i<nB->feature_dim;i++) {
      old_mean = nB->means[class_ind][i];
      /* new mean = (old mean * n + x) / (n + 1) */
      nB->means[class_ind][i]=(old_mean*prev_n + features[i]) / (prev_n + 1.0);
      /* old E[x^2] = std^2 + mean^2; fold in x^2, then subtract new mean^2 */
      mean_sq = ((mx_sqr(nB->std_dev[class_ind][i]) + old_mean * old_mean)*prev_n + features[i]*features[i]) / (prev_n + 1.0);
      var = mean_sq - nB->means[class_ind][i]*nB->means[class_ind][i];
      /* rounding can push the difference slightly below 0; avoid sqrt -> NaN */
      if (var < 0)
        var = 0;
      nB->std_dev[class_ind][i] = sqrt(var);
    }
    /* the total counter is incremented only after this loop, hence the +1 */
    for (i=0;i<nB->n_classes; i++) {
      nB->class_probs[i]= 1.0*nB->n_instances[i]/(nB->n_instances[nB->n_classes]+1);
    }
  }
  nB->n_instances[nB->n_classes]++;
  return nB;
}
Ejemplo n.º 3
0
/*
 * Class score of an instance under a naive Bayes model with one normal
 * density per attribute: the class prior multiplied by the density of every
 * attribute. The plain product often results in 0 for many attributes.
 *
 * Attributes whose standard deviation is 0 (constant attribute) are skipped,
 * the same way nB_class_prob treats them. Substituting MX_REAL_MIN for the
 * deviation is not safe: assuming MX_REAL_MIN is the smallest positive
 * mx_real_t, its square underflows to 0, and the exponent becomes 0/0 (NaN)
 * whenever the instance value equals the mean.
 *
 * nB:        classifier holding class_probs, means and std_dev
 * instance:  feature vector with nB->feature_dim entries
 * class_ind: index of the class to score
 *
 * Returns the unnormalised product described above.
 */
mx_real_t nB_class_prob_simple(naive_bayes_classifier_t *nB, mx_real_t *instance, int class_ind) {
    int j;
    mx_real_t prob;
    mx_real_t diff, density, stddev;
    mx_real_t norm_const=sqrt(2 * M_PI);

    prob = nB->class_probs[class_ind];
    for (j=0; j<nB->feature_dim; j++) {
        stddev=nB->std_dev[class_ind][j];
        /* stddev == 0 -> attribute is constant, carries no usable density */
        if (stddev == 0)
            continue;
        diff = instance[j]-nB->means[class_ind][j];
        /* normal density: 1/(sqrt(2*pi)*sigma) * exp(-diff^2 / (2*sigma^2)) */
        density = (1 / (norm_const * stddev)) * exp(-(mx_sqr(diff) / (2 * mx_sqr(stddev))));
        prob *= density;
    }

    return prob;
}
Ejemplo n.º 4
0
/*
 * Class score of an instance computed in the log domain; can cope with many
 * attributes, but the result is difficult to normalise.
 *
 * Starts from the log of the class prior and adds, for every attribute with
 * a non-zero standard deviation, -log(sigma) - diff^2 / (2*sigma^2).
 * The sum is divided by nB->feature_dim and passed through exp().
 *
 * nB:        classifier holding class_probs, means and std_dev
 * instance:  feature vector with nB->feature_dim entries
 * class_ind: index of the class to score
 */
mx_real_t nB_class_prob(naive_bayes_classifier_t *nB, mx_real_t *instance, int class_ind) {
    int dim;
    mx_real_t variance;
    mx_real_t log_prob = mx_log(nB->class_probs[class_ind]);

    for (dim = 0; dim < nB->feature_dim; dim++) {
        /* a zero standard deviation marks a constant attribute: no contribution */
        if (nB->std_dev[class_ind][dim] != 0) {
            variance = mx_sqr(nB->std_dev[class_ind][dim]);
            if (variance == 0) {
                /* the square evaluated to 0 although the deviation is not 0:
                   divide by 2*MX_REAL_MIN instead of by zero */
                log_prob += -mx_log(nB->std_dev[class_ind][dim])-mx_sqr(instance[dim]-nB->means[class_ind][dim])/(2*MX_REAL_MIN);
            } else {
                log_prob += -mx_log(nB->std_dev[class_ind][dim])-mx_sqr(instance[dim]-nB->means[class_ind][dim])/(2*variance);
            }
        }
    }

    /* transform into a reasonable value range */
    return exp(log_prob/nB->feature_dim);
}