Example #1
static double
rfFeatureInfoGain(RANDOM_FOREST *rf, TREE *tree, NODE *parent, NODE *left, NODE *right, int fno, int *ptotal_count)
{
  int    c ;
  double info_gain = 0, entropy_before, entropy_after, wr, wl ;

  if (left != NULL)   // not a leaf; whether it splits on this feature is checked below
  {
    if (parent->feature == fno)
    {
      entropy_before = entropy(parent->class_counts, rf->nclasses, tree->root.class_counts) ;
      for (wr = wl = 0.0, c = 0 ; c < rf->nclasses ; c++)
      {
	if (tree->root.class_counts[c] == 0)
	  continue ;
	wl += (double)left->class_counts[c] / tree->root.class_counts[c] ;
	wr += (double)right->class_counts[c] / tree->root.class_counts[c] ;
      }
      wl = wl / (wl + wr); wr = 1-wl ;
      
      entropy_after = 
	wl * entropy(left->class_counts, rf->nclasses, tree->root.class_counts) +
	wr * entropy(right->class_counts, rf->nclasses, tree->root.class_counts) ;
      info_gain = ((double)parent->total_counts * (entropy_before-entropy_after)) ;
      *ptotal_count += parent->total_counts ;
    }
    else 
      info_gain = 0 ;

    info_gain += rfFeatureInfoGain(rf, tree, left, left->left, left->right, fno, ptotal_count) ;
    info_gain += rfFeatureInfoGain(rf, tree, right, right->left, right->right, fno, ptotal_count) ;
  }
  return(info_gain) ;
}
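Examples #1, #3, #18, #20, and #27 all lean on a three-argument entropy(class_counts, nclasses, root_class_counts) helper whose body is not shown. The following is a minimal sketch consistent with those call sites: per-class counts are reweighted by the root node's class totals (so unbalanced training sets do not dominate), then the Shannon entropy of the normalized result is returned. The reweighting scheme and the natural-log base are assumptions, not the project's verbatim code.

#include <math.h>

static double
entropy(int *class_counts, int nclasses, int *root_class_counts)
{
  int    c ;
  double p, total, H = 0.0 ;

  /* reweight each class by its prevalence at the root */
  for (total = 0.0, c = 0 ; c < nclasses ; c++)
    if (root_class_counts[c] > 0)
      total += (double)class_counts[c] / root_class_counts[c] ;
  if (total <= 0.0)
    return(0.0) ;
  for (c = 0 ; c < nclasses ; c++)
  {
    if (root_class_counts[c] == 0)
      continue ;
    p = ((double)class_counts[c] / root_class_counts[c]) / total ;
    if (p > 0.0)
      H -= p * log(p) ;   /* natural log assumed; log2 is equally plausible */
  }
  return(H) ;
}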
Example #2
// Function to compute (naive) mutual information
double mutualinfo(const double *cij, const int numi, const int numj) {
    int i,j,k;
    double I;
    
    // Compute marginal distributions
    double *pi = (double *) mxMalloc(numi*sizeof(double)); //new double[numi];
    double *pj = (double *) mxMalloc(numj*sizeof(double)); //new double[numj];
    double *pij = (double *) mxMalloc(numi*numj*sizeof(double)); //new double[numi*numj];
    double N = 0.0;
    for (i=0; i<numi; i++)  {
        pi[i] = 0.0;
    }
    for (j=0; j<numj; j++)  {
        pj[j] = 0.0;
    }
    for (k=0; k<numi*numj; k++) {
        N += cij[k];
        pij[k] = 0.;
    }
    
    for (i=0; i<numi; i++) {
        for (j=0; j<numj; j++) {
            pij[i+j*numi] = cij[i+j*numi]/N;
            pi[i] += pij[i+j*numi];
            pj[j] += pij[i+j*numi];
        }
    }
    
    // Compute MI using entropy of joint and marginal distributions
    I = entropy(pi,numi) + entropy(pj,numj) - entropy(pij, numi*numj);
    mxFree(pi); //delete[] pi;
    mxFree(pj); //delete[] pj;
    mxFree(pij); //delete[] pij;
    return I;
}
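The two-argument entropy(p, n) that mutualinfo() relies on is also not shown. A plausible sketch, assuming the array already sums to 1 (as pi, pj, and pij do above) and a natural-log base:

#include <math.h>

/* Hypothetical helper for mutualinfo(); assumes p is a normalized distribution. */
double entropy(const double *p, const int n) {
    double H = 0.0;
    int i;
    for (i = 0; i < n; i++) {
        if (p[i] > 0.0)
            H -= p[i] * log(p[i]);   /* nats; switch to log2 for bits */
    }
    return H;
}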
Example #3
static int
rfTrainNode(RANDOM_FOREST *rf, TREE *tree, NODE *node, int *training_classes, 
            double **training_data, int ntraining)
{
  if (node->left == NULL)   // not trained yet
  {
    node->left = rfAllocateNode(node->total_counts, node->depth+1, rf->nclasses) ;
    node->right = rfAllocateNode(node->total_counts, node->depth+1, rf->nclasses) ;
    if (find_optimal_feature_and_threshold(rf, tree, node, node->left, node->right, 
					   training_classes, 
					   training_data, rf->ntraining) == 0)
    {
      // couldn't find a threshold to improve separation
      rfFreeNode(&node->left) ; rfFreeNode(&node->right) ;
      return(0) ;
    }
  }
  if (node->depth > tree->depth)
    tree->depth = node->depth ;
  if (node->depth < rf->max_depth)
  {
    if (entropy(node->left->class_counts, rf->nclasses, tree->root.class_counts) > 0)
      rfTrainNode(rf, tree, node->left, training_classes, training_data, ntraining) ;
    if (entropy(node->right->class_counts, rf->nclasses, tree->root.class_counts) > 0)
      rfTrainNode(rf, tree, node->right, training_classes, training_data, ntraining) ;
  }
  return(1) ;
}
Example #4
double conditional_entropy(const double* H, double* hJ, unsigned int clampI, unsigned int clampJ)
{
  double n; 
  double entIJ, entJ;  
  _marginalize(hJ, H, clampI, clampJ, 1); 
  entIJ = entropy(H, clampI*clampJ, &n); 
  entJ = entropy(hJ, clampJ, &n); 
  return(entIJ - entJ); /* Entropy of I given J */ 
}
Example #5
double mutual_information(const double* H, double* hI, unsigned int clampI, double* hJ, unsigned int clampJ, double* n)
{
  double entIJ, entI, entJ; 
  _marginalize(hI, H, clampI, clampJ, 0); 
  _marginalize(hJ, H, clampI, clampJ, 1); 
  entI = entropy(hI, clampI, n); 
  entJ = entropy(hJ, clampJ, n); 
  entIJ = entropy(H, clampI*clampJ, n); 
  return(entI + entJ - entIJ); 
}
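Examples #4, #5, and #8 share an unshown _marginalize(dst, H, clampI, clampJ, axis) that collapses the clampI x clampJ joint histogram H onto one axis. A sketch under the assumption that H stores the I index varying fastest; the layout and signature details are guesses:

static void
_marginalize(double* h, const double* H, unsigned int clampI,
             unsigned int clampJ, int axis)
{
  unsigned int i, j;
  if (axis == 0) {                         /* hI: sum the joint over J */
    for (i = 0; i < clampI; i++) h[i] = 0.0;
    for (j = 0; j < clampJ; j++)
      for (i = 0; i < clampI; i++)
        h[i] += H[i + j*clampI];
  } else {                                 /* hJ: sum the joint over I */
    for (j = 0; j < clampJ; j++) h[j] = 0.0;
    for (j = 0; j < clampJ; j++)
      for (i = 0; i < clampI; i++)
        h[j] += H[i + j*clampI];
  }
}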
Example #6
/**
 * @brief Computes rolling hash on entropic data from a randomSource if data
 *        meets threshold on entropy estimate.
 *
 * @param randomSource pointer to a RandomSource.
 *
 * @return true, if data was entropic enough to be processed.
 */
bool SeedGenerator::processFromSource(RandomSource* randomSource) {

	// Check if the seed has already been computed.
	if (_seedReady) {
		return false; // Cannot process data until seed is flushed or reset.
	}

	// Compute avg. bit occurrence in a sample from randomSource.
	std::vector<double> sampleAvgVec = randomSource->bitEntropy();
	double sum = std::accumulate(sampleAvgVec.begin(), sampleAvgVec.end(), 0.0); // 0.0, not 0.0f: accumulate in double
	double avgSampleEntropy = sum/static_cast<double>(sampleAvgVec.size());

	// Check if estimate meets threshold.
	if (avgSampleEntropy < SeedGenerator::ENTROPYTHRESHOLD) {
		// Data not good enough.
		std::cerr << "[Entropy Error] Sample entropy estimate low" << std::endl;
		return false;
	}

	// Load bytes from randomSource into randomData.
	std::vector<uint8_t> randomData;
	randomSource->appendData(randomData);

	// Split random bytes into _numDivs to compute _numDivs hashes.
	auto it = randomData.data();
	int stepSize = randomData.size() / _numDivs;
	int excess = randomData.size() % _numDivs;

	// Loop through batches of data.
	for (int i = 0; i < (_numDivs - 1); ++i) {
		/* Compute avg. bit occurrence in a byte for this batch and check if
		 * it meets the threshold.
		 */
		if (!entropy(it, it + stepSize)) {
			// Data not good enough
			std::cerr << "[Error] Byte entropy estimate low" << std::endl;
			return false;
		}

		// Compute rolling hash for this batch.
		_hashVec[i].Update(it, stepSize);

		it = it + stepSize;
	}

	// Final batch
	if (!entropy(it, it + stepSize + excess)) {
		// Data not good enough
		std::cerr << "[Error] Byte entropy estimate low" << std::endl;
		return false;
	}
	_hashVec[_numDivs-1].Update(it, stepSize + excess);

	return true;
}
Example #7
static void f(void)
{
	//int n = 22;
	//uint8_t data[22] = {1, 2, 3,  7, 7, 7,  1, 2, 3,
	//	200, 2, 3,  222, 222, 222,  1, 2, 3, 44, 45, 46, 47};
	int n = 100003;
	uint8_t data[n];
	for (int i = 0; i < n; i++)
		data[i] = rand();



	fprintf(stderr, "\n\n\noriginal data:\n");
	for (int i = 0; i < n; i++)
		fprintf(stderr, "%d%s", data[i], (i&&(0==(i+1)%77))?"\n":" ");
	fprintf(stderr, "\n");
	fprintf(stderr, "entropy = %g\n", entropy(data, n));


	int ne;
	uint8_t *edata = alloc_and_transform_from_RAW_to_RLE8(data, n, &ne);

	//int ne2;
	//uint8_t *edata2 = alloc_and_transform_from_RAW_to_B64(data, n, &ne2);


	fprintf(stderr, "\nPCX data:\n");
	for (int i = 0; i < ne; i++)
		fprintf(stderr, "%d%s", edata[i], (i&&(0==(i+1)%77))?"\n":" ");
	fprintf(stderr, "\n");
	fprintf(stderr, "%d => %d\n", n, ne);
	fprintf(stderr, "entropy = %g\n", entropy(edata, ne));

	int dne;
	uint8_t *dedata = alloc_and_transform_from_RLE8_to_RAW(edata, ne, &dne);



	fprintf(stderr, "\ndata recovered from PCX:\n");
	for (int i = 0; i < dne; i++)
		fprintf(stderr, "%d%s", dedata[i], (i&&(0==(i+1)%77))?"\n":" ");
	fprintf(stderr, "\n");

	fprintf(stderr, "n=%d dne=%d\n", n, dne);
	assert(n == dne);
	for (int i = 0; i < n; i++)
		assert(data[i] == dedata[i]);


	free(edata);
	free(dedata);
	//free(edata2);
}
Example #8
double normalized_mutual_information(const double* H, double* hI, unsigned int clampI, double* hJ, unsigned int clampJ, double *n)
{
  double entIJ, entI, entJ, aux; 
  _marginalize(hI, H, clampI, clampJ, 0); 
  _marginalize(hJ, H, clampI, clampJ, 1); 
  entI = entropy(hI, clampI, n); 
  entJ = entropy(hJ, clampJ, n); 
  entIJ = entropy(H, clampI*clampJ, n);
  aux = entI + entJ; 
  if (aux > 0.0) 
    return(2*(1-entIJ/aux));
  else 
    return 0.0; 
}
Example #9
double multiinformation(const int *d, int nsamples, int nvars, int c) {
		 bool *sel = new bool[nvars];
		 double sum = 0;
		 for( int i=0; i<nvars; ++i )
			sel[i] = false;
		 for(int i=0;i<nvars; ++i) {
			sel[i] = true;
			sum += entropy(d, nsamples, nvars, c, sel);
			sel[i] = false;
		 }	
		 for( int i=0; i<nvars; ++i )
			sel[i] = true;
		 sum -= entropy(d, nsamples, nvars, c, sel);
		 return sum;
}
Example #10
inline TR entanglement(const T1& rho1, arma::uvec dim) {
  const auto& p = as_Mat(rho1);

#ifndef QICLIB_NO_DEBUG
  bool checkV = true;
  if (p.n_cols == 1)
    checkV = false;

  if (p.n_elem == 0)
    throw Exception("qic::entanglement", Exception::type::ZERO_SIZE);

  if (checkV)
    if (p.n_rows != p.n_cols)
      throw Exception("qic::entanglement",
                      Exception::type::MATRIX_NOT_SQUARE_OR_CVECTOR);

  if (arma::any(dim == 0))   // element-wise test; any zero dimension is invalid
    throw Exception("qic::entanglement", Exception::type::INVALID_DIMS);

  if (arma::prod(dim) != p.n_rows)
    throw Exception("qic::entanglement", Exception::type::DIMS_MISMATCH_MATRIX);

  if ((dim.n_elem) != 2)
    throw Exception("qic::entanglement", Exception::type::NOT_BIPARTITE);
#endif

  return entropy(TrX(p, {1}, std::move(dim)));
}
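A usage sketch: for a bipartite pure state, entanglement() is the von Neumann entropy of the reduced state left after TrX traces out one party. Assuming QIClib-style conventions and a base-2 entropy():

#include <armadillo>
#include <cmath>

void demo_entanglement() {
  arma::cx_vec bell(4, arma::fill::zeros);
  bell(0) = bell(3) = 1.0 / std::sqrt(2.0);   // (|00> + |11>)/sqrt(2)
  double E = entanglement(bell, {2, 2});      // ~1.0 ebit for a Bell state
  (void)E;
}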
Example #11
int _tmain(int argc, _TCHAR* argv[])
{
	std::string entropy("entropy");
	Crypto cryptoA(std::vector<BYTE>(entropy.begin(), entropy.end()));
	Crypto cryptoB(std::vector<BYTE>(entropy.begin(), entropy.end()));

	int total = 5;
	int failed = 0
		+ TestGeneratePassword(cryptoA, "0123456789abcdef", 16)
		+ TestEncryptDecrypt(cryptoA, cryptoB)
		+ TestCreateKey25519(DudRandomSource(0x55), L"83BA66B48DF6777D6EB6DDA90E9792319AF48D3BA3210620E7B4641C4F88C476")
		+ TestCreateKey25519(DudRandomSource(0xAA), L"41ACE9D483B7CC3F75640E04D7AACA6C2BA8F44854FEA5158598D49B382E0407")
		+ TestSharedKey25519(DudRandomSource(0x55), DudRandomSource(0xAA), L"99982C6AA51244F9CF49295A8EF0B882E2FED6C131F106556C803143758946E2")
		;

	if (failed)
	{
		std::wcout << L"FAILED (" << failed << L" of " << total << L" tests)" << std::endl;
	}
	else
	{
		std::wcout << L"PASSED (" << total << L" tests)" << std::endl;
	}

	return 0;
}
Example #12
/* Update the best split if necessary */ 
static void updateSplit(int feature, float threshold, float posleft, float negleft, node_t* node, split_t* split){
    float posright = max(FLT_EPSILON, node->pos - posleft);
    float negright = max(FLT_EPSILON, node->neg - negleft);
    float sizeleft = posleft+negleft;
    float sizeright = posright+negright;
    float total = node->pos+node->neg;
    float gain = -(sizeleft/total*entropy(posleft/sizeleft)+sizeright/total*entropy(posright/sizeright));
    if (gain > split->gain){
        split->gain = gain;
        split->feature = feature;
        split->threshold = threshold;
        split->posleft = posleft;
        split->negleft = negleft;
        split->posright = posright;
        split->negright = negright;
    }
}
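updateSplit() applies entropy() to a single proportion, so binary (two-class) Shannon entropy is the natural reading. A sketch; the endpoint handling and log base are assumptions:

#include <math.h>

/* Hypothetical: binary entropy of a proportion p; 0 at the endpoints. */
static float entropy(float p){
    if (p <= 0.0f || p >= 1.0f)
        return 0.0f;
    return -(p*log2f(p) + (1.0f - p)*log2f(1.0f - p));
}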
Example #13
void CCLEncoder::doEncodeFrame(CImage<uint8_t> * pFrame, CBitstream * pBstr, FRAME_TYPE frame_type)
{
	(*static_cast<CImage<float>*>(m_imgF)) = (*pFrame);
	m_imgF->CopyToDevice();
	transform(m_imgF, m_img, m_predTab, frame_type);
	entropy(m_img, m_predTab, pBstr, frame_type);
	itransform(m_imgF, m_img, m_predTab, frame_type);
}
Example #14
double discAndCalcEntropy(double* dataVector, int vectorLength) {
    ProbabilityState state = discAndCalcProbability(dataVector, vectorLength);
    double h = entropy(state);

    freeProbabilityState(state);

    return h;
}/*discAndCalcEntropy(double* ,int)*/
Example #15
double calcEntropy(uint* dataVector, int vectorLength) {
    ProbabilityState state = calculateProbability(dataVector, vectorLength);
    double h = entropy(state);

    freeProbabilityState(state);
    
    return h;
}/*calcEntropy(uint* ,int)*/
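Examples #14 and #15 come from a MIToolbox-style API where entropy() consumes a precomputed ProbabilityState. A sketch of the plausible shape, assuming the state exposes a probability vector and its length (the field names here are guesses, not the library's confirmed layout):

#include <math.h>

double entropy(ProbabilityState state) {
    double h = 0.0;
    int i;
    for (i = 0; i < state.numStates; i++) {
        if (state.probabilityVector[i] > 0.0)
            h -= state.probabilityVector[i] * log2(state.probabilityVector[i]);
    }
    return h;   /* bits, assuming the library works in log base 2 */
}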
Example #16
 double information(InternalMatrix& inMx)
     {
         switch(information_measure) {
             case InfoType::gini :
                 return gini(inMx);
             case InfoType::entropy :
                 return entropy(inMx);
         }
         return entropy(inMx); // fallback so every control path returns a value
     }
Example #17
matrix *mi(matrix *m, int n, int k, int g) {
	
	//printf("before mi\n");
	matrix *mi;
	
//AM:
	//	mi = new_matrix(m->rows, m->rows);
	mi = new_matrix(g, ((m->rows)-g));
//AM: this is the returned matrix mi:
	//mi[i][j-g] is the mi between response vec m[i][] i=0,...,g-1
	//and predictor vec m[j][] where j=g,...,(m->rows)-1
	
	int *knots;
	knots = (int *)malloc((n + k + 1) * sizeof(int));
	calcKnot(knots, n, k);
	/*int l;
	for (l = 0; l < n + k + 1; ++l)
		printf("%d\n", knots[l]);
	*/
	//printf("before p\n");
	matrix *w;
	w = calcWeights(m, n, k, knots);
//	print_matrix(w);
//	printf("w1: %d, %d\n", w->rows, w->cols);
	//printf("p1: %d, %d\n", p->rows, p->cols);
	//printf("before e\n"); //here
	matrix *e;
	e = entropy(w, m->cols, n);
//	print_matrix(e);
	
	//printf("before e2\n");
	matrix *e2;
	e2 = entropy2(w, m->cols, n, g);
//	print_matrix(e2);
	
	int i,j;

//AM:
	//for (i = 0; i < mi->rows; ++i) {
	for (i = 0; i < g; ++i) {
		for (j = g; j < (m->rows) ; ++j) {
			mi->m[i][j-g] = e->m[i][0] + e->m[j][0] - e2->m[i][j];
//			Rprintf("[i,j,mi] = %d %d %lf\n", i, j, mi->m[i][j]);
		}
	}
	//}
//	print_matrix(mi);
	free(knots);
	free_matrix(w);
	free_matrix(e);
	free_matrix(e2);


	return mi;
	
}
Example #18
static int
rfTrainTree(RANDOM_FOREST *rf, TREE *tree, int *training_classes, double **training_data, int ntraining)
{
  int    done = 0, n, iter, tno, fno, f, i ;
  double total_entropy, last_f ;

  total_entropy = entropy(tree->root.class_counts, rf->nclasses, tree->root.class_counts) ;
  
  // make sure there is at least one feature with nonzero range
  for (f = 0 ; f < tree->nfeatures ; f++)
  {
    fno = tree->feature_list[f] ;
    last_f = training_data[tree->root.training_set[0]][fno] ;
    for (i = 1 ; i < tree->root.total_counts ; i++)
    {
      tno = tree->root.training_set[i] ;
      if (training_data[tno][fno] != last_f)
	break ;
    }
    if (i < tree->root.total_counts) 
      break ;  // found one feature with some spread
  }
  if (f >= tree->nfeatures)  // all features are identical - can't train
  {
    rfFindLeaves(tree) ;
    return(ERROR_BADPARM) ;
  }
  iter = 0 ;
  while (!done && !FZERO(total_entropy))
  {
    done = rfTrainNode(rf, tree, &tree->root, training_classes, training_data, rf->ntraining);
    rfFindLeaves(tree) ;
    for (total_entropy = 0.0, n = 0 ; n < tree->nleaves ; n++)
      total_entropy += entropy(tree->leaves[n]->class_counts, rf->nclasses, tree->root.class_counts) ;
    if (Gdiag & DIAG_SHOW && DIAG_VERBOSE_ON)
      printf("\taverage leaf entropy = %2.4f, nleaves = %d, max depth %d\n",
	     total_entropy/tree->nleaves, tree->nleaves, tree->depth) ;
    if (iter++ > 10)
      break ;
  }
  if (tree->nleaves == 0)  // only if loop above never executed
    rfFindLeaves(tree) ;
  return(NO_ERROR) ;
}
Example #19
  void computeMIvalues(vector< vector< double> > *mm, MIvalues *val)
  {
    if (!check_jointmatrix(mm)) {throw notajointmatrix();}
    
    int m=mm->size();
    int n=(*mm)[0].size();

    vector <double> Px (m, 0.0);
    int p,q;
    for(p=0; p < m; p++)
      for (q=0; q<n; q++)
	{  Px[p]+= (*mm)[p][q]; }
    
    //for(p=0; p < m; p++) {cout << "Px " << Px[p] << "\n";}
    
    (*val).Hx = entropy(&Px); 
    //cout << "Hx " << (*val).Hx << "\n";

    vector <double> Py (n, 0.0);
    for (q=0; q<n; q++)
      for(p=0; p < m; p++)
	{ Py[q] += (*mm)[p][q];  }

    //for(q=0; q < n; q++) {cout << "Py " << Py[q] << "\n";}
    
    (*val).Hy = entropy(&Py); 
    //cout << "Hy " << (*val).Hy << "\n";

    double HXY = 0.0;
    for (q=0; q<n; q++)
      for(p=0; p < m; p++)
	{
	  if ((*mm)[p][q] > 0)
	    { HXY += (*mm)[p][q] * MyLog((*mm)[p][q]); }   // accumulates sum p*log(p) = -H(X,Y)
	}

    //cout << "TT  " << (*mm)[11][11] * MyLog((*mm)[11][11]) << "  \n";


    //cout << "HXY " << HXY << " \n";
    (*val).I=(*val).Hy + (*val).Hx + HXY;

  }
Example #20
int
RFtrainTree(RANDOM_FOREST *rf, int tno, int *training_classes, double **training_data, int ntraining)
{
  int    i, f ;
  TREE   *tree ;


  rf->training_data = training_data ;
  rf->training_classes = training_classes ;

  for (f = 0 ; f < rf->nfeatures ; f++)
  {
    rf->feature_min[f] = 1e20 ;
    rf->feature_max[f] = -1e20 ;
    for (i = 0 ; i < ntraining ; i++)
    {
      if (training_data[i][f] < rf->feature_min[f])
        rf->feature_min[f] = training_data[i][f] ;
      if (training_data[i][f] > rf->feature_max[f])
        rf->feature_max[f] = training_data[i][f] ;
    }
  }

  tree = &rf->trees[tno] ;

  tree->feature_list = (int *)calloc(rf->nfeatures, sizeof(tree->feature_list[0]));
  if (tree->feature_list == NULL)
    ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate feature list %d (%d)",
	      tno,rf->nfeatures) ;
  tree->nfeatures = rf->nfeatures ;
  
  for (i = 0  ; i < rf->nfeatures  ; i++)
    tree->feature_list[i] = i ;
  

  tree->root.training_set = (int *)calloc(ntraining, sizeof(tree->root.training_set[0])) ;
  if (tree->root.training_set == NULL)
    ErrorExit(ERROR_NOMEMORY, "RFtrainTree: could not allocate root training set") ;

  for (i = 0  ; i < ntraining  ; i++)
  {
    tree->root.class_counts[training_classes[i]]++ ;
    tree->root.training_set[tree->root.total_counts] = i ; // should be +ntraining
    tree->root.total_counts++ ;
  }
  rf->ntraining += ntraining ;

  if (Gdiag & DIAG_SHOW && DIAG_VERBOSE_ON)
    printf("tree %d: initial entropy = %f\n", tno, 
	   entropy(tree->root.class_counts, rf->nclasses, tree->root.class_counts)) ;
  rfTrainTree(rf, tree, training_classes, training_data, rf->ntraining) ;

  return(NO_ERROR) ;
}
Example #21
void PasswordChecker::evaluatePasswordStrength(const QString &password, QColor &color, QString &grade, qreal *_fitness)
{
    qreal fitness = 0;
    color.setRgb(153, 153, 153);
    if (password.isEmpty()) {
        grade = "?";
    }
    else {
        fitness = password.size() * entropy(password);
        if (fitness >= 11.0) {
            color.setRgb(0, 255, 30);
            grade = tr("Supercalifragilisticexpialidocious");
        }
        else if (fitness >= 9.0) {
            color.setRgb(0, 255, 30);
            grade = tr("Brutally strong");
        }
        else if (fitness >= 7.0) {
            color.setRgb(0, 255, 30);
            grade = tr("Fabulous");
        }
        else if (fitness >= 5.0) {
            color.setRgb(0, 255, 30);
            grade = tr("Very good");
        }
        else if (fitness >= 4.0) {
            color.setRgb(111, 255, 0);
            grade = tr("Good");
        }
        else if (fitness >= 3.0) {
            color.setRgb(234, 255, 0);
            grade = tr("Mediocre");
        }
        else if (fitness >= 2.5) {
            color.setRgb(255, 153, 0);
            grade = tr("You can do better");
        }
        else if (fitness >= 2.0) {
            color.setRgb(255, 48, 0);
            grade = tr("Bad");
        }
        else if (fitness >= 1.5) {
            color.setRgb(255, 0, 0);
            grade = tr("It can hardly be worse");
        }
        else {
            color.setRgb(200, 0, 0);
            grade = tr("Useless");
        }
    }
    if (_fitness != Q_NULLPTR)
        *_fitness = fitness;
}
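The entropy(password) call above is unshown; given that fitness = length * entropy, one plausible reading is the per-character Shannon entropy of the password's character histogram. A Qt-flavored sketch; the real helper may weigh character classes differently:

#include <QString>
#include <QHash>
#include <QtMath>

// Hypothetical: Shannon entropy in bits per character of the password.
static qreal entropy(const QString &password)
{
    QHash<QChar, int> counts;
    for (const QChar &ch : password)
        ++counts[ch];
    const qreal n = password.size();
    qreal h = 0.0;
    for (auto it = counts.cbegin(); it != counts.cend(); ++it) {
        const qreal p = it.value() / n;
        h -= p * qLn(p) / qLn(2.0);
    }
    return h;   // 0 for an empty password; the caller already handles that case
}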
Example #22
void CCLParallelEncoder::doEncodeFrame(CImage<uint8_t> * pFrame, CBitstream * pBstr, FRAME_TYPE frame_type)
{
	(*(CImage<float>*)m_imgF) = (*pFrame);
	m_imgF->CopyToDevice();
	transform(m_imgF, m_img, m_predTab, frame_type);
	m_dev.Finish();
	m_img->CopyToHost();
	m_predTab->CopyToHost();
	itransform(m_imgF, m_img, m_predTab, frame_type);
	entropy(m_img, m_predTab, pBstr, frame_type);
	m_dev.Finish();
}
Example #23
/* ************************************************************************
 * Function that implements the Random Feature Selection process.
 * param :
 * 		node : the current node to be split
 * 		sortedInd : an array of instance indices sorted by each attribute's values.
 *
 * Return the Rule object for the split procedure, produced with the selected criterion.
 */
Rule * 		RndTree::randomFeatSelection(Node * node, u_int ** sortedInd)
{
//cout << "random feature selection\n";
    DataHandler * data = node->getDataSet();
    long double bestGain = 0.0;
    double bestSplit = 0.0;
    u_int bestAtt = data->getClassInd();
    bool found = false;

    double w_size = data->w_size();

    // Compute the gini index or entropy value for the current node subset of data.
    long double eval0;
    if(gin) eval0 = gini(data->getDistrib(),data->getNbClass(),w_size);
    else eval0 = entropy(data->getDistrib(),data->getNbClass(),w_size);

    // have a vector to memorize attributes already evaluated.
    vector<u_int> attWindow;
    for(u_int i=0; i<data->dim(); i++)
        if(i != data->getClassInd()) attWindow.push_back(i);

    int k = nbFeat;
//node->getDataSet()->afficheBase();
    while((attWindow.size()>0) && (k > 0) && (!found))
    {
        int r = 0;
        if(attWindow.size() > 1) r = Utils::randInt(attWindow.size());
        u_int attIndex = attWindow[r];

        double split;

        long double gain = evalAttribute(node,attIndex,sortedInd[attIndex],&split,eval0,w_size);

        if(gain > bestGain)
        {
            bestGain = gain;
            bestAtt = attIndex;
            bestSplit = split;
            found = true;
        }

        attWindow.erase(attWindow.begin()+r);
        k--;
    }

    if(!found) return NULL;

    u_int bestAttId = data->getAttribute(bestAtt)->getId();

    if(data->getAttribute(bestAtt)->is_nominal()) return new Rule(bestAttId,data->getAttribute(bestAtt)->getNbModal());
    else return new Rule(bestAttId,bestSplit);
}
Example #24
int main(void)
{
	double ret = 0.0;
	ret = entropy(K,arr_len(K));
	printf("entropy(K)=%f\n",ret);
	
	ret = entropy(C,arr_len(C));
	printf("entropy(C)=%f\n",ret);

	ret = entropy(F,arr_len(F));
	printf("entropy(F)=%f\n",ret);

	ret = KL_divergence(K,arr_len(K),C,arr_len(C));
	printf("KL_divergence(K,C)=%f\n",ret);

	ret = KL_divergence(K,arr_len(K),F,arr_len(F));
	printf("KL_divergence(K,F)=%f\n",ret);

	printf("F has more uncertainty than the others, as measured by entropy\n");
	printf("K is more similar to C than to F, since KL(K,C)<KL(K,F)\n");

	return 0;
}
Example #25
// perform actual computation
void forestFindThr( int H, int N, int F, const float *data,
  const uint32 *hs, const float *ws, const uint32 *order, const int split,
  uint32 &fid, float &thr, double &gain )
{
  double *Wl, *Wr, *W; float *data1; uint32 *order1;
  int i, j, j1, j2, h; double vBst, vInit, v, w, wl, wr, g, gl, gr;
  Wl=new double[H]; Wr=new double[H]; W=new double[H];
  // perform initialization
  vBst = vInit = 0; g = 0; w = 0; fid = 1; thr = 0;
  for( i=0; i<H; i++ ) W[i] = 0;
  for( j=0; j<N; j++ ) { w+=ws[j]; W[hs[j]-1]+=ws[j]; }
  if( split==0 ) { for( i=0; i<H; i++ ) g+=gini(W[i]); vBst=vInit=(1-g/w/w); }
  if( split==1 ) { for( i=0; i<H; i++ ) g+=entropy(W[i]); vBst=vInit=g/w; }
  // loop over features, then thresholds (data is sorted by feature value)
  for( i=0; i<F; i++ ) {
    order1=(uint32*) order+i*N; data1=(float*) data+i*size_t(N);
    for( j=0; j<H; j++ ) { Wl[j]=0; Wr[j]=W[j]; } gl=wl=0; gr=g; wr=w;
    for( j=0; j<N-1; j++ ) {
      j1=order1[j]; j2=order1[j+1]; h=hs[j1]-1;
      if(split==0) {
        // gini = 1-\sum_h p_h^2; v = gini_l*pl + gini_r*pr
        wl+=ws[j1]; gl-=gini(Wl[h]); Wl[h]+=ws[j1]; gl+=gini(Wl[h]);
        wr-=ws[j1]; gr-=gini(Wr[h]); Wr[h]-=ws[j1]; gr+=gini(Wr[h]);
        v = (wl-gl/wl)/w + (wr-gr/wr)/w;
      } else if (split==1) {
        // entropy = -\sum_h p_h log(p_h); v = entropy_l*pl + entropy_r*pr
        gl+=entropy(wl); wl+=ws[j1]; gl-=entropy(wl);
        gr+=entropy(wr); wr-=ws[j1]; gr-=entropy(wr);
        gl-=entropy(Wl[h]); Wl[h]+=ws[j1]; gl+=entropy(Wl[h]);
        gr-=entropy(Wr[h]); Wr[h]-=ws[j1]; gr+=entropy(Wr[h]);
        v = gl/w + gr/w;
      } else if (split==2) {
        // twoing: v = pl*pr*\sum_h(|p_h_left - p_h_right|)^2 [slow if H>>0]
        wl+=ws[j1]; Wl[h]+=ws[j1]; wr-=ws[j1]; Wr[h]-=ws[j1];
        g=0; for( int h1=0; h1<H; h1++ ) g+=fabs(Wl[h1]/wl-Wr[h1]/wr);
        v = - wl/w*wr/w*g*g;
      }
      if( v<vBst && data1[j2]-data1[j1]>=1e-6f ) {
        vBst=v; fid=i+1; thr=0.5f*(data1[j1]+data1[j2]); }
    }
  }
  delete [] Wl; delete [] Wr; delete [] W; gain = vInit-vBst;
}
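forestFindThr() applies gini() and entropy() to single unnormalized class weights. The incremental updates above maintain gl = sum_h e(Wl[h]) - e(wl), which equals wl times the left child's entropy exactly when e(x) = -x*log(x); the definitions below are a reconstruction consistent with that arithmetic, not the toolbox's verbatim macros:

#include <cmath>

// Reconstructed helpers; the guard value and log base in the original may differ.
static inline double gini(double w)    { return w * w; }   // used via 1 - sum(W^2)/w^2
static inline double entropy(double w) { return (w < 1e-12) ? 0.0 : -w * std::log(w); }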
Example #26
AtrialFibrApi::AtrialFibrApi(
    const QVector<double> &signal,
    const QVector<QVector<double>::const_iterator> &RPeaksIterators,
    const QVector<QVector<double>::const_iterator> &pWaveStarts)
    : pWaveStarts(pWaveStarts), endOfSignal(signal.end()), entropyResult(0.0),
      divergenceResult(0.0), pWaveOccurenceRatioResult(0.0) {
  rrmethod.RunRRMethod(RPeaksIterators);
  pWaveOccurenceRatioResult = pWaveOccurenceRatio(pWaveStarts, endOfSignal);
  Matrix3_3 patternMatrix = { { { { 0.005, 0.023, 0.06 } },
                                { { 0.007, 0.914, 0.013 } },
                                { { 0.019, 0.006, 0.003 } } } };
  divergenceResult = JKdivergence(rrmethod.getMarkovTable(), patternMatrix);
  entropyResult = entropy(rrmethod.getMarkovTable());
}
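The entropy(rrmethod.getMarkovTable()) call is unshown; one plausible sketch treats the nine transition probabilities as a single distribution (the pattern matrix above sums to roughly 1). Matrix3_3 is assumed to be a nested std::array:

#include <array>
#include <cmath>

using Matrix3_3 = std::array<std::array<double, 3>, 3>;

// Hypothetical: Shannon entropy over all entries of the 3x3 Markov table.
double entropy(const Matrix3_3 &m) {
    double h = 0.0;
    for (const auto &row : m)
        for (double p : row)
            if (p > 0.0)
                h -= p * std::log(p);
    return h;
}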
Example #27
static double
compute_info_gain(RF *rf, TREE *tree, NODE *parent, NODE *left, NODE *right, double **training_data, int fno, double thresh) 
{
  double entropy_before, entropy_after, wl, wr ;
  int    i, tno, c ;
  NODE   *node ;

  entropy_before = entropy(parent->class_counts, rf->nclasses, tree->root.class_counts) ;
  memset(left->class_counts, 0, rf->nclasses*sizeof(left->class_counts[0])) ;
  memset(right->class_counts, 0, rf->nclasses*sizeof(right->class_counts[0])) ;
  left->total_counts = right->total_counts = 0 ;
  for (tno = 0 ; tno < parent->total_counts ; tno++)
  {
    i = parent->training_set[tno] ;
    if (training_data[i][fno] < thresh)
      node = left ;
    else
      node = right ;
      
    node->class_counts[rf->training_classes[i]]++ ;
    node->training_set[node->total_counts] = i ;
    node->total_counts++ ;
  }
  for (wr = wl = 0.0, c = 0 ; c < rf->nclasses ; c++)
  {
    if (tree->root.class_counts[c] == 0)
      continue ;
    wl += (double)left->class_counts[c] / tree->root.class_counts[c] ;
    wr += (double)right->class_counts[c] / tree->root.class_counts[c] ;
  }
  wl = wl / (wl + wr); wr = 1-wl ;
  
  entropy_after = 
    wl * entropy(left->class_counts, rf->nclasses, tree->root.class_counts) +
    wr * entropy(right->class_counts, rf->nclasses, tree->root.class_counts) ;
  return(entropy_before - entropy_after) ;
}
Example #28
int main(int argc, char *argv[])
{
    int k = 100000;
    int exp = 1;
    // double c0 = 2, c1 = 3, c2 = 1;
    // double c0 = 1.3, c1 = 8, c2 = 1.5;
    
    std::CommandLine cmd;
    cmd.AddValue ("exp",  "", exp);
    cmd.Parse (argc, argv);
    

    // Set sample size for test
    std::vector<int> testN;
    long n = 1000;
    for ( int i = 0; i < 15; i++ )
    {
        n *= 2;
    }
    testN.push_back( n );

    // Set distribution for test
    std::vector<double> p;
    switch(exp)
    {
    case 0: p = uniform(k); break;
    case 1: p = zipf(k); break;
    case 2: p = zipfd5(k); break;
    case 3: p = mixgeozipf(k); break;

    }
    // Set estimator
    Entropy entropy( k );
    entropy.setDegree( 18 );
    entropy.setInterval( 40 );
    entropy.setThreshold( 18 ); 
    printf("Alphabet size=%d.\n", entropy.getAlphabetSize());
    printf("Polynoimal degree=%d.\n", entropy.getDegree());
    printf("Approximation interval=[0,%.2f/n].\n", entropy.getInterval());
    printf("Plug-in threshold=%d.\n",(int)floor(entropy.getThreshold())+1);
    printf("Unit: bits\n");

    // TEST_fixed_P(p, entropy, testN);

    const int trials = 50;
    TEST_fixed_P_RMSE(p, entropy, testN, trials);
    
    return 0;
}
Example #29
/**
 *  Gibbs function in Kelvin (\f$ G/R \f$) for
 *  one species. @param t temperature @param s species object
 */
double gibbs(double t, const Species& s) {
    if (s.thermoFormatType == 1) {
        double s0r = entropy(t, s);
        double h0r = enthalpy(t, s);
        return (h0r - s0r * t);
    }
    const vector_fp* cp;
    if (t > s.tmid) cp = &s.highCoeffs;
    else cp = &s.lowCoeffs;
    const vector_fp& c = *cp;
    double h0rt = c[0] + 0.5*c[1]*t + c[2]*t*t/3.0 + 0.25*c[3]*t*t*t
                  + 0.2*c[4]*t*t*t*t + c[5]/t;
    double s0r = c[0]*log(t) + c[1]*t + 0.5*c[2]*t*t + c[3]*t*t*t/3.0
                 + 0.25*c[4]*t*t*t*t + c[6];
    return t*(h0rt - s0r);
}
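For reference, the s0r expression inlined in gibbs() is the standard NASA 7-coefficient fit for dimensionless entropy S0/R. If the entropy(t, s) helper called in the thermoFormatType == 1 branch evaluated the same fit, it would look like the sketch below; that is an assumption, since format type 1 may store coefficients differently:

// Hypothetical companion, mirroring the inline s0r polynomial in gibbs().
double entropy(double t, const Species& s) {
    const vector_fp& c = (t > s.tmid) ? s.highCoeffs : s.lowCoeffs;
    return c[0]*log(t) + c[1]*t + 0.5*c[2]*t*t + c[3]*t*t*t/3.0
           + 0.25*c[4]*t*t*t*t + c[6];
}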
Example #30
File: cart.cpp Project: ngoix/OCRF
/* ************************************************************************
 * Function that evaluates the quality of the current split
 * param :
 * 		n : the weighted size of the current node's subset
 * 		nbClass : the number of possible class values
 * 		distribs : a 2D array holding the class distribution of each child node to be created
 * 		tots : an array with the total subset size of each child node
 * 		nbSplit : the number of child nodes to be created
 */
long double 	Cart::eval(double n, u_int nbClass, double ** distribs, double * tots, u_int nbSplit)
{
    long double eval = 0.0;

    for(u_int i=0; i<nbSplit; i++)
    {
        if(tots[i] != 0.0)
        {
            long double i_t;
            if(gin) i_t = gini(distribs[i],nbClass,tots[i]);
            else i_t = entropy(distribs[i],nbClass,tots[i]);
            eval += ((tots[i]/n) * i_t);
        }
    }

    return eval;
}