/*
  Accumulate, over the subtree rooted at 'parent', the count-weighted
  information gain of every internal node that splits on feature 'fno'.

  rf, tree      - forest and tree being examined (the root class counts are
                  passed to entropy() and used to weight the two children)
  parent        - node being examined; left/right are its children
                  (left == NULL is treated as "parent is a leaf")
  fno           - feature index to collect the gain for
  ptotal_count  - incremented by parent->total_counts for each node that
                  splits on fno

  Returns the sum of total_counts * (entropy_before - entropy_after) over
  all matching nodes in the subtree (0 for a leaf).
*/
static double
rfFeatureInfoGain(RANDOM_FOREST *rf, TREE *tree, NODE *parent, NODE *left, NODE *right,
                  int fno, int *ptotal_count)
{
  int    cls ;
  double gain, h_parent, h_children, frac_left, frac_right ;

  if (left == NULL)   // leaf: nothing to contribute
    return(0.0) ;

  gain = 0.0 ;
  if (parent->feature == fno)   // this node splits on the requested feature
  {
    h_parent = entropy(parent->class_counts, rf->nclasses, tree->root.class_counts) ;

    // per-class fractions of the root counts that went left/right
    frac_left = frac_right = 0.0 ;
    for (cls = 0 ; cls < rf->nclasses ; cls++)
    {
      if (tree->root.class_counts[cls] != 0)
      {
        frac_left  += (double)left->class_counts[cls]  / tree->root.class_counts[cls] ;
        frac_right += (double)right->class_counts[cls] / tree->root.class_counts[cls] ;
      }
    }
    frac_left  = frac_left / (frac_left + frac_right) ;
    frac_right = 1-frac_left ;

    h_children =
      frac_left  * entropy(left->class_counts,  rf->nclasses, tree->root.class_counts) +
      frac_right * entropy(right->class_counts, rf->nclasses, tree->root.class_counts) ;

    gain = ((double)parent->total_counts * (h_parent-h_children)) ;
    *ptotal_count += parent->total_counts ;
  }

  // descend into both children regardless of which feature this node used
  gain += rfFeatureInfoGain(rf, tree, left,  left->left,  left->right,  fno, ptotal_count) ;
  gain += rfFeatureInfoGain(rf, tree, right, right->left, right->right, fno, ptotal_count) ;

  return(gain) ;
}
// Function to compute (naive) mutual information double mutualinfo(const double *cij, const int numi, const int numj) { int i,j,k; double I; // Compute marginal distributions double *pi = (double *) mxMalloc(numi*sizeof(double)); //new double[numi]; double *pj = (double *) mxMalloc(numj*sizeof(double)); //new double[numj]; double *pij = (double *) mxMalloc(numi*numj*sizeof(double)); //new double[numi*numj]; double N =0.; for (i=0; i<numi; i++) { pi[i] = 0.0; } for (j=0; j<numj; j++) { pj[j] = 0.0; } for (k=0; k<numi*numj; k++) { N += cij[k]; pij[k] = 0.; } for (i=0; i<numi; i++) { for (j=0; j<numj; j++) { pij[i+j*numi] = cij[i+j*numi]/N; pi[i] += pij[i+j*numi]; pj[j] += pij[i+j*numi]; } } // Compute MI using entropy of joint and marginal distributions I = entropy(pi,numi) + entropy(pj,numj) - entropy(pij, numi*numj); mxFree(pi); //delete[] pi; mxFree(pj); //delete[] pj; mxFree(pij); //delete[] pij; return I; }
/*
  Train 'node' and, depth permitting, recurse into its children.

  If the node has no children yet, two are allocated and
  find_optimal_feature_and_threshold() is asked to fill them in; when that
  call returns 0 the children are freed again and 0 is returned.
  Otherwise tree->depth is raised to this node's depth if needed and, while
  node->depth < rf->max_depth, each child whose entropy() is > 0 is trained
  recursively.

  Returns 0 if no split could be found for a previously untrained node,
  1 otherwise.
*/
static int
rfTrainNode(RANDOM_FOREST *rf, TREE *tree, NODE *node, int *training_classes,
            double **training_data, int ntraining)
{
  if (node->left == NULL)  // not trained yet
  {
    int split_found ;

    node->left  = rfAllocateNode(node->total_counts, node->depth+1, rf->nclasses) ;
    node->right = rfAllocateNode(node->total_counts, node->depth+1, rf->nclasses) ;

    split_found = find_optimal_feature_and_threshold(rf, tree, node, node->left, node->right,
                                                     training_classes, training_data,
                                                     rf->ntraining) ;
    if (split_found == 0)
    {
      // couldn't find a threshold to improve separation
      rfFreeNode(&node->left) ;
      rfFreeNode(&node->right) ;
      return(0) ;
    }
  }

  if (tree->depth < node->depth)
    tree->depth = node->depth ;

  if (node->depth >= rf->max_depth)  // depth limit reached - don't descend
    return(1) ;

  // only children that are still impure get trained further
  if (entropy(node->left->class_counts, rf->nclasses, tree->root.class_counts) > 0)
    rfTrainNode(rf, tree, node->left, training_classes, training_data, ntraining) ;

  if (entropy(node->right->class_counts, rf->nclasses, tree->root.class_counts) > 0)
    rfTrainNode(rf, tree, node->right, training_classes, training_data, ntraining) ;

  return(1) ;
}
/*
  Entropy of I given J for the joint table H (clampI*clampJ entries).

  hJ is filled by _marginalize(hJ, H, clampI, clampJ, 1) and then used as
  the J marginal. The result is entropy(H) - entropy(hJ). The value that
  entropy() writes through its third argument is discarded here.
*/
double conditional_entropy(const double* H, double* hJ, unsigned int clampI, unsigned int clampJ)
{
  double scratch;          /* receives entropy()'s third-argument output; unused */
  double jointEntropy;
  double marginalEntropy;

  _marginalize(hJ, H, clampI, clampJ, 1);

  jointEntropy    = entropy(H, clampI*clampJ, &scratch);
  marginalEntropy = entropy(hJ, clampJ, &scratch);

  return jointEntropy - marginalEntropy;
}
/*
  Mutual information of the joint table H (clampI*clampJ entries).

  hI and hJ are work buffers filled by _marginalize() with axis flag 0 and 1
  respectively. The result is entropy(hI) + entropy(hJ) - entropy(H).

  n is forwarded to every entropy() call; the joint-table call is made last,
  so whatever entropy() stores there reflects H on return.
*/
double mutual_information(const double* H,
                          double* hI, unsigned int clampI,
                          double* hJ, unsigned int clampJ,
                          double* n)
{
  double marginalI, marginalJ, joint;

  _marginalize(hI, H, clampI, clampJ, 0);
  _marginalize(hJ, H, clampI, clampJ, 1);

  /* Keep this call order: each call receives the same n pointer. */
  marginalI = entropy(hI, clampI, n);
  marginalJ = entropy(hJ, clampJ, n);
  joint     = entropy(H, clampI*clampJ, n);

  return marginalI + marginalJ - joint;
}
/**
 * @brief Computes rolling hash on entropic data from a randomSource if data
 * meets threshold on entropy estimate.
 *
 * The per-sample estimates returned by RandomSource::bitEntropy() are
 * averaged; an empty or non-finite estimate is treated as "not entropic
 * enough". The bytes appended by RandomSource::appendData() are then split
 * into _numDivs batches (the last batch also takes the remainder); each batch
 * must pass entropy() before it is fed to its entry of _hashVec.
 *
 * @param randomSource pointer to a RandomSource.
 *
 * @return true, if data was entropic enough to be processed. false if a seed
 *         is already pending, or if any entropy check failed (batches that
 *         passed before the failing one have already been hashed).
 */
bool SeedGenerator::processFromSource(RandomSource* randomSource)
{
    // A finished seed must be flushed or reset before more data is accepted.
    if (_seedReady)
    {
        return false;
    }

    // Compute avg. bit occurrence in a sample from randomSource.
    const std::vector<double> sampleAvgVec = randomSource->bitEntropy();

    // No estimates at all: 0/0 below would be NaN, and NaN compares false
    // against the threshold, which used to let the data through unchecked.
    if (sampleAvgVec.empty())
    {
        std::cerr << "[Entropy Error] Sample entropy estimate low" << std::endl;
        return false;
    }

    // Seed the accumulation with a double (0.0), not a float (0.0f), so the
    // sum is not carried out in single precision.
    const double sum =
        std::accumulate(sampleAvgVec.begin(), sampleAvgVec.end(), 0.0);
    const double avgSampleEntropy =
        sum / static_cast<double>(sampleAvgVec.size());

    // Negated >= so that a NaN estimate is rejected as well.
    if (!(avgSampleEntropy >= SeedGenerator::ENTROPYTHRESHOLD))
    {
        // Data not good enough.
        std::cerr << "[Entropy Error] Sample entropy estimate low" << std::endl;
        return false;
    }

    // Load bytes from randomsource into randomData.
    std::vector<uint8_t> randomData;
    randomSource->appendData(randomData);

    // Split random bytes into _numDivs to compute _numDivs hashes.
    auto it = randomData.data();
    int stepSize = randomData.size() / _numDivs;
    int excess = randomData.size() % _numDivs;

    // All batches but the last have exactly stepSize bytes.
    for (int i = 0; i < (_numDivs - 1); ++i)
    {
        // Check the byte-level entropy estimate for this batch.
        if (!entropy(it, it + stepSize))
        {
            // Data not good enough
            std::cerr << "[Error] Byte entropy estimate low" << std::endl;
            return false;
        }

        // Compute rolling hash for this batch.
        _hashVec[i].Update(it, stepSize);
        it = it + stepSize;
    }

    // Final batch: stepSize bytes plus whatever did not divide evenly.
    if (!entropy(it, it + stepSize + excess))
    {
        // Data not good enough
        std::cerr << "[Error] Byte entropy estimate low" << std::endl;
        return false;
    }
    _hashVec[_numDivs - 1].Update(it, stepSize + excess);

    return true;
}
/* Print 'count' bytes to stderr as decimals, 77 per line, then a newline. */
static void f_dump_bytes(const uint8_t *bytes, int count)
{
	for (int i = 0; i < count; i++)
	{
		const char *sep = (i && (0 == (i+1)%77)) ? "\n" : " ";
		fprintf(stderr, "%d%s", bytes[i], sep);
	}
	fprintf(stderr, "\n");
}

/*
 * Round-trip self test: fill a buffer with rand() bytes, encode it with
 * alloc_and_transform_from_RAW_to_RLE8, decode it back with
 * alloc_and_transform_from_RLE8_to_RAW, and assert that the decoded bytes
 * equal the input. Every stage is dumped to stderr together with its
 * entropy() value.
 * (A small hand-written 22-byte sample and a B64 variant existed here as
 * commented-out code.)
 */
static void f(void)
{
	int n = 100003;
	uint8_t data[n];
	for (int i = 0; i < n; i++)
		data[i] = rand();

	fprintf(stderr, "\n\n\noriginal data:\n");
	f_dump_bytes(data, n);
	fprintf(stderr, "entropy = %g\n", entropy(data, n));

	/* encode */
	int ne;
	uint8_t *edata = alloc_and_transform_from_RAW_to_RLE8(data, n, &ne);
	fprintf(stderr, "\nPCX data:\n");
	f_dump_bytes(edata, ne);
	fprintf(stderr, "%d => %d\n", n, ne);
	fprintf(stderr, "entropy = %g\n", entropy(edata, ne));

	/* decode */
	int dne;
	uint8_t *dedata = alloc_and_transform_from_RLE8_to_RAW(edata, ne, &dne);
	fprintf(stderr, "\ndata recovered from PCX:\n");
	f_dump_bytes(dedata, dne);
	fprintf(stderr, "n=%d dne=%d\n", n, dne);

	/* the round trip must be lossless */
	assert(n == dne);
	for (int i = 0; i < n; i++)
		assert(data[i] == dedata[i]);

	free(edata);
	free(dedata);
}
/*
  Normalized mutual information of the joint table H (clampI*clampJ entries).

  hI and hJ are work buffers filled by _marginalize() with axis flag 0 and 1.
  With eI = entropy(hI), eJ = entropy(hJ) and eIJ = entropy(H), the result is
  2*(1 - eIJ/(eI+eJ)) when eI+eJ > 0, and 0.0 otherwise.

  n is forwarded to every entropy() call; the joint-table call is made last.
*/
double normalized_mutual_information(const double* H,
                                     double* hI, unsigned int clampI,
                                     double* hJ, unsigned int clampJ,
                                     double *n)
{
  double marginalI, marginalJ, joint, marginalSum;

  _marginalize(hI, H, clampI, clampJ, 0);
  _marginalize(hJ, H, clampI, clampJ, 1);

  /* Keep this call order: each call receives the same n pointer. */
  marginalI = entropy(hI, clampI, n);
  marginalJ = entropy(hJ, clampJ, n);
  joint     = entropy(H, clampI*clampJ, n);

  marginalSum = marginalI + marginalJ;

  /* Avoid dividing by a non-positive marginal entropy sum. */
  return (marginalSum > 0.0) ? 2*(1-joint/marginalSum) : 0.0;
}
// Multi-information of the variables in d: the sum of the single-variable
// entropies minus the entropy of all variables taken together.
//
// d, nsamples, nvars and c are forwarded unchanged to the five-argument
// entropy() defined elsewhere; the last argument is a selection mask with
// one flag per variable.
//
// The selection mask is now released before returning (it used to leak on
// every call).
double multiinformation(const int *d, int nsamples, int nvars, int c)
{
    bool *sel = new bool[nvars];
    double sum = 0;

    for (int i = 0; i < nvars; ++i)
        sel[i] = false;

    // Entropy of each variable on its own: exactly one flag set at a time.
    for (int i = 0; i < nvars; ++i)
    {
        sel[i] = true;
        sum += entropy(d, nsamples, nvars, c, sel);
        sel[i] = false;
    }

    // Joint entropy over all variables.
    for (int i = 0; i < nvars; ++i)
        sel[i] = true;
    sum -= entropy(d, nsamples, nvars, c, sel);

    delete[] sel;
    return sum;
}
// Returns entropy() of TrX(p, {1}, dim), where p = as_Mat(rho1).
// (The template header that declares T1 and TR precedes this definition.)
//
// Unless QICLIB_NO_DEBUG is defined, the arguments are validated first and a
// qic Exception is thrown when
//   - p is empty                                    (ZERO_SIZE)
//   - p has more than one column but is not square  (MATRIX_NOT_SQUARE_OR_CVECTOR)
//   - any entry of dim is zero                      (INVALID_DIMS)
//   - prod(dim) differs from p.n_rows               (DIMS_MISMATCH_MATRIX)
//   - dim does not have exactly two entries         (NOT_BIPARTITE)
inline TR entanglement(const T1& rho1, arma::uvec dim) {
  const auto& p = as_Mat(rho1);

#ifndef QICLIB_NO_DEBUG
  // A single column is accepted as a column vector; only other shapes have
  // to be square.
  const bool checkV = (p.n_cols != 1);

  if (p.n_elem == 0)
    throw Exception("qic::entanglement", Exception::type::ZERO_SIZE);

  if (checkV)
    if (p.n_rows != p.n_cols)
      throw Exception("qic::entanglement",
                      Exception::type::MATRIX_NOT_SQUARE_OR_CVECTOR);

  // Was `arma::any(dim) == 0`, which is true only when *every* entry is
  // zero; a single zero dimension must already be rejected.
  if (arma::any(dim == 0))
    throw Exception("qic::entanglement", Exception::type::INVALID_DIMS);

  if (arma::prod(dim) != p.n_rows)
    throw Exception("qic::entanglement",
                    Exception::type::DIMS_MISMATCH_MATRIX);

  if ((dim.n_elem) != 2)
    throw Exception("qic::entanglement", Exception::type::NOT_BIPARTITE);
#endif

  return entropy(TrX(p, {1}, std::move(dim)));
}
int _tmain(int argc, _TCHAR* argv[]) { std::string entropy("entropy"); Crypto cryptoA(std::vector<BYTE>(entropy.begin(), entropy.end())); Crypto cryptoB(std::vector<BYTE>(entropy.begin(), entropy.end())); int total = 5; int failed = 0 + TestGeneratePassword(cryptoA, "0123456789abcdef", 16) + TestEncryptDecrypt(cryptoA, cryptoB) + TestCreateKey25519(DudRandomSource(0x55), L"83BA66B48DF6777D6EB6DDA90E9792319AF48D3BA3210620E7B4641C4F88C476") + TestCreateKey25519(DudRandomSource(0xAA), L"41ACE9D483B7CC3F75640E04D7AACA6C2BA8F44854FEA5158598D49B382E0407") + TestSharedKey25519(DudRandomSource(0x55), DudRandomSource(0xAA), L"99982C6AA51244F9CF49295A8EF0B882E2FED6C131F106556C803143758946E2") ; if (failed) { std::wcout << L"FAILED (" << failed << L" of " << total << L" tests)" << std::endl; } else { std::wcout << L"PASSED (" << total << L" tests)" << std::endl; } return 0; }
/*
 * Score a candidate split and record it in *split if it beats split->gain.
 *
 * posleft/negleft are the positive/negative amounts on the left side; the
 * right side is what remains of node->pos / node->neg, clamped from below
 * to FLT_EPSILON. The score is the negated, size-weighted sum of entropy()
 * evaluated at each side's positive fraction.
 */
static void updateSplit(int feature, float threshold, float posleft, float negleft, node_t* node, split_t* split){
    float posright  = max(FLT_EPSILON, node->pos - posleft);
    float negright  = max(FLT_EPSILON, node->neg - negleft);
    float sizeleft  = posleft+negleft;
    float sizeright = posright+negright;
    float total     = node->pos+node->neg;

    float gain = -(sizeleft/total*entropy(posleft/sizeleft)+sizeright/total*entropy(posright/sizeright));

    /* Not strictly better than the best split so far: leave *split alone. */
    if (!(gain > split->gain))
        return;

    split->gain      = gain;
    split->feature   = feature;
    split->threshold = threshold;
    split->posleft   = posleft;
    split->negleft   = negleft;
    split->posright  = posright;
    split->negright  = negright;
}
// Encodes one frame: copies *pFrame into the float image m_imgF, pushes it
// to the device with CopyToDevice(), then runs transform(), entropy()
// (which receives the output bitstream pBstr) and itransform(), in that
// order, each with the given frame_type.
void CCLEncoder::doEncodeFrame(CImage<uint8_t> * pFrame, CBitstream * pBstr, FRAME_TYPE frame_type)
{
	// Assign through the CImage<float> view of m_imgF.
	CImage<float> * floatImage = static_cast<CImage<float>*>(m_imgF);
	*floatImage = *pFrame;

	m_imgF->CopyToDevice();

	transform(m_imgF, m_img, m_predTab, frame_type);
	entropy(m_img, m_predTab, pBstr, frame_type);
	itransform(m_imgF, m_img, m_predTab, frame_type);
}
/*
 * Entropy of a vector of doubles: builds a ProbabilityState with
 * discAndCalcProbability(), passes it to entropy(), frees the state and
 * returns the entropy value.
 */
double discAndCalcEntropy(double* dataVector, int vectorLength)
{
  double result;
  ProbabilityState probState;

  probState = discAndCalcProbability(dataVector, vectorLength);
  result = entropy(probState);

  /* the state is only needed for the single entropy() call above */
  freeProbabilityState(probState);

  return result;
}/*discAndCalcEntropy(double* ,int)*/
/*
 * Entropy of a vector of uints: builds a ProbabilityState with
 * calculateProbability(), passes it to entropy(), frees the state and
 * returns the entropy value.
 */
double calcEntropy(uint* dataVector, int vectorLength)
{
  double result;
  ProbabilityState probState;

  probState = calculateProbability(dataVector, vectorLength);
  result = entropy(probState);

  /* the state is only needed for the single entropy() call above */
  freeProbabilityState(probState);

  return result;
}/*calcEntropy(uint* ,int)*/
// Evaluates the currently selected information measure on inMx:
// gini(inMx) for InfoType::gini, entropy(inMx) for InfoType::entropy.
//
// If information_measure holds neither value, 0.0 is returned. Previously
// control ran off the end of this non-void function in that case, which is
// undefined behaviour.
double information(InternalMatrix& inMx)
{
    switch (information_measure)
    {
        case InfoType::gini:
            return gini(inMx);
        case InfoType::entropy:
            return entropy(inMx);
    }

    // Deliberately no `default:` label above, so compilers can still warn
    // when a new InfoType enumerator is not handled.
    return 0.0;
}
/*
 * Mutual information between the first g rows of m and the remaining rows.
 *
 * The returned matrix has g rows and (m->rows - g) columns; entry [i][j-g]
 * is e[i] + e[j] - e2[i][j] for i in [0, g) and j in [g, m->rows), where
 * e = entropy(w, m->cols, n), e2 = entropy2(w, m->cols, n, g) and
 * w = calcWeights(m, n, k, knots). The knot vector has n + k + 1 entries and
 * is filled by calcKnot().
 *
 * Returns the newly allocated result (caller releases it), or NULL if the
 * knot vector could not be allocated.
 *
 * Changes from the previous version: the unused stack array
 * `double r[g*(m->rows - g)]` was removed (it could exhaust the stack for
 * large inputs and had a non-positive size when g == 0 or g >= m->rows),
 * and the malloc() result is now checked before use.
 */
matrix *mi(matrix *m, int n, int k, int g) {
  matrix *result;
  matrix *w;
  matrix *e;
  matrix *e2;
  int *knots;
  int i, j;

  knots = (int *)malloc((n + k + 1) * sizeof(int));
  if (knots == NULL)
    return NULL;

  /* result[i][j-g] is the MI between row i (i < g) and row j (j >= g) of m */
  result = new_matrix(g, ((m->rows)-g));

  calcKnot(knots, n, k);
  w = calcWeights(m, n, k, knots);

  e = entropy(w, m->cols, n);
  e2 = entropy2(w, m->cols, n, g);

  for (i = 0; i < g; ++i) {
    for (j = g; j < (m->rows); ++j) {
      result->m[i][j-g] = e->m[i][0] + e->m[j][0] - e2->m[i][j];
    }
  }

  free(knots);
  free_matrix(w);
  free_matrix(e);
  free_matrix(e2);

  return result;
}
/*
  Train a single tree.

  First checks that at least one feature in tree->feature_list takes more
  than one value over the root training set; if none does, the leaves are
  collected with rfFindLeaves() and ERROR_BADPARM is returned.

  Otherwise rfTrainNode() is called on the root repeatedly. After each pass
  the leaves are re-collected and their entropies summed. The loop stops when
  rfTrainNode() returns non-zero, when the summed leaf entropy is FZERO, or
  after the pass counter exceeds 10.

  Returns NO_ERROR after training.
*/
static int
rfTrainTree(RANDOM_FOREST *rf, TREE *tree, int *training_classes, double **training_data,
            int ntraining)
{
  int    finished = 0, has_spread = 0, leaf, pass, sample, fno, f, i ;
  double leaf_entropy_sum, reference ;

  leaf_entropy_sum = entropy(tree->root.class_counts, rf->nclasses, tree->root.class_counts) ;

  // make sure there is at least one feature with nonzero range
  for (f = 0 ; f < tree->nfeatures && !has_spread ; f++)
  {
    fno = tree->feature_list[f] ;
    reference = training_data[tree->root.training_set[0]][fno] ;
    for (i = 1 ; i < tree->root.total_counts ; i++)
    {
      sample = tree->root.training_set[i] ;
      if (training_data[sample][fno] != reference)
      {
        has_spread = 1 ;   // found one feature with some spread
        break ;
      }
    }
  }

  if (!has_spread)  // all features are identical - can't train
  {
    rfFindLeaves(tree) ;
    return(ERROR_BADPARM) ;
  }

  for (pass = 0 ; !finished && !FZERO(leaf_entropy_sum) ; pass++)
  {
    finished = rfTrainNode(rf, tree, &tree->root, training_classes, training_data, rf->ntraining);
    rfFindLeaves(tree) ;

    leaf_entropy_sum = 0.0 ;
    for (leaf = 0 ; leaf < tree->nleaves ; leaf++)
      leaf_entropy_sum +=
        entropy(tree->leaves[leaf]->class_counts, rf->nclasses, tree->root.class_counts) ;

    if (Gdiag & DIAG_SHOW && DIAG_VERBOSE_ON)
      printf("\taverage leaf entropy = %2.4f, nleaves = %d, max depth %d\n",
             leaf_entropy_sum/tree->nleaves, tree->nleaves, tree->depth) ;

    if (pass > 10)   // give up after a bounded number of passes
      break ;
  }

  if (tree->nleaves == 0)  // only if loop above never executed
    rfFindLeaves(tree) ;

  return(NO_ERROR) ;
}
// Fills val->Hx, val->Hy and val->I from the joint matrix *mm.
//
// Throws notajointmatrix if check_jointmatrix(mm) rejects the input.
// Hx is entropy() of the row sums, Hy is entropy() of the column sums, and
// I = Hy + Hx + sum over positive cells of cell * MyLog(cell).
void computeMIvalues(vector< vector< double> > *mm, MIvalues *val)
{
    if (!check_jointmatrix(mm)) {
        throw notajointmatrix();
    }

    const vector< vector<double> > &joint = *mm;
    const int rows = joint.size();
    const int cols = joint[0].size();
    int r, c;

    // Marginal over the second index (one sum per row).
    vector<double> rowSum(rows, 0.0);
    for (r = 0; r < rows; r++) {
        for (c = 0; c < cols; c++) {
            rowSum[r] += joint[r][c];
        }
    }
    val->Hx = entropy(&rowSum);

    // Marginal over the first index (one sum per column).
    vector<double> colSum(cols, 0.0);
    for (c = 0; c < cols; c++) {
        for (r = 0; r < rows; r++) {
            colSum[c] += joint[r][c];
        }
    }
    val->Hy = entropy(&colSum);

    // Sum of p*log(p) over the joint table; non-positive cells are skipped.
    double jointTerm = 0.0;
    for (c = 0; c < cols; c++) {
        for (r = 0; r < rows; r++) {
            double cell = joint[r][c];
            if (cell > 0) {
                jointTerm = jointTerm + cell * MyLog(cell);
            }
        }
    }

    val->I = val->Hy + val->Hx + jointTerm;
}
/*
  Set up tree number 'tno' of the forest from the given training set and
  train it with rfTrainTree().

  - stores training_data / training_classes in rf
  - recomputes rf->feature_min / rf->feature_max over the ntraining samples
  - allocates the tree's feature list (all rf->nfeatures features, in order)
    and the root training set (ntraining entries)
  - adds every sample to the root class counts and root training set
  - adds ntraining to rf->ntraining

  Calls ErrorExit(ERROR_NOMEMORY, ...) if an allocation fails.
  Returns NO_ERROR; the return value of rfTrainTree() is not inspected.
*/
int
RFtrainTree(RANDOM_FOREST *rf, int tno, int *training_classes, double **training_data,
            int ntraining)
{
  int   sample, feat ;
  TREE  *tree ;

  rf->training_data    = training_data ;
  rf->training_classes = training_classes ;

  // per-feature value range over the training samples
  for (feat = 0 ; feat < rf->nfeatures ; feat++)
  {
    rf->feature_min[feat] = 1e20 ;
    rf->feature_max[feat] = -1e20 ;
    for (sample = 0 ; sample < ntraining ; sample++)
    {
      const double value = training_data[sample][feat] ;

      if (value < rf->feature_min[feat])
        rf->feature_min[feat] = value ;
      if (value > rf->feature_max[feat])
        rf->feature_max[feat] = value ;
    }
  }

  tree = &rf->trees[tno] ;

  // this tree uses every feature
  tree->feature_list = (int *)calloc(rf->nfeatures, sizeof(tree->feature_list[0]));
  if (tree->feature_list == NULL)
    ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate feature list %d (%d)",
              tno,rf->nfeatures) ;
  tree->nfeatures = rf->nfeatures ;
  for (feat = 0 ; feat < rf->nfeatures ; feat++)
    tree->feature_list[feat] = feat ;

  tree->root.training_set = (int *)calloc(ntraining, sizeof(tree->root.training_set[0])) ;
  if (tree->root.training_set == NULL)
    ErrorExit(ERROR_NOMEMORY, "RFtrainTree: could not allocate root training set") ;

  // every sample starts out in the root node
  for (sample = 0 ; sample < ntraining ; sample++)
  {
    tree->root.class_counts[training_classes[sample]]++ ;
    tree->root.training_set[tree->root.total_counts++] = sample ;  // should be +ntraining
  }
  rf->ntraining += ntraining ;

  if (Gdiag & DIAG_SHOW && DIAG_VERBOSE_ON)
    printf("tree %d: initial entropy = %f\n", tno,
           entropy(tree->root.class_counts, rf->nclasses, tree->root.class_counts)) ;

  rfTrainTree(rf, tree, training_classes, training_data, rf->ntraining) ;
  return(NO_ERROR) ;
}
void PasswordChecker::evaluatePasswordStrength(const QString &password, QColor &color, QString &grade, qreal *_fitness) { qreal fitness = 0; color.setRgb(153, 153, 153); if (password.isEmpty()) { grade = "?"; } else { fitness = password.size() * entropy(password); if (fitness >= 11.0) { color.setRgb(0, 255, 30); grade = tr("Supercalifragilisticexpialidocious"); } else if (fitness >= 9.0) { color.setRgb(0, 255, 30); grade = tr("Brutally strong"); } else if (fitness >= 7.0) { color.setRgb(0, 255, 30); grade = tr("Fabulous"); } else if (fitness >= 5.0) { color.setRgb(0, 255, 30); grade = tr("Very good"); } else if (fitness >= 4.0) { color.setRgb(111, 255, 0); grade = tr("Good"); } else if (fitness >= 3.0) { color.setRgb(234, 255, 0); grade = tr("Mediocre"); } else if (fitness >= 2.5) { color.setRgb(255, 153, 0); grade = tr("You can do better"); } else if (fitness >= 2.0) { color.setRgb(255, 48, 0); grade = tr("Bad"); } else if (fitness >= 1.5) { color.setRgb(255, 0, 0); grade = tr("It can hardly be worse"); } else { color.setRgb(200, 0, 0); grade = tr("Useless"); } } if (_fitness != Q_NULLPTR) *_fitness = fitness; }
// Encodes one frame: copies *pFrame into the float image m_imgF, pushes it
// to the device and runs transform(). After m_dev.Finish(), m_img and
// m_predTab are copied back to the host; itransform() and then entropy()
// (which receives the output bitstream pBstr) are run, followed by a final
// m_dev.Finish().
void CCLParallelEncoder::doEncodeFrame(CImage<uint8_t> * pFrame, CBitstream * pBstr, FRAME_TYPE frame_type)
{
	// Assign through the CImage<float> view of m_imgF.
	CImage<float> * floatImage = (CImage<float>*)m_imgF;
	*floatImage = *pFrame;

	m_imgF->CopyToDevice();
	transform(m_imgF, m_img, m_predTab, frame_type);
	m_dev.Finish();

	// Bring the transform results back before the remaining stages run.
	m_img->CopyToHost();
	m_predTab->CopyToHost();

	itransform(m_imgF, m_img, m_predTab, frame_type);
	entropy(m_img, m_predTab, pBstr, frame_type);
	m_dev.Finish();
}
/* ************************************************************************
 * Random Feature Selection for splitting a node.
 *
 * Attributes (every index except the class index) are drawn at random
 * without replacement and evaluated with evalAttribute(). Drawing stops as
 * soon as one attribute yields a gain greater than zero, after nbFeat draws,
 * or when no attributes are left.
 *
 * param :
 * 		node : the current node to be split
 * 		sortedInd : per-attribute arrays of instance indices, passed on to
 * 			evalAttribute() for the drawn attribute
 *
 * Return a new Rule for the selected attribute (built from its number of
 * modalities if the attribute is nominal, from the split value otherwise),
 * or NULL if no evaluated attribute had a positive gain.
 */
Rule * 		RndTree::randomFeatSelection(Node * node, u_int ** sortedInd)
{
	DataHandler * data = node->getDataSet();

	long double bestGain = 0.0;
	double bestSplit = 0.0;
	u_int bestAtt = data->getClassInd();
	bool found = false;
	double w_size = data->w_size();

	// Impurity (gini index or entropy) of the current node's subset of data.
	long double eval0;
	if(gin) eval0 = gini(data->getDistrib(),data->getNbClass(),w_size);
	else eval0 = entropy(data->getDistrib(),data->getNbClass(),w_size);

	// Candidate attributes that have not been evaluated yet.
	vector<u_int> candidates;
	for(u_int a=0; a<data->dim(); a++)
	{
		if(a != data->getClassInd()) candidates.push_back(a);
	}

	for(int remaining = nbFeat; (candidates.size()>0) && (remaining > 0) && (!found); remaining--)
	{
		// Draw one candidate; no random draw is made when a single one is left.
		int pick = 0;
		if(candidates.size() > 1) pick = Utils::randInt(candidates.size());

		u_int attIndex = candidates[pick];
		double split;
		long double gain = evalAttribute(node,attIndex,sortedInd[attIndex],&split,eval0,w_size);

		if(gain > bestGain)
		{
			bestGain = gain;
			bestAtt = attIndex;
			bestSplit = split;
			found = true;
		}

		candidates.erase(candidates.begin()+pick);
	}

	if(!found) return NULL;

	u_int bestAttId = data->getAttribute(bestAtt)->getId();
	if(data->getAttribute(bestAtt)->is_nominal())
		return new Rule(bestAttId,data->getAttribute(bestAtt)->getNbModal());

	return new Rule(bestAttId,bestSplit);
}
/*
 * Demo driver: prints entropy() of the arrays K, C and F and the
 * KL_divergence() of K against C and against F, followed by two fixed
 * explanatory lines.
 */
int main(void)
{
	double value;

	/* entropies */
	value = entropy(K, arr_len(K));
	printf("entropy(K)=%f\n", value);

	value = entropy(C, arr_len(C));
	printf("entropy(C)=%f\n", value);

	value = entropy(F, arr_len(F));
	printf("entropy(F)=%f\n", value);

	/* divergences of K from the other two */
	value = KL_divergence(K, arr_len(K), C, arr_len(C));
	printf("KL_divergence(K,C)=%f\n", value);

	value = KL_divergence(K, arr_len(K), F, arr_len(F));
	printf("KL_divergence(K,F)=%f\n", value);

	printf("F has more uncertainty than the others, measured by entropy(F) is smallest\n");
	printf("K is similar to C more than F, measured by KL(K,C)<KL(K,F)\n");

	return 0;
}
// perform actual computation void forestFindThr( int H, int N, int F, const float *data, const uint32 *hs, const float *ws, const uint32 *order, const int split, uint32 &fid, float &thr, double &gain ) { double *Wl, *Wr, *W; float *data1; uint32 *order1; int i, j, j1, j2, h; double vBst, vInit, v, w, wl, wr, g, gl, gr; Wl=new double[H]; Wr=new double[H]; W=new double[H]; // perform initialization vBst = vInit = 0; g = 0; w = 0; fid = 1; thr = 0; for( i=0; i<H; i++ ) W[i] = 0; for( j=0; j<N; j++ ) { w+=ws[j]; W[hs[j]-1]+=ws[j]; } if( split==0 ) { for( i=0; i<H; i++ ) g+=gini(W[i]); vBst=vInit=(1-g/w/w); } if( split==1 ) { for( i=0; i<H; i++ ) g+=entropy(W[i]); vBst=vInit=g/w; } // loop over features, then thresholds (data is sorted by feature value) for( i=0; i<F; i++ ) { order1=(uint32*) order+i*N; data1=(float*) data+i*size_t(N); for( j=0; j<H; j++ ) { Wl[j]=0; Wr[j]=W[j]; } gl=wl=0; gr=g; wr=w; for( j=0; j<N-1; j++ ) { j1=order1[j]; j2=order1[j+1]; h=hs[j1]-1; if(split==0) { // gini = 1-\sum_h p_h^2; v = gini_l*pl + gini_r*pr wl+=ws[j1]; gl-=gini(Wl[h]); Wl[h]+=ws[j1]; gl+=gini(Wl[h]); wr-=ws[j1]; gr-=gini(Wr[h]); Wr[h]-=ws[j1]; gr+=gini(Wr[h]); v = (wl-gl/wl)/w + (wr-gr/wr)/w; } else if (split==1) { // entropy = -\sum_h p_h log(p_h); v = entropy_l*pl + entropy_r*pr gl+=entropy(wl); wl+=ws[j1]; gl-=entropy(wl); gr+=entropy(wr); wr-=ws[j1]; gr-=entropy(wr); gl-=entropy(Wl[h]); Wl[h]+=ws[j1]; gl+=entropy(Wl[h]); gr-=entropy(Wr[h]); Wr[h]-=ws[j1]; gr+=entropy(Wr[h]); v = gl/w + gr/w; } else if (split==2) { // twoing: v = pl*pr*\sum_h(|p_h_left - p_h_right|)^2 [slow if H>>0] j1=order1[j]; j2=order1[j+1]; h=hs[j1]-1; wl+=ws[j1]; Wl[h]+=ws[j1]; wr-=ws[j1]; Wr[h]-=ws[j1]; g=0; for( int h1=0; h1<H; h1++ ) g+=fabs(Wl[h1]/wl-Wr[h1]/wr); v = - wl/w*wr/w*g*g; } if( v<vBst && data1[j2]-data1[j1]>=1e-6f ) { vBst=v; fid=i+1; thr=0.5f*(data1[j1]+data1[j2]); } } } delete [] Wl; delete [] Wr; delete [] W; gain = vInit-vBst; }
// Builds the analysis object and computes all three results up front:
//   - runs rrmethod.RunRRMethod() on the R-peak iterators
//   - pWaveOccurenceRatioResult from pWaveOccurenceRatio(pWaveStarts, endOfSignal)
//   - divergenceResult from JKdivergence() of rrmethod's Markov table against
//     a fixed 3x3 reference pattern
//   - entropyResult from entropy() of the same Markov table
// Only signal.end() is retained from `signal`.
AtrialFibrApi::AtrialFibrApi(
    const QVector<double> &signal,
    const QVector<QVector<double>::const_iterator> &RPeaksIterators,
    const QVector<QVector<double>::const_iterator> &pWaveStarts)
    : pWaveStarts(pWaveStarts),
      endOfSignal(signal.end()),
      entropyResult(0.0),
      divergenceResult(0.0),
      pWaveOccurenceRatioResult(0.0)
{
  rrmethod.RunRRMethod(RPeaksIterators);

  pWaveOccurenceRatioResult = pWaveOccurenceRatio(pWaveStarts, endOfSignal);

  // Fixed reference pattern the Markov table is compared against.
  Matrix3_3 referencePattern = { {
      { { 0.005, 0.023, 0.06 } },
      { { 0.007, 0.914, 0.013 } },
      { { 0.019, 0.006, 0.003 } }
  } };

  divergenceResult = JKdivergence(rrmethod.getMarkovTable(), referencePattern);
  entropyResult = entropy(rrmethod.getMarkovTable());
}
/*
  Split the parent's training set on feature 'fno' at 'thresh' and return
  the resulting information gain.

  Samples whose feature value is < thresh go to 'left', all others to
  'right'. Both children have their class counts, training sets and total
  counts rebuilt by this call. The two child entropies are combined with
  weights derived from the per-class fractions of the root class counts.

  Returns entropy(parent) - weighted entropy of the children.
*/
static double
compute_info_gain(RF *rf, TREE *tree, NODE *parent, NODE *left, NODE *right,
                  double **training_data, int fno, double thresh)
{
  double  h_parent, h_children, frac_left, frac_right ;
  int     sample, n, cls ;
  NODE    *dest ;

  h_parent = entropy(parent->class_counts, rf->nclasses, tree->root.class_counts) ;

  // start both children from scratch
  memset(left->class_counts,  0, rf->nclasses*sizeof(left->class_counts[0])) ;
  memset(right->class_counts, 0, rf->nclasses*sizeof(right->class_counts[0])) ;
  left->total_counts = right->total_counts = 0 ;

  // route every parent sample to one side of the threshold
  for (n = 0 ; n < parent->total_counts ; n++)
  {
    sample = parent->training_set[n] ;
    dest = (training_data[sample][fno] < thresh) ? left : right ;

    dest->class_counts[rf->training_classes[sample]]++ ;
    dest->training_set[dest->total_counts] = sample ;
    dest->total_counts++ ;
  }

  // per-class fractions of the root counts that ended up left/right
  frac_left = frac_right = 0.0 ;
  for (cls = 0 ; cls < rf->nclasses ; cls++)
  {
    if (tree->root.class_counts[cls] != 0)
    {
      frac_left  += (double)left->class_counts[cls]  / tree->root.class_counts[cls] ;
      frac_right += (double)right->class_counts[cls] / tree->root.class_counts[cls] ;
    }
  }
  frac_left  = frac_left / (frac_left + frac_right) ;
  frac_right = 1-frac_left ;

  h_children =
    frac_left  * entropy(left->class_counts,  rf->nclasses, tree->root.class_counts) +
    frac_right * entropy(right->class_counts, rf->nclasses, tree->root.class_counts) ;

  return(h_parent - h_children) ;
}
int main(int argc, char *argv[]) { int k = 100000; int exp = 1; // double c0 = 2, c1 = 3, c2 = 1; // double c0 = 1.3, c1 = 8, c2 = 1.5; std::CommandLine cmd; cmd.AddValue ("exp", "", exp); cmd.Parse (argc, argv); // Set sample size for test std::vector<int> testN; long n = 1000; for ( int i = 0; i < 15; i++ ) { n *= 2; } testN.push_back( n ); // Set distribution for test std::vector<double> p; switch(exp) { case 0: p = uniform(k); break; case 1: p = zipf(k); break; case 2: p = zipfd5(k); break; case 3: p = mixgeozipf(k); break; } // Set estimator Entropy entropy( k ); entropy.setDegree( 18 ); entropy.setInterval( 40 ); entropy.setThreshold( 18 ); printf("Alphabet size=%d.\n", entropy.getAlphabetSize()); printf("Polynoimal degree=%d.\n", entropy.getDegree()); printf("Approximation interval=[0,%.2f/n].\n", entropy.getInterval()); printf("Plug-in threshold=%d.\n",(int)floor(entropy.getThreshold())+1); printf("Unit: bits\n"); // TEST_fixed_P(p, entropy, testN); const int trials = 50; TEST_fixed_P_RMSE(p, entropy, testN, trials); return 0; }
/** * Gibbs function in Kelvin (\f$ G/R \f$) for * one species. @param t temperature @param s species object */ double gibbs(double t, const Species& s) { if (s.thermoFormatType == 1) { double s0r = entropy(t, s); double h0r = enthalpy(t, s); return (h0r - s0r * t); } const vector_fp* cp; if (t > s.tmid) cp = &s.highCoeffs; else cp = &s.lowCoeffs; const vector_fp& c = *cp; double h0rt = c[0] + 0.5*c[1]*t + c[2]*t*t/3.0 + 0.25*c[3]*t*t*t + 0.2*c[4]*t*t*t*t + c[5]/t; double s0r = c[0]*log(t) + c[1]*t + 0.5*c[2]*t*t + c[3]*t*t*t/3.0 + 0.25*c[4]*t*t*t*t + c[6]; return t*(h0rt - s0r); }
/* ************************************************************************ * Function that evaluates the quality of the current split * param : * n : the weighted size of the current node's subset * nbClass : the number of class possible values * distribs : a 2D array to memorize class distribution for each child node to be created * tots : an array of total size of each child node subset * nbSplit : the number of child node to be created */ long double Cart::eval(double n, u_int nbClass, double ** distribs, double * tots, u_int nbSplit) { long double eval = 0.0; for(u_int i=0; i<nbSplit; i++) { if(tots[i] != 0.0) { long double i_t; if(gin) i_t = gini(distribs[i],nbClass,tots[i]); else i_t = entropy(distribs[i],nbClass,tots[i]); eval += ((tots[i]/n) * i_t); } } return eval; }