// Chooses the attribute to split on for node S.
//
// Candidate attribute ids are the negative integers -1, -2, ...,
// -(nattributes - 1). Every candidate for which isUsed() reports 0 is scored
// with getGain(), and each score is printed to stdout.
//
// Returns the id of the candidate with the strictly greatest positive gain
// (the earliest candidate wins ties). When no candidate has a positive gain,
// returns 100 if positiveValues(0, S, ...) exceeds negativeValues(0, S, ...),
// otherwise 101.
// NOTE(review): 100/101 look like leaf/class-label codes - confirm with caller.
//
// parent, values, nvalues, nattributes and test are forwarded unchanged to
// the helper functions.
int getMaxGain(int parent, int S, int values[LENGTH][LENGTH], int nvalues, int nattributes, att * test)
{
    float bestGain = 0;
    int bestAtt;
    int candidate = -1;

    while (candidate > -nattributes) {
        if (isUsed(candidate, test) == 0) {
            float candidateGain = getGain(parent, S, candidate, values, nvalues, nattributes, test);
            printf("Gain of %d and %d is %lf\n", S, candidate, candidateGain);
            if (candidateGain > bestGain) {
                bestGain = candidateGain;
                bestAtt = candidate;
            }
        }
        candidate--;
    }

    // No attribute produced a positive gain: fall back to the majority code.
    if (bestGain == 0) {
        int positives = positiveValues(0, S, values, nvalues, nattributes, test);
        int negatives = negativeValues(0, S, values, nvalues, nattributes, test);
        return (positives > negatives) ? 100 : 101;
    }

    return bestAtt;
}
// Computes the two-class entropy of `attribute` with respect to node S:
//   -p*log2(p) - n*log2(n), with p = positive/total and n = negative/total.
//
// Where the counts come from:
//   * S == 0: positiveValues() / negativeValues() are called directly.
//   * S != 0: updatePosNeg() is called, then the counts are read from the
//     positivetracker / negativetracker fields of the list node whose
//     `equivalent` field equals `attribute`.
//
// Returns 0 when either count is zero, 1 when both counts are equal, and the
// formula above otherwise (that last case also prints the computation to
// stdout). If S != 0 and no node matches `attribute`, the counts are treated
// as zero and 0 is returned instead of dereferencing a NULL node.
//
// values, nvalues, nattributes and test are forwarded unchanged to the
// helper functions.
float getEntropy(int S, int attribute, int values[LENGTH][LENGTH], int nvalues, int nattributes, att * test)
{
    int positive;
    int negative;
    attnode * curr;

    // Locate the list node that represents `attribute` (NULL if absent).
    curr = test->head;
    while (curr != NULL && curr->equivalent != attribute) {
        curr = curr->next;
    }

    if (S == 0) {
        positive = positiveValues(S, attribute, values, nvalues, nattributes, test);
        negative = negativeValues(S, attribute, values, nvalues, nattributes, test);
    } else {
        // Called before the NULL check so the original call order is preserved.
        updatePosNeg(S, attribute, values, nvalues, nattributes, test);
        if (curr == NULL) {
            return 0;
        }
        positive = curr->positivetracker;
        negative = curr->negativetracker;
    }

    // Resolve the trivial cases first so that 0/0 and log2(0) are never
    // evaluated; the results are the same as before for these inputs.
    if (positive == 0 || negative == 0) {
        return 0;
    }
    if (positive == negative) {
        return 1;
    }

    int total = positive + negative;
    float pos = (float)positive / total;
    float neg = (float)negative / total;
    float logpos = log2(pos);
    float logneg = log2(neg);
    float entropy = (-pos * logpos) - (neg * logneg);

    printf("entropy of %d wrt %d=-%d/%dlg%d/%d - %d/%dlg%d/%d=%lf\n", attribute, S, positive, total, positive, total, negative, total, negative, total, entropy);
    return entropy;
}
// Writes a histogram of `data` to the text file `filename`.
//
// File layout:
//   1. `preamble`, written verbatim.
//   2. One line per bucket: the value zero-padded to width 6, " \t", then the
//      hit count zero-padded to width 6.
//      - Non-negative buckets cover 0..max.
//      - Negative buckets are produced only when min < 0 and cover min..-1;
//        their value is prefixed with "-".
//      - commentNeg == true : non-negative buckets first, then a
//        "# negative Values following" line and the negative buckets, each
//        prefixed with "#".
//      - commentNeg == false: negative buckets first, then the non-negative
//        ones.
//   3. An empty line followed by "end".
//
// Parameters:
//   filename   - path of the file to create/overwrite.
//   preamble   - text emitted before the histogram.
//   max, min   - inclusive range of values that get a bucket.
//   data       - array of `dataSize` samples.
//   dataSize   - number of samples in `data`.
//   commentNeg - selects the ordering / "#" prefixing described above.
//
// Samples outside [min, max] are ignored. Previously they indexed past the
// end of the bucket arrays and corrupted the stack.
//
// NOTE: a failure to open the file is not reported; nothing is written then.
// NOTE: the bucket arrays are variable-length stack arrays (compiler
// extension), so very large |min| / max values can exhaust the stack.
void PlotDataFileCreator::createPlottableDatafile(std::string filename, std::string preamble, int max, int min, long* data, int dataSize, bool commentNeg)
{
    // Buckets 0..max; none at all when max is negative.
    const int posBucketCount = (max >= 0) ? max + 1 : 0;
    // An array length must be positive, so reserve at least one slot.
    const int posStorage = (posBucketCount > 0) ? posBucketCount : 1;

    int hits[posStorage];
    for(int i = 0; i < posStorage; i++)
    {
        hits[i] = 0;
    }

    for(int i = 0; i < dataSize; i++)
    {
        const long value = data[i];
        // Range check keeps the index inside hits[].
        if(value >= 0 && value <= max)
        {
            hits[value]++;
        }
    }

    std::ofstream fs;
    fs.open(filename.c_str());
    fs << preamble;

    std::stringstream positiveValues;
    for(int i = 0; i < posBucketCount; i++)
    {
        positiveValues << std::setfill('0') << std::setw(6) << i << " \t"
                       << std::setfill('0') << std::setw(6) << hits[i] << '\n';
    }

    std::stringstream negativeValues("");
    if(min < 0)
    {
        // Indexed by magnitude: negHits[k] counts samples equal to -k.
        // Index 0 exists only to keep the indexing simple and is never printed.
        const int negBucketCount = min * (-1) + 1;

        if(commentNeg)
        {
            negativeValues << "# negative Values following" << '\n';
        }

        int negHits[negBucketCount];
        for(int i = 0; i < negBucketCount; i++)
        {
            negHits[i] = 0;
        }

        for(int i = 0; i < dataSize; i++)
        {
            const long value = data[i];
            // Range check keeps the index inside negHits[].
            if(value < 0 && value >= min)
            {
                negHits[value * (-1)]++;
            }
        }

        // Emit from the most negative value up to -1.
        for(int i = negBucketCount - 1; i > 0; i--)
        {
            if(commentNeg)
            {
                negativeValues << "#";
            }
            negativeValues << "-" << std::setfill('0') << std::setw(6) << i << " \t"
                           << std::setfill('0') << std::setw(6) << negHits[i] << '\n';
        }
    }

    if(commentNeg)
    {
        fs << positiveValues.str();
        fs << negativeValues.str();
    }
    else
    {
        fs << negativeValues.str();
        fs << positiveValues.str();
    }

    fs << std::endl << "end" << std::endl;
    fs.close();
}
// Computes the information gain of splitting node S on `attribute`:
//
//   gain = getEntropy(parent, S) - sum( (n_v / n_S) * getEntropy(S, v) )
//
// n_S is the positive+negative count obtained for S, and the sum runs over
// every list node v that follows the `attribute` node and has the same `y`
// field as it. Nodes whose entropy is exactly 0 contribute nothing and are
// skipped. Each summed term and the final gain are printed to stdout.
//
// Counts come from positiveValues()/negativeValues() when S == 0; otherwise
// updatePosNeg() is called and the node's positivetracker/negativetracker
// fields are read.
//
// NOTE(review): the nodes for S (when S != 0) and for `attribute` are assumed
// to exist in test's list; they are dereferenced without a NULL check.
//
// values, nvalues, nattributes and test are forwarded unchanged to the
// helper functions.
float getGain(int parent, int S, int attribute, int values[LENGTH][LENGTH], int nvalues, int nattributes, att * test)
{
    attnode * node;
    attnode * valueNode;
    float baseEntropy;
    float weightedSum = 0;
    int pos;
    int neg;
    int totalCount;
    int subsetCount;

    baseEntropy = getEntropy(parent, S, values, nvalues, nattributes, test);

    // Find the node that represents S.
    for (node = test->head; node != NULL; node = node->next) {
        if (node->equivalent == S) {
            break;
        }
    }

    if (S != 0) {
        updatePosNeg(parent, S, values, nvalues, nattributes, test);
        pos = node->positivetracker;
        neg = node->negativetracker;
    } else {
        pos = positiveValues(S, S, values, nvalues, nattributes, test);
        neg = negativeValues(S, S, values, nvalues, nattributes, test);
    }
    totalCount = pos + neg;

    // Find the node that represents the attribute being evaluated.
    for (node = test->head; node != NULL; node = node->next) {
        if (node->equivalent == attribute) {
            break;
        }
    }

    // Walk the nodes after it; those sharing its `y` are the ones summed.
    for (valueNode = node->next; valueNode != NULL; valueNode = valueNode->next) {
        float valueEntropy;

        if (valueNode->y != node->y) {
            continue;
        }

        valueEntropy = getEntropy(S, valueNode->equivalent, values, nvalues, nattributes, test);
        if (valueEntropy == 0) {
            continue;
        }

        if (S != 0) {
            updatePosNeg(S, valueNode->equivalent, values, nvalues, nattributes, test);
            pos = valueNode->positivetracker;
            neg = valueNode->negativetracker;
        } else {
            pos = positiveValues(S, valueNode->equivalent, values, nvalues, nattributes, test);
            neg = negativeValues(S, valueNode->equivalent, values, nvalues, nattributes, test);
        }
        subsetCount = pos + neg;

        weightedSum += (float)subsetCount / totalCount * valueEntropy;
        printf("Gain+=%d/%d * %lf\n", subsetCount, totalCount, valueEntropy);
    }

    float result = baseEntropy - weightedSum;
    printf("Gain: %lf\n", result);
    return result;
}