/*
 * Conditional mutual information of dataVector (X) and targetVector (Y)
 * given conditionVector (Z), evaluated as
 *
 *     I(X;Y|Z) = H(X|Z) - H(X|YZ)
 *
 * dataVector      - X, vectorLength values
 * targetVector    - Y, vectorLength values
 * conditionVector - Z, vectorLength values
 * vectorLength    - number of entries in each vector
 *
 * The "YZ" condition is a scratch vector filled by mergeArrays() from
 * targetVector and conditionVector; it is allocated and released here.
 *
 * Returns the difference of the two calculateConditionalEntropy() results.
 */
double calculateConditionalMutualInformation(double *dataVector, double *targetVector, double *conditionVector, int vectorLength)
{
  double *jointCondition = (double *) CALLOC_FUNC(vectorLength, sizeof(double));
  double entropyGivenZ;
  double entropyGivenYZ;

  /* Build the combined (Y,Z) condition before either entropy is taken */
  mergeArrays(targetVector, conditionVector, jointCondition, vectorLength);

  entropyGivenZ = calculateConditionalEntropy(dataVector, conditionVector, vectorLength);
  entropyGivenYZ = calculateConditionalEntropy(dataVector, jointCondition, vectorLength);

  FREE_FUNC(jointCondition);

  return entropyGivenZ - entropyGivenYZ;
}/*calculateConditionalMutualInformation(double *,double *,double *,int)*/
/*
 * Sorts the first n elements of arr in place with a top-down merge sort.
 *
 * arr - array to sort; rewritten by mergeArrays()
 * n   - number of elements in arr; fewer than 2 returns immediately
 *
 * The array is split at n/2, each half is copied into its own scratch
 * buffer and sorted recursively, and the two halves are combined back into
 * arr by mergeArrays(arr, left, right, leftLength, rightLength). The
 * resulting order is whatever mergeArrays() produces.
 *
 * The scratch buffers are released before returning.
 * NOTE(review): this assumes mergeArrays() neither frees nor retains the
 * buffers it is handed - confirm against its definition.
 *
 * If a scratch buffer cannot be allocated, the function returns without
 * modifying arr at this level, so the range is left unsorted.
 */
void mergeSort(int *arr, int n)
{
  int i;
  int mid;
  int *leftArray;
  int *rightArray;

  /* base condition: fewer than 2 elements needs no work */
  if (n < 2)
  {
    return;
  }

  /* partition the array into leftArray and rightArray */
  mid = n / 2;
  leftArray = malloc(sizeof *leftArray * (size_t) mid);
  rightArray = malloc(sizeof *rightArray * (size_t) (n - mid));

  if (leftArray == NULL || rightArray == NULL)
  {
    /* free(NULL) is a no-op, so both can be released unconditionally */
    free(leftArray);
    free(rightArray);
    return;
  }

  for (i = 0; i < mid; ++i)
  {
    leftArray[i] = arr[i];
  }
  for (i = mid; i < n; ++i)
  {
    rightArray[i - mid] = arr[i];
  }

  mergeSort(leftArray, mid);
  mergeSort(rightArray, n - mid);
  mergeArrays(arr, leftArray, rightArray, mid, n - mid);

  free(leftArray);
  free(rightArray);
}
/*
 * JMI - greedy forward selection of up to k features.
 *
 * The first feature chosen is the one with the largest
 * calculateMutualInformation() value against classColumn. In every later
 * round each unselected feature j is scored by summing, over the features
 * already chosen, the mutual information between classColumn and the
 * mergeArrays() combination of (chosen feature, feature j); the feature
 * with the largest total is chosen. Pairwise values are cached in a
 * k x noOfFeatures table so each one is computed only once.
 *
 * k              - number of features requested
 * noOfSamples    - length of classColumn and of each feature vector
 * noOfFeatures   - number of candidate features
 * featureMatrix  - feature data; split into per-feature pointers by
 *                  firstElementOfEachSample()
 *                  NOTE(review): presumably noOfFeatures consecutive runs
 *                  of noOfSamples values - confirm against that helper
 * classColumn    - class label per sample
 * outputFeatures - receives the chosen feature indices in selection order;
 *                  must have room for k entries
 * noOfOutput     - incremented once for every index written; it is not
 *                  reset here, so the caller has to initialise it
 *
 * Nothing is written when k or noOfFeatures is not positive. A round always
 * picks an unselected feature when one exists (ties go to the lowest
 * index); selection stops early once every feature has been chosen, so an
 * index is never reported twice.
 */
void JMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, int *outputFeatures,int *noOfOutput)
{
  /*holds the class MI values*/
  double *classMI;
  char *selectedFeatures;
  /*holds the intra feature MI values*/
  double *featureMIMatrix;
  double *mergedVector;
  /*holds the first element of each sample*/
  double **feature2D;
  int sizeOfMatrix;
  double maxMI = -1.0;
  int maxMICounter = -1;
  double score, currentScore;
  int currentHighestFeature;
  int arrayPosition;
  int i, j, x;

  /* nothing to select, and outputFeatures may have no room at all */
  if (k <= 0 || noOfFeatures <= 0)
  {
    return;
  }

  classMI = (double *) checkedCalloc(noOfFeatures, sizeof(double));
  selectedFeatures = (char *) checkedCalloc(noOfFeatures, sizeof(char));
  sizeOfMatrix = k * noOfFeatures;
  featureMIMatrix = (double *) checkedCalloc(sizeOfMatrix, sizeof(double));
  mergedVector = (double *) checkedCalloc(noOfSamples, sizeof(double));
  feature2D = (double **) checkedCalloc(noOfFeatures, sizeof(double *));

  firstElementOfEachSample(feature2D, featureMatrix, noOfFeatures, noOfSamples);

  /* -1 marks a cache slot that has not been computed yet */
  for (i = 0; i < sizeOfMatrix; i++)
  {
    featureMIMatrix[i] = -1;
  }/*for featureMIMatrix - blank to -1*/

  for (i = 0; i < noOfFeatures; i++)
  {
    classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);

    /* the first feature is always accepted so that an index is guaranteed */
    if (maxMICounter == -1 || classMI[i] > maxMI)
    {
      maxMI = classMI[i];
      maxMICounter = i;
    }/*if bigger than current maximum*/
  }/*for noOfFeatures - filling classMI*/

  selectedFeatures[maxMICounter] = 1;
  outputFeatures[0] = maxMICounter;
  (*noOfOutput)++;

  /*****************************************************************************
  ** We have populated the classMI array, and selected the highest
  ** MI feature as the first output feature
  ** Now we move into the JMI algorithm
  *****************************************************************************/

  for (i = 1; i < k; i++)
  {
    score = 0.0;
    currentHighestFeature = -1;

    for (j = 0; j < noOfFeatures; j++)
    {
      /*skip features that are already selected*/
      if (selectedFeatures[j] != 0)
      {
        continue;
      }

      currentScore = 0.0;

      for (x = 0; x < i; x++)
      {
        arrayPosition = x * noOfFeatures + j;

        if (featureMIMatrix[arrayPosition] == -1)
        {
          mergeArrays(feature2D[outputFeatures[x]], feature2D[j], mergedVector, noOfSamples);
          featureMIMatrix[arrayPosition] = calculateMutualInformation(mergedVector, classColumn, noOfSamples);
        }/*if not already known*/

        currentScore += featureMIMatrix[arrayPosition];
      }/*for the number of already selected features*/

      /* the first unselected feature is the provisional winner, so a
      ** round can never fall back to an index that is already selected */
      if (currentHighestFeature == -1 || currentScore > score)
      {
        score = currentScore;
        currentHighestFeature = j;
      }
    }/*for number of features*/

    if (currentHighestFeature == -1)
    {
      /* every feature is already selected (k > noOfFeatures) */
      break;
    }

    selectedFeatures[currentHighestFeature] = 1;
    outputFeatures[i] = currentHighestFeature;
    (*noOfOutput)++;
  }/*for the number of features to select*/

  FREE_FUNC(classMI);
  FREE_FUNC(feature2D);
  FREE_FUNC(featureMIMatrix);
  FREE_FUNC(mergedVector);
  FREE_FUNC(selectedFeatures);
}/*JMI(int,int,int,double[][],double[],int[],int *)*/
/*
 * weightedCondMI - greedy feature selection driven by weighted conditional
 * mutual information.
 *
 * The first feature chosen is the one with the largest
 * calcWeightedMutualInformation() value against classColumn. Each later
 * round evaluates calcWeightedConditionalMutualInformation() for every
 * unselected feature, conditioned on conditionVector, and chooses the
 * largest value that is strictly greater than 0. conditionVector starts as
 * a copy of the first chosen feature and is updated with mergeArrays()
 * after every successful round.
 *
 * k              - number of features requested
 * noOfSamples    - length of classColumn and of each feature vector
 * noOfFeatures   - number of candidate features
 * featureMatrix  - featureMatrix[f] points to the samples of feature f
 * classColumn    - class label per sample
 * weightVector   - passed through unchanged to the weighted MI routines
 * outputFeatures - receives k entries: chosen indices in selection order,
 *                  -1 for a round in which no feature scored above 0
 * featureScores  - featureScores[i] receives the score of the feature
 *                  stored in outputFeatures[i] (0.0 when that entry is -1)
 *
 * Returns outputFeatures.
 *
 * With k == 0 nothing is written. With noOfFeatures == 0 all k entries of
 * outputFeatures are -1 and featureScores is left untouched.
 */
int* weightedCondMI(uint k, uint noOfSamples, uint noOfFeatures, uint **featureMatrix, uint *classColumn, double *weightVector, int *outputFeatures, double *featureScores)
{
  char *selectedFeatures;
  /*holds the class MI values*/
  double *classMI;
  uint *conditionVector;
  double maxMI = -1.0;
  int maxMICounter = -1;
  double score, currentScore;
  int currentHighestFeature;
  uint i, j;

  for (j = 0; j < k; j++)
  {
    outputFeatures[j] = -1;
  }

  /* no slot to fill or no candidate to choose from */
  if (k == 0 || noOfFeatures == 0)
  {
    return outputFeatures;
  }

  selectedFeatures = (char *) checkedCalloc(noOfFeatures, sizeof(char));
  classMI = (double *) checkedCalloc(noOfFeatures, sizeof(double));
  conditionVector = (uint *) checkedCalloc(noOfSamples, sizeof(uint));

  for (i = 0; i < noOfFeatures; i++)
  {
    classMI[i] = calcWeightedMutualInformation(featureMatrix[i], classColumn, weightVector, noOfSamples);

    /* the first feature is always accepted, so maxMICounter is a valid
    ** index afterwards whatever values the weighted MI takes */
    if (maxMICounter == -1 || classMI[i] > maxMI)
    {
      maxMI = classMI[i];
      maxMICounter = (int) i;
    }/*if bigger than current maximum*/
  }/*for noOfFeatures - filling classMI*/

  selectedFeatures[maxMICounter] = 1;
  outputFeatures[0] = maxMICounter;
  featureScores[0] = maxMI;

  memcpy(conditionVector, featureMatrix[maxMICounter], sizeof *conditionVector * noOfSamples);

  /*****************************************************************************
  ** We have populated the classMI array, and selected the highest
  ** MI feature as the first output feature
  ** Now we move into the CondMI algorithm
  *****************************************************************************/

  for (i = 1; i < k; i++)
  {
    score = 0.0;
    currentHighestFeature = -1;

    for (j = 0; j < noOfFeatures; j++)
    {
      /*skip features that are already selected*/
      if (selectedFeatures[j] != 0)
      {
        continue;
      }

      currentScore = calcWeightedConditionalMutualInformation(featureMatrix[j], classColumn, conditionVector, weightVector, noOfSamples);

      if (currentScore > score)
      {
        score = currentScore;
        currentHighestFeature = (int) j;
      }
    }/*for number of features*/

    outputFeatures[i] = currentHighestFeature;
    featureScores[i] = score;

    if (currentHighestFeature != -1)
    {
      selectedFeatures[currentHighestFeature] = 1;
      /* conditionVector is both an input and the output of the merge */
      mergeArrays(featureMatrix[currentHighestFeature], conditionVector, conditionVector, noOfSamples);
    }
  }/*for the number of features to select*/

  FREE_FUNC(classMI);
  FREE_FUNC(conditionVector);
  FREE_FUNC(selectedFeatures);

  return outputFeatures;
}/*weightedCondMI(uint,uint,uint,uint[][],uint[],double[],int[],double[])*/
/*
 * CondMI - greedy feature selection driven by conditional mutual
 * information.
 *
 * The first feature chosen is the one with the largest
 * calculateMutualInformation() value against classColumn. Each later round
 * evaluates calculateConditionalMutualInformation() for every unselected
 * feature, conditioned on conditionVector, and chooses the largest value
 * that is strictly greater than 0. conditionVector starts as a copy of the
 * first chosen feature and is updated with mergeArrays() after every
 * successful round.
 *
 * k              - number of features requested
 * noOfSamples    - length of classColumn and of each feature vector
 * noOfFeatures   - number of candidate features
 * featureMatrix  - feature f occupies the noOfSamples values starting at
 *                  featureMatrix + f*noOfSamples
 * classColumn    - class label per sample
 * outputFeatures - receives k entries (feature indices stored as doubles)
 *                  in selection order; -1 for a round in which no feature
 *                  scored above 0
 *
 * Returns outputFeatures. Nothing is written when k or noOfFeatures is not
 * positive.
 *
 * The first round always yields a valid index (ties go to the lowest
 * index), including when every feature has zero mutual information with
 * the class.
 */
double* CondMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
{
  /*holds the class MI values*/
  double *classMI;
  char *selectedFeatures;
  double **feature2D;
  double *conditionVector;
  double maxMI = 0.0;
  int maxMICounter = -1;
  double score, currentScore;
  int currentHighestFeature;
  int i, j;

  /* nothing to select, and outputFeatures may have no room at all */
  if (k <= 0 || noOfFeatures <= 0)
  {
    return outputFeatures;
  }

  classMI = (double *) CALLOC_FUNC(noOfFeatures, sizeof(double));
  selectedFeatures = (char *) CALLOC_FUNC(noOfFeatures, sizeof(char));
  feature2D = (double **) CALLOC_FUNC(noOfFeatures, sizeof(double *));
  conditionVector = (double *) CALLOC_FUNC(noOfSamples, sizeof(double));

  /* per-feature views into the flat matrix; the offset is computed in
  ** size_t so it cannot overflow int for large matrices */
  for (j = 0; j < noOfFeatures; j++)
  {
    feature2D[j] = featureMatrix + (size_t) j * (size_t) noOfSamples;
  }

  for (i = 0; i < noOfFeatures; i++)
  {
    classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);

    /* the first feature is always accepted, so maxMICounter is a valid
    ** index afterwards even if no value exceeds the initial maxMI */
    if (maxMICounter == -1 || classMI[i] > maxMI)
    {
      maxMI = classMI[i];
      maxMICounter = i;
    }/*if bigger than current maximum*/
  }/*for noOfFeatures - filling classMI*/

  selectedFeatures[maxMICounter] = 1;
  outputFeatures[0] = maxMICounter;

  memcpy(conditionVector, feature2D[maxMICounter], sizeof(double) * noOfSamples);

  /*****************************************************************************
  ** We have populated the classMI array, and selected the highest
  ** MI feature as the first output feature
  ** Now we move into the CondMI algorithm
  *****************************************************************************/

  for (i = 1; i < k; i++)
  {
    score = 0.0;
    currentHighestFeature = -1;

    for (j = 0; j < noOfFeatures; j++)
    {
      /*skip features that are already selected*/
      if (selectedFeatures[j] != 0)
      {
        continue;
      }

      currentScore = calculateConditionalMutualInformation(feature2D[j], classColumn, conditionVector, noOfSamples);

      if (currentScore > score)
      {
        score = currentScore;
        currentHighestFeature = j;
      }
    }/*for number of features*/

    outputFeatures[i] = currentHighestFeature;

    if (currentHighestFeature != -1)
    {
      selectedFeatures[currentHighestFeature] = 1;
      /* conditionVector is both an input and the output of the merge */
      mergeArrays(feature2D[currentHighestFeature], conditionVector, conditionVector, noOfSamples);
    }
  }/*for the number of features to select*/

  FREE_FUNC(classMI);
  FREE_FUNC(conditionVector);
  FREE_FUNC(feature2D);
  FREE_FUNC(selectedFeatures);

  return outputFeatures;
}/*CondMI(int,int,int,double[][],double[],double[])*/