/*******************************************************************************
** discWeightedCondMI
**
** Adapter that lets callers holding double-valued data use weightedCondMI.
** Every feature row and the class column are run through normaliseArray into
** freshly allocated uint buffers, weightedCondMI is invoked on those buffers,
** and the int indices it produces are copied into the caller's double array.
**
** k               - number of entries copied into outputFeatures
** noOfSamples     - length of each feature row and of classColumn
** noOfFeatures    - number of rows in featureMatrix
** featureMatrix   - featureMatrix[f] points at the samples of feature f
** classColumn     - one class label per sample
** weightVector    - passed through unchanged to weightedCondMI
** outputFeatures  - receives k values (weightedCondMI's ints widened to double)
** featureScores   - passed through unchanged to weightedCondMI
**
** returns outputFeatures
*******************************************************************************/
double* discWeightedCondMI(uint k, uint noOfSamples, uint noOfFeatures, double **featureMatrix, double *classColumn, double *weightVector, double *outputFeatures, double *featureScores)
{
  /* one contiguous block holding all discretised features, row after row */
  uint *discreteBlock = (uint *) checkedCalloc(noOfSamples*noOfFeatures,sizeof(uint));
  uint *discreteClass = (uint *) checkedCalloc(noOfSamples,sizeof(uint));
  int *pickedIndices = (int *) checkedCalloc(k,sizeof(int));
  /* row pointers into discreteBlock so it can be indexed as [feature][sample] */
  uint **discreteRows = (uint **) checkedCalloc(noOfFeatures,sizeof(uint*));
  int n;

  for (n = 0; n < noOfFeatures; n++)
  {
    uint *row = discreteBlock + n*noOfSamples;

    discreteRows[n] = row;
    normaliseArray(featureMatrix[n],row,noOfSamples);
  }/*for each feature - discretise into its row*/

  normaliseArray(classColumn,discreteClass,noOfSamples);

  weightedCondMI(k,noOfSamples,noOfFeatures,discreteRows,discreteClass,weightVector,pickedIndices,featureScores);

  /* widen the int results into the caller's double buffer */
  for (n = 0; n < k; n++)
  {
    outputFeatures[n] = pickedIndices[n];
  }

  FREE_FUNC(discreteBlock);
  FREE_FUNC(discreteClass);
  FREE_FUNC(pickedIndices);
  FREE_FUNC(discreteRows);

  discreteBlock = NULL;
  discreteClass = NULL;
  pickedIndices = NULL;
  discreteRows = NULL;

  return outputFeatures;
}/*discWeightedCondMI(uint,uint,uint,double[][],double[],double[],double[],double[])*/
ProbabilityState calculateProbability(double *dataVector, int vectorLength) { int *normalisedVector; int *stateCounts; double *stateProbs; int numStates; /*double fractionalState;*/ ProbabilityState state; int i; double length = vectorLength; normalisedVector = (int *) checkedCalloc(vectorLength,sizeof(int)); numStates = normaliseArray(dataVector,normalisedVector,vectorLength); stateCounts = (int *) checkedCalloc(numStates,sizeof(int)); stateProbs = (double *) checkedCalloc(numStates,sizeof(double)); /* optimised version, may have floating point problems fractionalState = 1.0 / vectorLength; for (i = 0; i < vectorLength; i++) { stateProbs[normalisedVector[i]] += fractionalState; } */ /* Optimised for number of FP operations now O(states) instead of O(vectorLength) */ for (i = 0; i < vectorLength; i++) { stateCounts[normalisedVector[i]] += 1; } for (i = 0; i < numStates; i++) { stateProbs[i] = stateCounts[i] / length; } FREE_FUNC(stateCounts); FREE_FUNC(normalisedVector); stateCounts = NULL; normalisedVector = NULL; state.probabilityVector = stateProbs; state.numStates = numStates; return state; }/*calculateProbability(double *,int)*/
WeightedProbState calculateWeightedProbability(double *dataVector, double *exampleWeightVector, int vectorLength) { int *normalisedVector; int *stateCounts; double *stateProbs; double *stateWeights; int numStates; WeightedProbState state; int i; double length = vectorLength; normalisedVector = (int *) checkedCalloc(vectorLength,sizeof(int)); numStates = normaliseArray(dataVector,normalisedVector,vectorLength); stateCounts = (int *) checkedCalloc(numStates,sizeof(int)); stateProbs = (double *) checkedCalloc(numStates,sizeof(double)); stateWeights = (double *) checkedCalloc(numStates,sizeof(double)); for (i = 0; i < vectorLength; i++) { stateCounts[normalisedVector[i]] += 1; stateWeights[normalisedVector[i]] += exampleWeightVector[i]; } for (i = 0; i < numStates; i++) { stateProbs[i] = stateCounts[i] / length; stateWeights[i] /= stateCounts[i]; } FREE_FUNC(stateCounts); FREE_FUNC(normalisedVector); stateCounts = NULL; normalisedVector = NULL; state.probabilityVector = stateProbs; state.stateWeightVector = stateWeights; state.numStates = numStates; return state; }/*calculateProbability(double *,int)*/
/*******************************************************************************
** calculateConditionalMutualInformation
**
** Computes I(X;Y|Z) = H(X|Z) - H(X|YZ), where X = dataVector,
** Y = targetVector and Z = conditionVector.
** The joint variable YZ is built with mergeArrays into a temporary buffer and
** both entropies come from calculateConditionalEntropy.
**
** vectorLength - number of samples in each of the three vectors
**
** returns the difference of the two conditional entropies
*******************************************************************************/
double calculateConditionalMutualInformation(double *dataVector, double *targetVector, double *conditionVector, int vectorLength)
{
  double entropyGivenCondition;
  double entropyGivenBoth;
  double *jointCondition = (double *) checkedCalloc(vectorLength,sizeof(double));

  /* jointCondition <- combination of Y and Z */
  mergeArrays(targetVector,conditionVector,jointCondition,vectorLength);

  entropyGivenCondition = calculateConditionalEntropy(dataVector,conditionVector,vectorLength);
  entropyGivenBoth = calculateConditionalEntropy(dataVector,jointCondition,vectorLength);

  FREE_FUNC(jointCondition);
  jointCondition = NULL;

  return entropyGivenCondition - entropyGivenBoth;
}/*calculateConditionalMutualInformation(double *,double *,double *,int)*/
/*
 * Appends a new node carrying `data` to the end of `list`.
 *
 * list - list to append to; must not be NULL (checked with assert)
 * data - stored in the node as-is; it is not copied here
 *
 * The node comes from checkedCalloc; its prev/next members are only written
 * when linking to an existing tail, so they otherwise keep whatever
 * checkedCalloc left there (presumably zero/NULL, as with calloc - confirm).
 */
void addToLinkedList(
  struct LinkedList* list,
  void* data)
{
  struct LinkedListNode* newNode;

  assert(list != NULL);

  newNode = checkedCalloc(1, sizeof(struct LinkedListNode));
  newNode->data = data;

  if (list->tail != NULL)
  {
    /* non-empty list: hook the node in behind the current tail */
    newNode->prev = list->tail;
    list->tail->next = newNode;
  }
  else
  {
    /* empty list: the node is also the head */
    list->head = newNode;
  }
  list->tail = newNode;

  list->size += 1;
}
/*******************************************************************************
** ICAP
**
** Greedy forward feature selection.
** The first feature is the one with the largest mutual information with the
** class. Every later round scores each unselected feature j as
**
**   classMI[j] + sum over selected s of min(0, I(s;j|class) - I(s;j))
**
** and takes the highest scorer (ties keep the lowest index).
**
** k               - number of selection rounds (first pick included)
** noOfSamples     - samples per feature
** noOfFeatures    - number of features
** featureMatrix   - flat feature data, split into per-feature rows by
**                   firstElementOfEachSample
** classColumn     - class label per sample
** outputFeatures  - receives the selected feature indices in selection order
** noOfOutput      - incremented once per selected feature; it is not reset
**                   here, so the caller supplies the starting value
**
** Pairwise MI / conditional MI values are cached in k*noOfFeatures tables
** indexed by (selection round, feature); -1 marks "not computed yet".
*******************************************************************************/
void ICAP(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, int *outputFeatures,int *noOfOutput)
{
  /* MI between each feature and the class */
  double *relevance = (double *)checkedCalloc(noOfFeatures,sizeof(double));
  /* non-zero once a feature has been selected */
  char *alreadyChosen = (char *)checkedCalloc(noOfFeatures,sizeof(char));
  int cacheLength = k*noOfFeatures;
  double *pairMI = (double *)checkedCalloc(cacheLength,sizeof(double));
  double *pairCMI = (double *)checkedCalloc(cacheLength,sizeof(double));
  /* per-feature row pointers into featureMatrix */
  double **featureRows = (double **) checkedCalloc(noOfFeatures,sizeof(double *));
  /* starts below zero so that a feature is picked even if every MI is 0 */
  double bestRelevance = -1.0;
  int bestRelevanceIdx = -1;
  double bestScore, candidateScore, penalty, interaction;
  int bestCandidate, slot;
  int round, cand, prev, n;

  firstElementOfEachSample(featureRows,featureMatrix,noOfFeatures,noOfSamples);

  /* mark every cache entry as unknown */
  for (n = 0; n < cacheLength; n++)
  {
    pairMI[n] = -1;
    pairCMI[n] = -1;
  }

  /* relevance of every feature; remember the strongest one */
  for (n = 0; n < noOfFeatures; n++)
  {
    relevance[n] = calculateMutualInformation(featureRows[n], classColumn, noOfSamples);

    if (relevance[n] > bestRelevance)
    {
      bestRelevance = relevance[n];
      bestRelevanceIdx = n;
    }
  }/*for noOfFeatures - filling relevance*/

  alreadyChosen[bestRelevanceIdx] = 1;
  outputFeatures[0] = bestRelevanceIdx;
  *noOfOutput += 1;

  for (round = 1; round < k; round++)
  {
    /* -DBL_MAX rather than 0 so that a feature whose redundancy exceeds its
    ** relevance (negative score) can still be selected */
    bestScore = -DBL_MAX;
    bestCandidate = 0;

    for (cand = 0; cand < noOfFeatures; cand++)
    {
      if (alreadyChosen[cand])
      {
        continue;
      }

      penalty = 0.0;

      for (prev = 0; prev < round; prev++)
      {
        slot = prev*noOfFeatures + cand;

        if (pairMI[slot] == -1)
        {
          /* first time this (selected, candidate) pair is needed */
          double *chosenRow = featureRows[(int) outputFeatures[prev]];

          pairMI[slot] = calculateMutualInformation(chosenRow, featureRows[cand], noOfSamples);
          pairCMI[slot] = calculateConditionalMutualInformation(chosenRow, featureRows[cand], classColumn, noOfSamples);
        }/*if not already known*/

        /* only negative interaction is charged against the candidate */
        interaction = pairCMI[slot] - pairMI[slot];
        if (interaction < 0)
        {
          penalty += interaction;
        }
      }/*for the already selected features*/

      candidateScore = relevance[cand] + penalty;

      if (candidateScore > bestScore)
      {
        bestScore = candidateScore;
        bestCandidate = cand;
      }
    }/*for number of features*/

    alreadyChosen[bestCandidate] = 1;
    outputFeatures[round] = bestCandidate;
    *noOfOutput += 1;
  }/*for the number of features to select*/

  FREE_FUNC(relevance);
  FREE_FUNC(featureRows);
  FREE_FUNC(pairMI);
  FREE_FUNC(pairCMI);
  FREE_FUNC(alreadyChosen);

  relevance = NULL;
  featureRows = NULL;
  pairMI = NULL;
  pairCMI = NULL;
  alreadyChosen = NULL;
}/*ICAP(int,int,int,double[],double[],int[],int*)*/
/*******************************************************************************
** BetaGamma
**
** Greedy forward feature selection with a parameterised criterion.
** The first feature is the one with the largest mutual information with the
** class. Every later round scores each unselected feature j as
**
**   classMI[j] - sum over selected s of
**                  ( betaParam * I(s;j) - gammaParam * I(s;j|class) )
**
** and takes the highest scorer (ties keep the lowest index).
**
** k               - number of selection rounds (first pick included)
** noOfSamples     - samples per feature
** noOfFeatures    - number of features
** featureMatrix   - flat array, feature f occupies elements
**                   [f*noOfSamples, (f+1)*noOfSamples)
** classColumn     - class label per sample
** outputFeatures  - receives the selected feature indices, stored as doubles
** betaParam       - weight of the pairwise MI term
** gammaParam      - weight of the class-conditional pairwise MI term
**
** returns outputFeatures. If no first feature can be chosen (noOfFeatures <= 0,
** or no class MI compares greater than -1, e.g. all NaN) outputFeatures is
** returned unmodified.
*******************************************************************************/
double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double betaParam, double gammaParam)
{
  double **feature2D = (double **) checkedCalloc(noOfFeatures,sizeof(double *));

  /*holds the class MI values*/
  double *classMI = (double *)checkedCalloc(noOfFeatures,sizeof(double));
  char *selectedFeatures = (char *)checkedCalloc(noOfFeatures,sizeof(char));

  /*holds the combined beta/gamma redundancy term per (round, feature)*/
  int sizeOfMatrix = k*noOfFeatures;
  double *featureMIMatrix = (double *)checkedCalloc(sizeOfMatrix,sizeof(double));

  /* Starts below zero so a feature is picked even when every class MI is 0,
  ** as ICAP, JMI and mRMR_D do. Starting at 0.0 left maxMICounter at -1 in
  ** that case, which then indexed selectedFeatures[-1] and feature2D[-1]. */
  double maxMI = -1.0;
  int maxMICounter = -1;

  double score, currentScore, totalFeatureMI;
  int currentHighestFeature, arrayPosition;
  int i,j,m;

  /* row pointers so the flat matrix can be indexed as [feature][sample] */
  for (j = 0; j < noOfFeatures; j++)
  {
    feature2D[j] = featureMatrix + (int)j*noOfSamples;
  }

  for (i = 0; i < sizeOfMatrix; i++)
  {
    featureMIMatrix[i] = -1;
  }/*for featureMIMatrix - blank to -1*/

  for (i = 0; i < noOfFeatures; i++)
  {
    classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);

    if (classMI[i] > maxMI)
    {
      maxMI = classMI[i];
      maxMICounter = i;
    }/*if bigger than current maximum*/
  }/*for noOfFeatures - filling classMI*/

  /* nothing selectable: avoid indexing with -1 below */
  if (maxMICounter < 0)
  {
    goto cleanup;
  }

  selectedFeatures[maxMICounter] = 1;
  outputFeatures[0] = maxMICounter;

  for (i = 1; i < k; i++)
  {
    /* -DBL_MAX rather than 0 so that a feature whose redundancy exceeds its
    ** relevance (negative score) can still be selected */
    score = -DBL_MAX;
    currentHighestFeature = 0;

    for (j = 0; j < noOfFeatures; j++)
    {
      /*if we haven't selected j*/
      if (!selectedFeatures[j])
      {
        currentScore = classMI[j];
        totalFeatureMI = 0.0;

        for (m = 0; m < i; m++)
        {
          arrayPosition = m*noOfFeatures + j;

          if (featureMIMatrix[arrayPosition] == -1)
          {
            /* cache beta*I(s;j) - gamma*I(s;j|class) for this pair */
            featureMIMatrix[arrayPosition] = betaParam*calculateMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], noOfSamples);
            featureMIMatrix[arrayPosition] -= gammaParam*calculateConditionalMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], classColumn, noOfSamples);
          }/*if not already known*/

          totalFeatureMI += featureMIMatrix[arrayPosition];
        }/*for the number of already selected features*/

        currentScore -= (totalFeatureMI);

        if (currentScore > score)
        {
          score = currentScore;
          currentHighestFeature = j;
        }
      }/*if j is unselected*/
    }/*for number of features*/

    selectedFeatures[currentHighestFeature] = 1;
    outputFeatures[i] = currentHighestFeature;
  }/*for the number of features to select*/

cleanup:
  FREE_FUNC(classMI);
  FREE_FUNC(feature2D);
  FREE_FUNC(featureMIMatrix);
  FREE_FUNC(selectedFeatures);

  classMI = NULL;
  feature2D = NULL;
  featureMIMatrix = NULL;
  selectedFeatures = NULL;

  return outputFeatures;
}/*BetaGamma(int,int,int,double[],double[],double[],double,double)*/
/*******************************************************************************
** JMI
**
** Greedy forward feature selection.
** The first feature is the one with the largest mutual information with the
** class. Every later round scores each unselected feature j as
**
**   sum over selected s of I( merge(s,j) ; class )
**
** where merge(s,j) is the joint variable built by mergeArrays, and takes the
** highest scorer (ties keep the lowest index).
**
** k               - number of selection rounds (first pick included)
** noOfSamples     - samples per feature
** noOfFeatures    - number of features
** featureMatrix   - flat feature data, split into per-feature rows by
**                   firstElementOfEachSample
** classColumn     - class label per sample
** outputFeatures  - receives the selected feature indices in selection order
** noOfOutput      - incremented once per selected feature; it is not reset
**                   here, so the caller supplies the starting value
**
** Joint MI values are cached in a k*noOfFeatures table indexed by
** (selection round, feature); -1 marks "not computed yet".
*******************************************************************************/
void JMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, int *outputFeatures,int *noOfOutput)
{
  /*holds the class MI values*/
  double *classMI = (double *)checkedCalloc(noOfFeatures,sizeof(double));

  char *selectedFeatures = (char *)checkedCalloc(noOfFeatures,sizeof(char));

  /*holds the joint (selected,candidate) vs class MI values*/
  int sizeOfMatrix = k*noOfFeatures;
  double *featureMIMatrix = (double *)checkedCalloc(sizeOfMatrix,sizeof(double));

  /*starts below zero to ensure it always picks a feature*/
  double maxMI = -1.0;
  int maxMICounter = -1;

  double score, currentScore;
  int currentHighestFeature;

  /*scratch buffer for the merged (selected,candidate) variable*/
  double *mergedVector = (double *) checkedCalloc(noOfSamples,sizeof(double));

  int arrayPosition;
  int i,j,x;

  /*per-feature row pointers into featureMatrix*/
  double **feature2D = (double **) checkedCalloc(noOfFeatures,sizeof(double *));

  firstElementOfEachSample(feature2D,featureMatrix,noOfFeatures,noOfSamples);

  for (i = 0; i < sizeOfMatrix; i++)
  {
    featureMIMatrix[i] = -1;
  }/*for featureMIMatrix - blank to -1*/

  for (i = 0; i < noOfFeatures; i++)
  {
    classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);

    if (classMI[i] > maxMI)
    {
      maxMI = classMI[i];
      maxMICounter = i;
    }/*if bigger than current maximum*/
  }/*for noOfFeatures - filling classMI*/

  selectedFeatures[maxMICounter] = 1;
  outputFeatures[0] = maxMICounter;
  (*noOfOutput)++;

  for (i = 1; i < k; i++)
  {
    /* -DBL_MAX, as in ICAP, BetaGamma and mRMR_D: with a 0.0 start and the
    ** strict '>' below, a round in which no candidate scored above zero fell
    ** through to the default index 0 and emitted feature 0 again even when it
    ** was already selected. Now the first unselected feature wins instead. */
    score = -DBL_MAX;
    currentHighestFeature = 0;

    for (j = 0; j < noOfFeatures; j++)
    {
      /*if we haven't selected j*/
      if (selectedFeatures[j] == 0)
      {
        currentScore = 0.0;

        for (x = 0; x < i; x++)
        {
          arrayPosition = x*noOfFeatures + j;

          if (featureMIMatrix[arrayPosition] == -1)
          {
            /* I(merge(selected_x, j); class), computed once per pair */
            mergeArrays(feature2D[(int) outputFeatures[x]], feature2D[j],mergedVector,noOfSamples);
            featureMIMatrix[arrayPosition] = calculateMutualInformation(mergedVector, classColumn, noOfSamples);
          }/*if not already known*/

          currentScore += featureMIMatrix[arrayPosition];
        }/*for the number of already selected features*/

        if (currentScore > score)
        {
          score = currentScore;
          currentHighestFeature = j;
        }
      }/*if j is unselected*/
    }/*for number of features*/

    selectedFeatures[currentHighestFeature] = 1;
    outputFeatures[i] = currentHighestFeature;
    (*noOfOutput)++;
  }/*for the number of features to select*/

  FREE_FUNC(classMI);
  FREE_FUNC(feature2D);
  FREE_FUNC(featureMIMatrix);
  FREE_FUNC(mergedVector);
  FREE_FUNC(selectedFeatures);

  classMI = NULL;
  feature2D = NULL;
  featureMIMatrix = NULL;
  mergedVector = NULL;
  selectedFeatures = NULL;
}/*JMI(int,int,int,double[],double[],int[],int*)*/
/*******************************************************************************
** weightedCondMI
**
** Greedy forward feature selection on discretised (uint) data with example
** weights. The first feature is the one with the largest weighted mutual
** information with the class. Each later round picks the unselected feature
** with the largest weighted conditional MI with the class given a running
** condition vector, then folds the chosen feature into that condition vector
** with mergeArrays.
**
** k               - number of entries of outputFeatures / featureScores to fill
** noOfSamples     - samples per feature
** noOfFeatures    - number of rows in featureMatrix
** featureMatrix   - featureMatrix[f] points at the samples of feature f
** classColumn     - class label per sample
** weightVector    - forwarded to the weighted MI routines
** outputFeatures  - receives selected indices; all k entries are preset to -1
**                   and a round in which no candidate scores above 0 leaves
**                   -1 in its slot
** featureScores   - receives the winning score of each round (0.0 for a round
**                   without a winner)
**
** returns outputFeatures
*******************************************************************************/
int* weightedCondMI(uint k, uint noOfSamples, uint noOfFeatures, uint **featureMatrix, uint *classColumn, double *weightVector, int *outputFeatures, double *featureScores)
{
  /* non-zero once a feature has been selected */
  char *chosen = (char *) checkedCalloc(noOfFeatures,sizeof(char));
  /* weighted MI between each feature and the class */
  double *relevance = (double *) checkedCalloc(noOfFeatures,sizeof(double));
  /* starts below zero so that a first feature is always picked */
  double topRelevance = -1.0;
  int topIndex = -1;
  double bestScore, candScore;
  int bestFeature;
  /* joint variable of everything selected so far */
  uint *conditionVector = (uint *) checkedCalloc(noOfSamples,sizeof(uint));
  int round, cand;

  for (cand = 0; cand < k; cand++)
  {
    outputFeatures[cand] = -1;
  }

  for (cand = 0; cand < noOfFeatures; cand++)
  {
    relevance[cand] = calcWeightedMutualInformation(featureMatrix[cand],classColumn,weightVector,noOfSamples);

    if (relevance[cand] > topRelevance)
    {
      topRelevance = relevance[cand];
      topIndex = cand;
    }
  }/*for noOfFeatures - filling relevance*/

  chosen[topIndex] = 1;
  outputFeatures[0] = topIndex;
  featureScores[0] = topRelevance;

  /* the condition vector starts out as a copy of the first selected feature */
  memcpy(conditionVector,featureMatrix[topIndex],sizeof(int)*noOfSamples);

  for (round = 1; round < k; round++)
  {
    bestScore = 0.0;
    bestFeature = -1;

    for (cand = 0; cand < noOfFeatures; cand++)
    {
      if (chosen[cand] != 0)
      {
        continue;
      }

      candScore = calcWeightedConditionalMutualInformation(featureMatrix[cand],classColumn,conditionVector,weightVector,noOfSamples);

      if (candScore > bestScore)
      {
        bestScore = candScore;
        bestFeature = cand;
      }
    }/*for number of features*/

    outputFeatures[round] = bestFeature;
    featureScores[round] = bestScore;

    if (bestFeature != -1)
    {
      chosen[bestFeature] = 1;
      /* conditionVector is both an input and the output of the merge */
      mergeArrays(featureMatrix[bestFeature],conditionVector,conditionVector,noOfSamples);
    }
  }/*for the number of features to select*/

  FREE_FUNC(relevance);
  FREE_FUNC(conditionVector);
  FREE_FUNC(chosen);

  relevance = NULL;
  conditionVector = NULL;
  chosen = NULL;

  return outputFeatures;
}/*weightedCondMI(uint,uint,uint,uint[][],uint[],double[],int[],double[])*/
/*******************************************************************************
** weightedCMIM
**
** Greedy forward feature selection on discretised (uint) data with example
** weights, using lazily refined partial scores.
**
** Each feature's partial score starts as its weighted MI with the class.
** During a round, a feature's partial score is lowered towards the minimum of
** its weighted conditional MI with the class given each already selected
** feature, but only while the partial score still beats the best score seen
** in the round; conditionedUpTo[] records how many selected features each
** candidate has been evaluated against so that work is not repeated.
**
** k               - number of selection rounds (first pick included)
** noOfSamples     - samples per feature
** noOfFeatures    - number of rows in featureMatrix
** featureMatrix   - featureMatrix[f] points at the samples of feature f
** classColumn     - class label per sample
** weightVector    - forwarded to the weighted MI routines
** outputFeatures  - receives the selected feature indices
** featureScores   - receives the winning partial score of each round
**
** returns outputFeatures
**
** NOTE(review): the first-pick threshold is 0.0, so if no feature has weighted
** MI above 0 the value -1 is stored into the uint outputFeatures[0]. Likewise
** a later round in which no partial score exceeds 0 writes nothing to its
** outputFeatures / featureScores slot. Selected features are not masked out;
** presumably their partial score drops to ~0 once conditioned on themselves
** - confirm against callers.
*******************************************************************************/
uint* weightedCMIM(uint k, uint noOfSamples, uint noOfFeatures, uint **featureMatrix, uint *classColumn, double *weightVector, uint *outputFeatures, double *featureScores)
{
  /* partial score per feature; initialised to the weighted class MI */
  double *partialScore = (double *) checkedCalloc(noOfFeatures,sizeof(double));
  /* number of selected features each candidate has been conditioned on */
  uint *conditionedUpTo = (uint *) checkedCalloc(noOfFeatures,sizeof(uint));
  double roundBest, condInfo;
  int condFeature;
  double topScore = 0.0;
  int topIndex = -1;
  int cand, round;

  for (cand = 0; cand < noOfFeatures; cand++)
  {
    partialScore[cand] = calcWeightedMutualInformation(featureMatrix[cand],classColumn,weightVector,noOfSamples);

    if (partialScore[cand] > topScore)
    {
      topScore = partialScore[cand];
      topIndex = cand;
    }
  }/*for noOfFeatures - filling partialScore*/

  outputFeatures[0] = topIndex;
  featureScores[0] = topScore;

  for (round = 1; round < k; round++)
  {
    roundBest = 0.0;

    for (cand = 0; cand < noOfFeatures; cand++)
    {
      /* refine only while this candidate could still win the round and there
      ** are selected features it has not been conditioned on yet */
      while ((partialScore[cand] > roundBest) && (conditionedUpTo[cand] < round))
      {
        condFeature = outputFeatures[conditionedUpTo[cand]];
        condInfo = calcWeightedConditionalMutualInformation(featureMatrix[cand], classColumn, featureMatrix[condFeature], weightVector, noOfSamples);

        /* keep the minimum seen so far */
        if (condInfo < partialScore[cand])
        {
          partialScore[cand] = condInfo;
        }

        conditionedUpTo[cand] += 1;
      }/*while partial score greater than round best & selected features remain*/

      if (partialScore[cand] > roundBest)
      {
        roundBest = partialScore[cand];
        featureScores[round] = roundBest;
        outputFeatures[round] = cand;
      }/*if partial score still greater than round best*/
    }/*for number of features*/
  }/*for the number of features to select*/

  FREE_FUNC(partialScore);
  FREE_FUNC(conditionedUpTo);

  partialScore = NULL;
  conditionedUpTo = NULL;

  return outputFeatures;
}/*weightedCMIM(uint,uint,uint,uint[][],uint[],double[],uint[],double[])*/
/*******************************************************************************
** mRMR_D
**
** Greedy forward feature selection.
** The first feature is the one with the largest mutual information with the
** class. In round i (i features already selected) every unselected feature j
** is scored as
**
**   classMI[j] - ( sum over selected s of I(s;j) ) / i
**
** and the highest scorer is taken (ties keep the lowest index).
**
** k               - number of selection rounds (first pick included)
** noOfSamples     - samples per feature
** noOfFeatures    - number of features
** featureMatrix   - flat feature data, split into per-feature rows by
**                   firstElementOfEachSample
** classColumn     - class label per sample
** outputFeatures  - receives the selected feature indices in selection order
** noOfOutput      - incremented once per selected feature; it is not reset
**                   here, so the caller supplies the starting value
**
** Pairwise MI values are cached in a k*noOfFeatures table indexed by
** (selection round, feature); -1 marks "not computed yet".
*******************************************************************************/
void mRMR_D(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, int *outputFeatures,int *noOfOutput)
{
  /* MI between each feature and the class */
  double *relevance = (double *)checkedCalloc(noOfFeatures,sizeof(double));
  /* non-zero once a feature has been selected */
  int *alreadyChosen = (int *)checkedCalloc(noOfFeatures,sizeof(int));
  int cacheLength = k*noOfFeatures;
  double *pairMI = (double *)checkedCalloc(cacheLength,sizeof(double));
  /* per-feature row pointers into featureMatrix */
  double **featureRows = (double **) checkedCalloc(noOfFeatures,sizeof(double *));
  /* starts below zero so that a feature is picked even if every MI is 0 */
  double bestRelevance = -1.0;
  int bestRelevanceIdx = -1;
  double bestScore, candidateScore, redundancy;
  int bestCandidate, slot;
  int round, cand, prev, n;

  firstElementOfEachSample(featureRows,featureMatrix,noOfFeatures,noOfSamples);

  /* mark every cache entry as unknown */
  for (n = 0; n < cacheLength; n++)
  {
    pairMI[n] = -1;
  }

  for (n = 0; n < noOfFeatures; n++)
  {
    relevance[n] = calculateMutualInformation(featureRows[n], classColumn, noOfSamples);

    if (relevance[n] > bestRelevance)
    {
      bestRelevance = relevance[n];
      bestRelevanceIdx = n;
    }
  }/*for noOfFeatures - filling relevance*/

  alreadyChosen[bestRelevanceIdx] = 1;
  outputFeatures[0] = bestRelevanceIdx;
  *noOfOutput += 1;

  for (round = 1; round < k; round++)
  {
    /* -DBL_MAX rather than 0 so that a feature whose redundancy exceeds its
    ** relevance (negative score) can still be selected */
    bestScore = -DBL_MAX;
    bestCandidate = 0;

    for (cand = 0; cand < noOfFeatures; cand++)
    {
      if (alreadyChosen[cand] != 0)
      {
        continue;
      }

      redundancy = 0.0;

      for (prev = 0; prev < round; prev++)
      {
        slot = prev*noOfFeatures + cand;

        if (pairMI[slot] == -1)
        {
          /* first time this (selected, candidate) pair is needed */
          pairMI[slot] = calculateMutualInformation(featureRows[(int) outputFeatures[prev]], featureRows[cand], noOfSamples);
        }

        redundancy += pairMI[slot];
      }/*for the already selected features*/

      /* relevance minus mean redundancy with the selected set */
      candidateScore = relevance[cand] - (redundancy/round);

      if (candidateScore > bestScore)
      {
        bestScore = candidateScore;
        bestCandidate = cand;
      }
    }/*for number of features*/

    alreadyChosen[bestCandidate] = 1;
    outputFeatures[round] = bestCandidate;
    *noOfOutput += 1;
  }/*for the number of features to select*/

  FREE_FUNC(relevance);
  FREE_FUNC(featureRows);
  FREE_FUNC(pairMI);
  FREE_FUNC(alreadyChosen);

  relevance = NULL;
  featureRows = NULL;
  pairMI = NULL;
  alreadyChosen = NULL;
}/*mRMR_D(int,int,int,double[],double[],int[],int*)*/
static const struct ProxySettings* processArgs( int argc, char** argv) { int retVal; bool foundLocalAddress = false; bool foundRemoteAddress = false; struct ProxySettings* proxySettings = checkedCalloc(1, sizeof(struct ProxySettings)); proxySettings->bufferSize = DEFAULT_BUFFER_SIZE; proxySettings->noDelay = DEFAULT_NO_DELAY_SETTING; proxySettings->numIOThreads = DEFAULT_NUM_IO_THREADS; initializeLinkedList(&(proxySettings->serverAddrInfoList)); do { retVal = getopt(argc, argv, "b:l:nr:t:"); switch (retVal) { case 'b': proxySettings->bufferSize = parseBufferSize(optarg); break; case 'l': addToLinkedList(&(proxySettings->serverAddrInfoList), parseAddrPort(optarg)); foundLocalAddress = true; break; case 'n': proxySettings->noDelay = true; break; case 'r': if (foundRemoteAddress) { printUsageAndExit(); } proxySettings->remoteAddrInfo = parseRemoteAddrPort( optarg, &(proxySettings->remoteAddrPortStrings)); foundRemoteAddress = true; break; case 't': proxySettings->numIOThreads = parseNumIOThreads(optarg); break; case '?': printUsageAndExit(); break; } } while (retVal != -1); if ((!foundLocalAddress) || (!foundRemoteAddress)) { printUsageAndExit(); } return proxySettings; }
JointProbabilityState calculateJointProbability(double *firstVector, double *secondVector, int vectorLength) { int *firstNormalisedVector; int *secondNormalisedVector; int *firstStateCounts; int *secondStateCounts; int *jointStateCounts; double *firstStateProbs; double *secondStateProbs; double *jointStateProbs; int firstNumStates; int secondNumStates; int jointNumStates; int i; double length = vectorLength; JointProbabilityState state; firstNormalisedVector = (int *) checkedCalloc(vectorLength,sizeof(int)); secondNormalisedVector = (int *) checkedCalloc(vectorLength,sizeof(int)); firstNumStates = normaliseArray(firstVector,firstNormalisedVector,vectorLength); secondNumStates = normaliseArray(secondVector,secondNormalisedVector,vectorLength); jointNumStates = firstNumStates * secondNumStates; firstStateCounts = (int *) checkedCalloc(firstNumStates,sizeof(int)); secondStateCounts = (int *) checkedCalloc(secondNumStates,sizeof(int)); jointStateCounts = (int *) checkedCalloc(jointNumStates,sizeof(int)); firstStateProbs = (double *) checkedCalloc(firstNumStates,sizeof(double)); secondStateProbs = (double *) checkedCalloc(secondNumStates,sizeof(double)); jointStateProbs = (double *) checkedCalloc(jointNumStates,sizeof(double)); /* optimised version, less numerically stable double fractionalState = 1.0 / vectorLength; for (i = 0; i < vectorLength; i++) { firstStateProbs[firstNormalisedVector[i]] += fractionalState; secondStateProbs[secondNormalisedVector[i]] += fractionalState; jointStateProbs[secondNormalisedVector[i] * firstNumStates + firstNormalisedVector[i]] += fractionalState; } */ /* Optimised for number of FP operations now O(states) instead of O(vectorLength) */ for (i = 0; i < vectorLength; i++) { firstStateCounts[firstNormalisedVector[i]] += 1; secondStateCounts[secondNormalisedVector[i]] += 1; jointStateCounts[secondNormalisedVector[i] * firstNumStates + firstNormalisedVector[i]] += 1; } for (i = 0; i < firstNumStates; i++) { firstStateProbs[i] = 
firstStateCounts[i] / length; } for (i = 0; i < secondNumStates; i++) { secondStateProbs[i] = secondStateCounts[i] / length; } for (i = 0; i < jointNumStates; i++) { jointStateProbs[i] = jointStateCounts[i] / length; } FREE_FUNC(firstNormalisedVector); FREE_FUNC(secondNormalisedVector); FREE_FUNC(firstStateCounts); FREE_FUNC(secondStateCounts); FREE_FUNC(jointStateCounts); firstNormalisedVector = NULL; secondNormalisedVector = NULL; firstStateCounts = NULL; secondStateCounts = NULL; jointStateCounts = NULL; /* **typedef struct **{ ** double *jointProbabilityVector; ** int numJointStates; ** double *firstProbabilityVector; ** int numFirstStates; ** double *secondProbabilityVector; ** int numSecondStates; **} JointProbabilityState; */ state.jointProbabilityVector = jointStateProbs; state.numJointStates = jointNumStates; state.firstProbabilityVector = firstStateProbs; state.numFirstStates = firstNumStates; state.secondProbabilityVector = secondStateProbs; state.numSecondStates = secondNumStates; return state; }/*calculateJointProbability(double *,double *, int)*/
WeightedJointProbState calculateWeightedJointProbability(double *firstVector, double *secondVector, double *weightVector, int vectorLength) { int *firstNormalisedVector; int *secondNormalisedVector; int *firstStateCounts; int *secondStateCounts; int *jointStateCounts; double *firstStateProbs; double *secondStateProbs; double *jointStateProbs; double *firstWeightVec; double *secondWeightVec; double *jointWeightVec; int firstNumStates; int secondNumStates; int jointNumStates; int i; double length = vectorLength; WeightedJointProbState state; firstNormalisedVector = (int *) checkedCalloc(vectorLength,sizeof(int)); secondNormalisedVector = (int *) checkedCalloc(vectorLength,sizeof(int)); firstNumStates = normaliseArray(firstVector,firstNormalisedVector,vectorLength); secondNumStates = normaliseArray(secondVector,secondNormalisedVector,vectorLength); jointNumStates = firstNumStates * secondNumStates; firstStateCounts = (int *) checkedCalloc(firstNumStates,sizeof(int)); secondStateCounts = (int *) checkedCalloc(secondNumStates,sizeof(int)); jointStateCounts = (int *) checkedCalloc(jointNumStates,sizeof(int)); firstStateProbs = (double *) checkedCalloc(firstNumStates,sizeof(double)); secondStateProbs = (double *) checkedCalloc(secondNumStates,sizeof(double)); jointStateProbs = (double *) checkedCalloc(jointNumStates,sizeof(double)); firstWeightVec = (double *) checkedCalloc(firstNumStates,sizeof(double)); secondWeightVec = (double *) checkedCalloc(secondNumStates,sizeof(double)); jointWeightVec = (double *) checkedCalloc(jointNumStates,sizeof(double)); for (i = 0; i < vectorLength; i++) { firstStateCounts[firstNormalisedVector[i]] += 1; secondStateCounts[secondNormalisedVector[i]] += 1; jointStateCounts[secondNormalisedVector[i] * firstNumStates + firstNormalisedVector[i]] += 1; firstWeightVec[firstNormalisedVector[i]] += weightVector[i]; secondWeightVec[secondNormalisedVector[i]] += weightVector[i]; jointWeightVec[secondNormalisedVector[i] * firstNumStates + 
firstNormalisedVector[i]] += weightVector[i]; } for (i = 0; i < firstNumStates; i++) { if (firstStateCounts[i]) { firstStateProbs[i] = firstStateCounts[i] / length; firstWeightVec[i] /= firstStateCounts[i]; } } for (i = 0; i < secondNumStates; i++) { if (secondStateCounts[i]) { secondStateProbs[i] = secondStateCounts[i] / length; secondWeightVec[i] /= secondStateCounts[i]; } } for (i = 0; i < jointNumStates; i++) { if (jointStateCounts[i]) { jointStateProbs[i] = jointStateCounts[i] / length; jointWeightVec[i] /= jointStateCounts[i]; } } FREE_FUNC(firstNormalisedVector); FREE_FUNC(secondNormalisedVector); FREE_FUNC(firstStateCounts); FREE_FUNC(secondStateCounts); FREE_FUNC(jointStateCounts); firstNormalisedVector = NULL; secondNormalisedVector = NULL; firstStateCounts = NULL; secondStateCounts = NULL; jointStateCounts = NULL; /* **typedef struct **{ ** double *jointProbabilityVector; ** double *jointWeightVector; ** int numJointStates; ** double *firstProbabilityVector; ** double *firstWeightVector; ** int numFirstStates; ** double *secondProbabilityVector; ** double *secondWeightVector; ** int numSecondStates; **} WeightedJointProbState; */ state.jointProbabilityVector = jointStateProbs; state.jointWeightVector = jointWeightVec; state.numJointStates = jointNumStates; state.firstProbabilityVector = firstStateProbs; state.firstWeightVector = firstWeightVec; state.numFirstStates = firstNumStates; state.secondProbabilityVector = secondStateProbs; state.secondWeightVector = secondWeightVec; state.numSecondStates = secondNumStates; return state; }/*calculateJointProbability(double *,double *, int)*/
double calcCondRenyiEnt(double alpha, double *dataVector, double *conditionVector, int uniqueInCondVector, int vectorLength) { /*uniqueInCondVector = is the number of unique values in the cond vector.*/ /*condEntropy = sum p(y) * sum p(x|y)^alpha(*/ /* ** first generate the seperate variables */ double *seperateVectors = (double *) checkedCalloc(uniqueInCondVector*vectorLength,sizeof(double)); int *seperateVectorCount = (int *) checkedCalloc(uniqueInCondVector,sizeof(int)); double seperateVectorProb = 0.0; int i,j; double entropy = 0.0; double tempValue = 0.0; int currentValue; double tempEntropy; ProbabilityState state; double **seperateVectors2D = (double **) checkedCalloc(uniqueInCondVector,sizeof(double*)); for(j=0; j < uniqueInCondVector; j++) seperateVectors2D[j] = seperateVectors + (int)j*vectorLength; for (i = 0; i < vectorLength; i++) { currentValue = (int) (conditionVector[i] - 1.0); /*printf("CurrentValue = %d\n",currentValue);*/ seperateVectors2D[currentValue][seperateVectorCount[currentValue]] = dataVector[i]; seperateVectorCount[currentValue]++; } for (j = 0; j < uniqueInCondVector; j++) { tempEntropy = 0.0; seperateVectorProb = ((double)seperateVectorCount[j]) / vectorLength; state = calculateProbability(seperateVectors2D[j],seperateVectorCount[j]); /*H_\alpha(X) = 1/(1-alpha) * log(2)(sum p(x)^alpha)*/ for (i = 0; i < state.numStates; i++) { tempValue = state.probabilityVector[i]; if (tempValue > 0) { tempEntropy += pow(tempValue,alpha); /*printf("Entropy = %f, i = %d\n", entropy,i);*/ } } /*printf("Entropy = %f\n", entropy);*/ tempEntropy = log(tempEntropy); tempEntropy /= log(2.0); tempEntropy /= (1.0-alpha); entropy += tempEntropy; FREE_FUNC(state.probabilityVector); } FREE_FUNC(seperateVectors2D); seperateVectors2D = NULL; FREE_FUNC(seperateVectors); FREE_FUNC(seperateVectorCount); seperateVectors = NULL; seperateVectorCount = NULL; return entropy; }/*calcCondRenyiEnt(double *,double *,int)*/