Beispiel #1
0
/*
** Conditional mutual information I(X;Y|Z), computed as H(X|Z) - H(X|YZ).
**
** dataVector is X, targetVector is Y and conditionVector is Z; each is read
** as vectorLength doubles. The joint variable YZ is produced by
** mergeArrays() into a scratch buffer that is allocated and released inside
** this function, so the caller's vectors are only handed to the helpers.
**
** Returns the difference of the two conditional entropies reported by
** calculateConditionalEntropy().
*/
double calculateConditionalMutualInformation(double *dataVector, double *targetVector, double *conditionVector, int vectorLength)
{
    double entropyGivenCondition;
    double entropyGivenJoint;
    double *jointVector = (double *) CALLOC_FUNC(vectorLength,sizeof(double));

    /* jointVector <- combination of target (Y) and condition (Z) */
    mergeArrays(targetVector,conditionVector,jointVector,vectorLength);

    /* H(X|Z) */
    entropyGivenCondition = calculateConditionalEntropy(dataVector,conditionVector,vectorLength);
    /* H(X|YZ) */
    entropyGivenJoint = calculateConditionalEntropy(dataVector,jointVector,vectorLength);

    FREE_FUNC(jointVector);

    return entropyGivenCondition - entropyGivenJoint;
}/*calculateConditionalMutualInformation(double *,double *,double *,int)*/
/*
 * Sorts the n ints in arr with a top-down merge sort.
 *
 * The array is split at n/2, both halves are copied into temporary heap
 * buffers, each buffer is sorted recursively, and the two buffers are then
 * handed to mergeArrays(arr, left, right, leftCount, rightCount) to be
 * combined back into arr.
 *
 * Arrays with fewer than two elements are left untouched. If a temporary
 * buffer cannot be allocated, this call returns without touching arr; the
 * affected (sub)array then keeps its current order, so the overall result
 * is not guaranteed to be sorted in that case.
 */
void mergeSort(int *arr, int n)
{
    int i;
    int mid;
    int *leftArray;
    int *rightArray;

    /* base condition: fewer than 2 elements is already sorted */
    if (n < 2)
        return;

    /* partition the array into leftArray and rightArray */
    mid = n/2;
    leftArray = malloc(sizeof *leftArray * (size_t)mid);
    rightArray = malloc(sizeof *rightArray * (size_t)(n-mid));

    if (leftArray == NULL || rightArray == NULL)
    {
        /* free(NULL) is a no-op, so whichever buffer succeeded is released */
        free(leftArray);
        free(rightArray);
        return;
    }

    for (i = 0; i < mid; ++i)
        leftArray[i] = arr[i];
    for (i = mid; i < n; ++i)
        rightArray[i-mid] = arr[i];

    mergeSort(leftArray, mid);
    mergeSort(rightArray, n-mid);

    /* NOTE(review): assumes mergeArrays only reads the two halves and does
     * not take ownership of them - confirm against its definition. */
    mergeArrays(arr, leftArray, rightArray, mid, n-mid);

    /* the halves were scratch copies; release them so each call does not leak */
    free(leftArray);
    free(rightArray);
}
Beispiel #3
0
/*******************************************************************************
** JMI - greedy forward feature selection.
**
** The first feature chosen is the one whose calculateMutualInformation()
** with classColumn is largest. Every further round scores each unselected
** feature j as the sum, over all features s selected so far, of
** calculateMutualInformation(mergeArrays(s, j), classColumn) and picks the
** highest-scoring candidate. Pairwise values are cached in a
** k x noOfFeatures table so each (selected, candidate) pair is computed once.
**
** k              - number of features requested
** noOfSamples    - length of every feature vector and of classColumn
** noOfFeatures   - number of candidate features
** featureMatrix  - feature data; split into per-feature vectors by
**                  firstElementOfEachSample() (layout defined by that helper)
** classColumn    - class label for each sample
** outputFeatures - receives the selected feature indices in selection order;
**                  needs room for k ints
** noOfOutput     - incremented once per selected feature; it is NOT reset
**                  here, so the caller is expected to initialise it
**
** If a round finds no candidate with a score above zero, the first feature
** that is still unselected is taken. Selection stops early once every
** feature has been selected (k > noOfFeatures), so an already selected
** feature is never emitted twice. Nothing is written when k or noOfFeatures
** is not positive.
*******************************************************************************/
void JMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, int *outputFeatures,int *noOfOutput)
{
  /*holds the class MI values*/
  double *classMI = NULL;
  /*selectedFeatures[j] is 1 once feature j has been emitted*/
  char *selectedFeatures = NULL;
  /*holds the cached pairwise MI values, -1 marks "not computed yet"*/
  double *featureMIMatrix = NULL;
  /*scratch buffer for the merged (selected, candidate) vector*/
  double *mergedVector = NULL;
  /*per-feature pointers into featureMatrix*/
  double **feature2D = NULL;

  int sizeOfMatrix;

  /*start below any valid MI so a feature is always picked*/
  double maxMI = -1.0;
  int maxMICounter = -1;

  double score, currentScore;
  int currentHighestFeature, firstUnselected;
  int arrayPosition;
  int i,j,x;

  /*nothing can be selected from an empty request or an empty feature set*/
  if (k <= 0 || noOfFeatures <= 0)
  {
    return;
  }

  classMI = (double *)checkedCalloc(noOfFeatures,sizeof(double));
  selectedFeatures = (char *)checkedCalloc(noOfFeatures,sizeof(char));

  sizeOfMatrix = k*noOfFeatures;
  featureMIMatrix = (double *)checkedCalloc(sizeOfMatrix,sizeof(double));

  mergedVector = (double *) checkedCalloc(noOfSamples,sizeof(double));

  feature2D = (double **) checkedCalloc(noOfFeatures,sizeof(double *));
  firstElementOfEachSample(feature2D,featureMatrix,noOfFeatures,noOfSamples);

  for (i = 0; i < sizeOfMatrix; i++)
  {
    featureMIMatrix[i] = -1;
  }/*for featureMIMatrix - blank to -1*/

  for (i = 0; i < noOfFeatures; i++)
  {
    classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);

    if (classMI[i] > maxMI)
    {
      maxMI = classMI[i];
      maxMICounter = i;
    }/*if bigger than current maximum*/
  }/*for noOfFeatures - filling classMI*/

  /*maxMICounter stays -1 only if no MI value compared greater than -1;
  **never use it as an index in that case*/
  if (maxMICounter >= 0)
  {
    selectedFeatures[maxMICounter] = 1;
    outputFeatures[0] = maxMICounter;
    (*noOfOutput)++;

    /***************************************************************************
    ** We have populated the classMI array, and selected the highest
    ** MI feature as the first output feature
    ** Now we move into the JMI algorithm
    ***************************************************************************/

    for (i = 1; i < k; i++)
    {
      score = 0.0;
      currentHighestFeature = -1;
      firstUnselected = -1;

      for (j = 0; j < noOfFeatures; j++)
      {
        /*skip features we have already selected*/
        if (selectedFeatures[j] != 0)
        {
          continue;
        }

        /*remembered as the fallback when no candidate scores above zero*/
        if (firstUnselected < 0)
        {
          firstUnselected = j;
        }

        currentScore = 0.0;

        for (x = 0; x < i; x++)
        {
          arrayPosition = x*noOfFeatures + j;
          if (featureMIMatrix[arrayPosition] == -1)
          {
            mergeArrays(feature2D[outputFeatures[x]], feature2D[j],mergedVector,noOfSamples);
            featureMIMatrix[arrayPosition] = calculateMutualInformation(mergedVector, classColumn, noOfSamples);
          }/*if not already known*/
          currentScore += featureMIMatrix[arrayPosition];
        }/*for the number of already selected features*/

        if (currentScore > score)
        {
          score = currentScore;
          currentHighestFeature = j;
        }
      }/*for number of features*/

      if (currentHighestFeature < 0)
      {
        currentHighestFeature = firstUnselected;
      }

      /*every feature is already selected - stop instead of repeating one*/
      if (currentHighestFeature < 0)
      {
        break;
      }

      selectedFeatures[currentHighestFeature] = 1;
      outputFeatures[i] = currentHighestFeature;
      (*noOfOutput)++;

    }/*for the number of features to select*/
  }/*if a first feature was found*/

  FREE_FUNC(classMI);
  FREE_FUNC(feature2D);
  FREE_FUNC(featureMIMatrix);
  FREE_FUNC(mergedVector);
  FREE_FUNC(selectedFeatures);

  classMI = NULL;
  feature2D = NULL;
  featureMIMatrix = NULL;
  mergedVector = NULL;
  selectedFeatures = NULL;
}/*JMI(int,int,int,double[][],double[],double[])*/
Beispiel #4
0
/*******************************************************************************
** weightedCondMI - greedy forward feature selection on weighted samples.
**
** The first feature chosen is the one with the largest
** calcWeightedMutualInformation() against classColumn. Each further round
** picks the unselected feature with the largest
** calcWeightedConditionalMutualInformation() given conditionVector, which
** starts as a copy of the first selected feature and is then merged (via
** mergeArrays) with every newly selected feature.
**
** k              - number of features requested
** noOfSamples    - length of every feature vector, classColumn and weightVector
** noOfFeatures   - number of candidate features (rows of featureMatrix)
** featureMatrix  - featureMatrix[f] is the sample vector of feature f
** classColumn    - class label for each sample
** weightVector   - per-sample weights passed through to the MI helpers
** outputFeatures - receives k feature indices in selection order; -1 marks a
**                  slot for which no feature was selected
** featureScores  - receives k scores matching outputFeatures; 0.0 where no
**                  feature was selected
**
** Returns outputFeatures. Nothing is written when k is 0. When noOfFeatures
** is 0, or no first feature can be chosen, every slot is reported as
** -1 / 0.0.
*******************************************************************************/
int* weightedCondMI(uint k, uint noOfSamples, uint noOfFeatures, uint **featureMatrix, uint *classColumn, double *weightVector, int *outputFeatures, double *featureScores) {
    char *selectedFeatures;

    /*holds the class MI values*/
    double *classMI;

    /*start below any valid MI so a feature is always picked*/
    double maxMI = -1.0;
    int maxMICounter = -1;

    double score, currentScore;
    int currentHighestFeature;

    uint *conditionVector;

    /*unsigned counters to match the uint bounds they are compared against*/
    uint i,j;

    /*default every slot to "nothing selected"*/
    for (j = 0; j < k; j++) {
        outputFeatures[j] = -1;
        featureScores[j] = 0.0;
    }

    /*an empty request or an empty feature set leaves nothing to do; return
     **before slot 0 is written or -1 is used as a feature index*/
    if (k == 0 || noOfFeatures == 0) {
        return outputFeatures;
    }

    selectedFeatures = (char *) checkedCalloc(noOfFeatures,sizeof(char));
    classMI = (double *) checkedCalloc(noOfFeatures,sizeof(double));
    conditionVector = (uint *) checkedCalloc(noOfSamples,sizeof(uint));

    for (i = 0; i < noOfFeatures; i++) {
        classMI[i] = calcWeightedMutualInformation(featureMatrix[i],classColumn,weightVector,noOfSamples);

        if (classMI[i] > maxMI) {
            maxMI = classMI[i];
            maxMICounter = (int) i;
        }/*if bigger than current maximum*/
    }/*for noOfFeatures - filling classMI*/

    /*maxMICounter stays -1 only if no MI value compared greater than -1;
     **never use it as an index in that case*/
    if (maxMICounter >= 0) {
        selectedFeatures[maxMICounter] = 1;
        outputFeatures[0] = maxMICounter;
        featureScores[0] = maxMI;

        /*size the copy by the destination element type*/
        memcpy(conditionVector,featureMatrix[maxMICounter],sizeof(*conditionVector)*noOfSamples);

        /***********************************************************************
         ** We have populated the classMI array, and selected the highest
         ** MI feature as the first output feature
         ** Now we move into the CondMI algorithm
         ***********************************************************************/

        for (i = 1; i < k; i++) {
            score = 0.0;
            currentHighestFeature = -1;

            for (j = 0; j < noOfFeatures; j++) {
                /*skip features we have already selected*/
                if (selectedFeatures[j] != 0) {
                    continue;
                }

                currentScore = calcWeightedConditionalMutualInformation(featureMatrix[j],classColumn,conditionVector,weightVector,noOfSamples);

                /*only a strictly positive score can win a round*/
                if (currentScore > score) {
                    score = currentScore;
                    currentHighestFeature = (int) j;
                }
            }/*for number of features*/

            outputFeatures[i] = currentHighestFeature;
            featureScores[i] = score;

            if (currentHighestFeature != -1) {
                selectedFeatures[currentHighestFeature] = 1;
                /*conditionVector is both an input and the output here*/
                mergeArrays(featureMatrix[currentHighestFeature],conditionVector,conditionVector,noOfSamples);
            }

        }/*for the number of features to select*/
    }/*if a first feature was found*/

    FREE_FUNC(classMI);
    FREE_FUNC(conditionVector);
    FREE_FUNC(selectedFeatures);

    classMI = NULL;
    conditionVector = NULL;
    selectedFeatures = NULL;

    return outputFeatures;
}/*weightedCondMI(uint,uint,uint,uint[][],uint[],double[],int[],double[])*/
Beispiel #5
0
/*******************************************************************************
** CondMI - greedy forward feature selection by conditional mutual information.
**
** The first feature chosen is the one with the largest
** calculateMutualInformation() against classColumn. Each further round picks
** the unselected feature with the largest
** calculateConditionalMutualInformation() given conditionVector, which starts
** as a copy of the first selected feature and is then merged (via
** mergeArrays) with every newly selected feature.
**
** k              - number of features requested
** noOfSamples    - length of every feature vector and of classColumn
** noOfFeatures   - number of candidate features
** featureMatrix  - flat array; feature f occupies the noOfSamples doubles
**                  starting at featureMatrix + f*noOfSamples
** classColumn    - class label for each sample
** outputFeatures - receives k feature indices (stored as doubles) in
**                  selection order; -1 marks a slot for which no feature
**                  with a positive score was found
**
** Returns outputFeatures. Nothing is written when k or noOfFeatures is not
** positive.
*******************************************************************************/
double* CondMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
{
    /*holds the class MI values*/
    double *classMI = NULL;
    /*selectedFeatures[j] is 1 once feature j has been emitted*/
    char *selectedFeatures = NULL;
    /*per-feature pointers into featureMatrix*/
    double **feature2D = NULL;
    double *conditionVector = NULL;

    /*start below any valid MI so a feature is always picked, even when
    **every feature has zero mutual information with the class*/
    double maxMI = -1.0;
    int maxMICounter = -1;

    double score, currentScore;
    int currentHighestFeature;

    int i,j;

    /*nothing can be selected from an empty request or an empty feature set*/
    if (k <= 0 || noOfFeatures <= 0)
    {
        return outputFeatures;
    }

    classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
    selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));
    feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));
    conditionVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double));

    for (j = 0; j < noOfFeatures; j++)
    {
        /*offset computed in size_t so large matrices do not overflow int*/
        feature2D[j] = featureMatrix + (size_t)j*(size_t)noOfSamples;
    }

    for (i = 0; i < noOfFeatures; i++)
    {
        classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);

        if (classMI[i] > maxMI)
        {
            maxMI = classMI[i];
            maxMICounter = i;
        }/*if bigger than current maximum*/
    }/*for noOfFeatures - filling classMI*/

    if (maxMICounter < 0)
    {
        /*no MI value compared greater than -1: report every slot as empty
        **rather than indexing with -1*/
        for (i = 0; i < k; i++)
        {
            outputFeatures[i] = -1;
        }
    }
    else
    {
        selectedFeatures[maxMICounter] = 1;
        outputFeatures[0] = maxMICounter;

        memcpy(conditionVector,feature2D[maxMICounter],sizeof(double)*noOfSamples);

        /***********************************************************************
        ** We have populated the classMI array, and selected the highest
        ** MI feature as the first output feature
        ** Now we move into the CondMI algorithm
        ***********************************************************************/

        for (i = 1; i < k; i++)
        {
            score = 0.0;
            currentHighestFeature = -1;

            for (j = 0; j < noOfFeatures; j++)
            {
                /*skip features we have already selected*/
                if (selectedFeatures[j] != 0)
                {
                    continue;
                }

                currentScore = calculateConditionalMutualInformation(feature2D[j],classColumn,conditionVector,noOfSamples);

                /*only a strictly positive score can win a round*/
                if (currentScore > score)
                {
                    score = currentScore;
                    currentHighestFeature = j;
                }
            }/*for number of features*/

            outputFeatures[i] = currentHighestFeature;

            if (currentHighestFeature != -1)
            {
                selectedFeatures[currentHighestFeature] = 1;
                /*conditionVector is both an input and the output here*/
                mergeArrays(feature2D[currentHighestFeature],conditionVector,conditionVector,noOfSamples);
            }

        }/*for the number of features to select*/
    }/*if a first feature was found*/

    FREE_FUNC(classMI);
    FREE_FUNC(conditionVector);
    FREE_FUNC(feature2D);
    FREE_FUNC(selectedFeatures);

    classMI = NULL;
    conditionVector = NULL;
    feature2D = NULL;
    selectedFeatures = NULL;

    return outputFeatures;
}/*CondMI(int,int,int,double[][],double[],double[])*/