Code example #1
double calculateConditionalMutualInformation(double *dataVector, double *targetVector, double *conditionVector, int vectorLength)
{
    double mutualInformation = 0.0;
    double firstCondition, secondCondition;
    double *mergedVector = (double *) CALLOC_FUNC(vectorLength,sizeof(double));

    mergeArrays(targetVector,conditionVector,mergedVector,vectorLength);

    /* I(X;Y|Z) = H(X|Z) - H(X|YZ) */
    /* double calculateConditionalEntropy(double *dataVector, double *conditionVector, int vectorLength); */
    firstCondition = calculateConditionalEntropy(dataVector,conditionVector,vectorLength);
    secondCondition = calculateConditionalEntropy(dataVector,mergedVector,vectorLength);

    mutualInformation = firstCondition - secondCondition;

    FREE_FUNC(mergedVector);
    mergedVector = NULL;

    return mutualInformation;
}/*calculateConditionalMutualInformation(double *,double *,double *,int)*/
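A minimal calling sketch for the function above. The header name MutualInformation.h, the toy vectors, and the assumption that inputs are discrete state labels stored as doubles are additions for illustration, not part of the listing:

#include <stdio.h>
#include "MutualInformation.h" /* assumed MIToolbox header declaring the function above */

int main(void)
{
    /* hypothetical discretised vectors: X, Y and the conditioning variable Z */
    double x[] = {0,0,1,1,0,1,1,0};
    double y[] = {0,1,1,1,0,1,0,0};
    double z[] = {1,1,0,0,1,0,0,1};

    /* estimate I(X;Y|Z) from the empirical joint distribution of the 8 samples */
    double cmi = calculateConditionalMutualInformation(x, y, z, 8);
    printf("I(X;Y|Z) = %f\n", cmi);
    return 0;
}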
Code example #2
File: BetaGamma.c  Project: ageek/PyFeast
double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double betaParam, double gammaParam)
{
    double **feature2D = (double **) CALLOC_FUNC(noOfFeatures,sizeof(double *));
    
    /*holds the class MI values*/
    double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
    char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));
    
    /*holds the intra feature MI values*/
    int sizeOfMatrix = k*noOfFeatures;
    double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));
    
    double maxMI = 0.0;
    int maxMICounter = -1;
    
    double score, currentScore, totalFeatureMI;
    int currentHighestFeature, arrayPosition;
   
    int i,j,m;

    /***********************************************************
    ** because the array is passed as
    **  s a m p l e s
    ** f
    ** e
    ** a
    ** t
    ** u
    ** r
    ** e
    ** s
    ** 
    ** this pulls out a pointer to the first sample of
    ** each feature and stores it as a multidimensional array
    ** so it can be indexed nicely
    ***********************************************************/
    for(j = 0; j < noOfFeatures; j++)
    {
        feature2D[j] = featureMatrix + (int)j*noOfSamples;
    }

    for (i = 0; i < sizeOfMatrix; i++)
    {
        featureMIMatrix[i] = -1;
    }/*for featureMIMatrix - blank to -1*/
    
    /***********************************************************
    ** SETUP COMPLETE
    ** Algorithm starts here
    ***********************************************************/
    
    for (i = 0; i < noOfFeatures; i++)
    {
        classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
        
        if (classMI[i] > maxMI)
        {
            maxMI = classMI[i];
            maxMICounter = i;
        }/*if bigger than current maximum*/
    }/*for noOfFeatures - filling classMI*/
    
    selectedFeatures[maxMICounter] = 1;
    outputFeatures[0] = maxMICounter;
    
  /*************
   ** Now we have populated the classMI array, and selected the highest
   ** MI feature as the first output feature
   ** Now we move into the BetaGamma algorithm
   *************/
    
    for (i = 1; i < k; i++)
    {
        /************************************************************
        ** to ensure it selects some features
        ** if this is zero then it will not pick features where the 
        ** redundancy is greater than the relevance
        ************************************************************/
        score = -HUGE_VAL;
        currentHighestFeature = 0;
        currentScore = 0.0;
        totalFeatureMI = 0.0;
        
        for (j = 0; j < noOfFeatures; j++)
        {
            /*if we haven't selected j*/
            if (!selectedFeatures[j])
            {
                currentScore = classMI[j];
                totalFeatureMI = 0.0;
                
                for (m = 0; m < i; m++)
                {
                    arrayPosition = m*noOfFeatures + j;
                    if (featureMIMatrix[arrayPosition] == -1)
                    {
                        /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/
                        featureMIMatrix[arrayPosition] = betaParam*calculateMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], noOfSamples);
                        
                        /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double* conditionVector, int vectorLength);*/
                        featureMIMatrix[arrayPosition] -= gammaParam*calculateConditionalMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], classColumn, noOfSamples);
                    }/*if not already known*/
                    
                    totalFeatureMI += featureMIMatrix[arrayPosition];
                }/*for the number of already selected features*/
                
                currentScore -= (totalFeatureMI);

                if (currentScore > score)
                {
                    score = currentScore;
                    currentHighestFeature = j;
                }
            }/*if j is unselected*/
        }/*for number of features*/
        
        selectedFeatures[currentHighestFeature] = 1;
        outputFeatures[i] = currentHighestFeature;
        
    }/*for the number of features to select*/
    
    for (i = 0; i < k; i++)
    {
        outputFeatures[i] += 1; /*C indexes from 0 not 1*/
    }/*for number of selected features*/
    
    /*free the working buffers; outputFeatures is owned by the caller*/
    FREE_FUNC(classMI);
    FREE_FUNC(featureMIMatrix);
    FREE_FUNC(selectedFeatures);
    FREE_FUNC(feature2D);

    classMI = NULL;
    featureMIMatrix = NULL;
    selectedFeatures = NULL;
    feature2D = NULL;

    return outputFeatures;
}/*BetaGamma(int,int,int,double[][],double[],double[],double,double)*/
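Reading the inner loops, each candidate Xj is scored as I(Xj;Y) - sum over the selected Xs of (betaParam*I(Xj;Xs) - gammaParam*I(Xj;Xs|Y)), i.e. the beta-gamma criterion. A hedged calling sketch follows; the header FSAlgorithms.h, the toy data and the reading of beta=1, gamma=0 as an MIFS-style weighting are assumptions:

#include <stdio.h>
#include "FSAlgorithms.h" /* assumed FEAST header declaring BetaGamma */

int main(void)
{
    /* 4 samples, 3 features, stored feature-major as the layout comment above
       describes: all samples of feature 0, then feature 1, then feature 2 */
    double featureMatrix[] = {0,1,0,1,
                              1,1,0,0,
                              0,0,1,1};
    double classColumn[]   = {0,1,0,1};
    double selected[2]     = {0,0};

    /* beta = 1.0 penalises redundancy with already selected features,
       gamma = 0.0 switches off the class-conditional term
       (assumption: this corresponds to an MIFS-style weighting) */
    BetaGamma(2, 4, 3, featureMatrix, classColumn, selected, 1.0, 0.0);

    printf("selected features (1-based): %d %d\n",
           (int)selected[0], (int)selected[1]);
    return 0;
}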
Code example #3
File: CMIM.c  Project: ageek/PyFeast
double* CMIM(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
{
  /*holds the class MI values
  **the class MI doubles as the partial score from the CMIM paper
  */
  double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
  /*in the CMIM paper, m = lastUsedFeature*/
  int *lastUsedFeature = (int *)CALLOC_FUNC(noOfFeatures,sizeof(int));
  
  double score, conditionalInfo;
  int iMinus, currentFeature;
  
  double maxMI = 0.0;
  int maxMICounter = -1;
  
  int j,i;

  double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));

  for(j = 0; j < noOfFeatures; j++)
  {
    feature2D[j] = featureMatrix + (int)j*noOfSamples;
  }
  
  for (i = 0; i < noOfFeatures;i++)
  {
    classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
    
    if (classMI[i] > maxMI)
    {
      maxMI = classMI[i];
      maxMICounter = i;
    }/*if bigger than current maximum*/
  }/*for noOfFeatures - filling classMI*/
  
  outputFeatures[0] = maxMICounter;
  
  /*****************************************************************************
  ** We have populated the classMI array, and selected the highest
  ** MI feature as the first output feature
  ** Now we move into the CMIM algorithm
  *****************************************************************************/
  
  for (i = 1; i < k; i++)
  {
    score = 0.0;
    iMinus = i-1;
    
    for (j = 0; j < noOfFeatures; j++)
    {
      while ((classMI[j] > score) && (lastUsedFeature[j] < i))
      {
        /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double *conditionVector, int vectorLength);*/
        currentFeature = (int) outputFeatures[lastUsedFeature[j]];
        conditionalInfo = calculateConditionalMutualInformation(feature2D[j],classColumn,feature2D[currentFeature],noOfSamples);
        if (classMI[j] > conditionalInfo)
        {
          classMI[j] = conditionalInfo;
        }/*reset classMI*/
        /*moved due to C indexing from 0 rather than 1*/
        lastUsedFeature[j] += 1;
      }/*while partial score greater than score & not reached last feature*/
      if (classMI[j] > score)
      {
        score = classMI[j];
        outputFeatures[i] = j;
      }/*if partial score still greater than score*/
    }/*for number of features*/
  }/*for the number of features to select*/
  
  
  for (i = 0; i < k; i++)
  {
    outputFeatures[i] += 1; /*C indexes from 0 not 1*/
  }/*for number of selected features*/
  
  FREE_FUNC(classMI);
  FREE_FUNC(lastUsedFeature);
  FREE_FUNC(feature2D);

  classMI = NULL;
  lastUsedFeature = NULL;
  feature2D = NULL;

  return outputFeatures;
}/*CMIM(int,int,int,double[][],double[],double[])*/
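The while loop above is the lazy form of the CMIM update: classMI[j] holds the running minimum of I(Xj;Y|Xs) over the selected features checked so far, and it is only tightened while it still beats the current best score. A naive, unoptimised sketch of the same score makes the criterion explicit; the helper name naiveCMIMScore is hypothetical, and the MIToolbox calls are the ones used in the listing:

#include "MutualInformation.h" /* assumed MIToolbox header for the two calls below */

/* Hypothetical helper: the CMIM score of candidate xj is the minimum of
   I(Xj;Y) and I(Xj;Y|Xs) over every already selected feature Xs. */
double naiveCMIMScore(double *xj, double *classColumn, double **feature2D,
                      double *outputFeatures, int numSelected, int noOfSamples)
{
    double best = calculateMutualInformation(xj, classColumn, noOfSamples);
    int m;

    for (m = 0; m < numSelected; m++)
    {
        double cmi = calculateConditionalMutualInformation(
            xj, classColumn, feature2D[(int) outputFeatures[m]], noOfSamples);
        if (cmi < best)
        {
            best = cmi;
        }
    }
    return best;
}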
Code example #4
File: CondMI.c  Project: ubaniak/PyFeast
double* CondMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
{
    /*holds the class MI values*/
    double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));

    char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));

    /*holds the intra feature MI values*/
    int sizeOfMatrix = k*noOfFeatures;
    double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));

    double maxMI = 0.0;
    int maxMICounter = -1;

    double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));

    double score, currentScore, totalFeatureMI;
    int currentHighestFeature;

    double *conditionVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double));

    int arrayPosition;
    double mi, tripEntropy;

    int i,j,x;

    for(j = 0; j < noOfFeatures; j++)
    {
        feature2D[j] = featureMatrix + (int)j*noOfSamples;
    }

    for (i = 0; i < sizeOfMatrix; i++)
    {
        featureMIMatrix[i] = -1;
    }/*for featureMIMatrix - blank to -1*/

    for (i = 0; i < noOfFeatures; i++)
    {
        /*calculate mutual info
        **double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);
        */
        classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);

        if (classMI[i] > maxMI)
        {
            maxMI = classMI[i];
            maxMICounter = i;
        }/*if bigger than current maximum*/
    }/*for noOfFeatures - filling classMI*/

    selectedFeatures[maxMICounter] = 1;
    outputFeatures[0] = maxMICounter;

    memcpy(conditionVector,feature2D[maxMICounter],sizeof(double)*noOfSamples);

    /*****************************************************************************
    ** We have populated the classMI array, and selected the highest
    ** MI feature as the first output feature
    ** Now we move into the CondMI algorithm
    *****************************************************************************/

    for (i = 1; i < k; i++)
    {
        score = 0.0;
        currentHighestFeature = -1;
        currentScore = 0.0;
        totalFeatureMI = 0.0;

        for (j = 0; j < noOfFeatures; j++)
        {
            /*if we haven't selected j*/
            if (selectedFeatures[j] == 0)
            {
                currentScore = 0.0;
                totalFeatureMI = 0.0;

                /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double *conditionVector, int vectorLength);*/
                currentScore = calculateConditionalMutualInformation(feature2D[j],classColumn,conditionVector,noOfSamples);

                if (currentScore > score)
                {
                    score = currentScore;
                    currentHighestFeature = j;
                }
            }/*if j is unselected*/
        }/*for number of features*/

        outputFeatures[i] = currentHighestFeature;

        if (currentHighestFeature != -1)
        {
            selectedFeatures[currentHighestFeature] = 1;
            mergeArrays(feature2D[currentHighestFeature],conditionVector,conditionVector,noOfSamples);
        }

    }/*for the number of features to select*/

    FREE_FUNC(classMI);
    FREE_FUNC(conditionVector);
    FREE_FUNC(feature2D);
    FREE_FUNC(featureMIMatrix);
    FREE_FUNC(selectedFeatures);

    classMI = NULL;
    conditionVector = NULL;
    feature2D = NULL;
    featureMIMatrix = NULL;
    selectedFeatures = NULL;

    return outputFeatures;
}/*CondMI(int,int,int,double[][],double[],double[])*/
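CondMI scores every unselected feature by I(Xj;Y|S), where S is the joint state of everything selected so far: each call to mergeArrays folds the newest selected feature into conditionVector by mapping every pair of states to a single joint state label. A small sketch of that merging step; the header ArrayOperations.h and the toy data are assumptions:

#include <stdio.h>
#include "ArrayOperations.h" /* assumed MIToolbox header declaring mergeArrays */

int main(void)
{
    /* two hypothetical binary features over 4 samples */
    double f0[]    = {0,0,1,1};
    double f1[]    = {0,1,0,1};
    double joint[] = {0,0,0,0};

    /* each distinct (f0,f1) pair becomes one joint state label, so the result
       can be passed as a single conditioning vector */
    mergeArrays(f0, f1, joint, 4);

    printf("joint states: %d %d %d %d\n",
           (int)joint[0], (int)joint[1], (int)joint[2], (int)joint[3]);
    return 0;
}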