Exemplo n.º 1
0
/*
 * weightedCondMI - greedy forward feature selection driven by weighted
 * conditional mutual information.
 *
 * Slot 0 receives the feature with the largest score returned by
 * calcWeightedMutualInformation against classColumn. Every later slot
 * receives the not-yet-selected feature with the largest strictly positive
 * score returned by calcWeightedConditionalMutualInformation, conditioned on
 * conditionVector. conditionVector starts as a copy of the first selected
 * feature and is updated through mergeArrays after each selection
 * (presumably it encodes the joint state of the features chosen so far -
 * confirm against mergeArrays).
 *
 * k               number of features to select; both output arrays must hold
 *                 at least k entries
 * noOfSamples     number of entries read from each feature vector
 * noOfFeatures    number of rows in featureMatrix
 * featureMatrix   featureMatrix[f] is the sample vector of feature f
 * classColumn     class label vector handed to the MI routines
 * weightVector    per-sample weights handed unchanged to the MI routines
 * outputFeatures  receives the selected feature indices; a slot holds -1
 *                 when no feature could be selected for it
 * featureScores   receives the score of each selection; 0.0 for empty slots
 *
 * Returns outputFeatures.
 */
int* weightedCondMI(uint k, uint noOfSamples, uint noOfFeatures, uint **featureMatrix, uint *classColumn, double *weightVector, int *outputFeatures, double *featureScores) {
    char *selectedFeatures;

    /*holds the class MI values*/
    double *classMI;

    uint *conditionVector;

    /*starts below zero so that a feature is picked even when every MI is 0*/
    double maxMI = -1.0;
    int maxMICounter = -1;

    double score, currentScore;
    int currentHighestFeature;

    /*unsigned indices, matching the unsigned bounds they are compared with*/
    uint i, j;

    /*every slot starts out empty; selected slots are overwritten below*/
    for (j = 0; j < k; j++) {
        outputFeatures[j] = -1;
        featureScores[j] = 0.0;
    }

    /*
     * With k == 0 there is no slot 0 to write, and with no features there is
     * nothing to rank, so stop before any buffer is indexed.
     */
    if (k == 0 || noOfFeatures == 0) {
        return outputFeatures;
    }

    selectedFeatures = (char *) checkedCalloc(noOfFeatures,sizeof(char));
    classMI = (double *) checkedCalloc(noOfFeatures,sizeof(double));
    conditionVector = (uint *) checkedCalloc(noOfSamples,sizeof(uint));

    for (i = 0; i < noOfFeatures; i++) {
        /*calculate mutual info
         **double calcWeightedMutualInformation(uint *firstVector, uint *secondVector, double *weightVector, int vectorLength);
         */
        classMI[i] = calcWeightedMutualInformation(featureMatrix[i],classColumn,weightVector,noOfSamples);

        if (classMI[i] > maxMI) {
            maxMI = classMI[i];
            maxMICounter = (int) i;
        }/*if bigger than current maximum*/
    }/*for noOfFeatures - filling classMI*/

    /*
     * maxMICounter is still -1 when no score compared greater than -1.0
     * (for instance when every score is NaN); indexing with it would be out
     * of bounds, so the outputs are left empty in that case.
     */
    if (maxMICounter >= 0) {
        selectedFeatures[maxMICounter] = 1;
        outputFeatures[0] = maxMICounter;
        featureScores[0] = maxMI;

        /*size is tied to the element type of the destination buffer*/
        memcpy(conditionVector,featureMatrix[maxMICounter],sizeof(*conditionVector)*noOfSamples);

        /*****************************************************************************
         ** We have populated the classMI array, and selected the highest
         ** MI feature as the first output feature
         ** Now we move into the CondMI algorithm
         *****************************************************************************/

        for (i = 1; i < k; i++) {
            score = 0.0;
            currentHighestFeature = -1;

            for (j = 0; j < noOfFeatures; j++) {
                /*if we haven't selected j*/
                if (selectedFeatures[j] == 0) {
                    /*double calcWeightedConditionalMutualInformation(uint *firstVector, uint *targetVector, uint *conditionVector, double *weightVector, int vectorLength);*/
                    currentScore = calcWeightedConditionalMutualInformation(featureMatrix[j],classColumn,conditionVector,weightVector,noOfSamples);

                    /*strict comparison: a candidate scoring 0.0 is never selected*/
                    if (currentScore > score) {
                        score = currentScore;
                        currentHighestFeature = (int) j;
                    }
                }/*if j is unselected*/
            }/*for number of features*/

            outputFeatures[i] = currentHighestFeature;
            featureScores[i] = score;

            /*only fold a feature into the condition vector if one was found*/
            if (currentHighestFeature != -1) {
                selectedFeatures[currentHighestFeature] = 1;
                mergeArrays(featureMatrix[currentHighestFeature],conditionVector,conditionVector,noOfSamples);
            }

        }/*for the number of features to select*/
    }/*if a first feature was found*/

    FREE_FUNC(classMI);
    FREE_FUNC(conditionVector);
    FREE_FUNC(selectedFeatures);

    classMI = NULL;
    conditionVector = NULL;
    selectedFeatures = NULL;

    return outputFeatures;
}/*weightedCondMI(uint,uint,uint,uint[][],uint[],double[],int[],double[])*/
Exemplo n.º 2
0
/*
 * weightedCMIM - feature selection following the CMIM scheme, using the
 * weighted mutual information routines.
 *
 * Slot 0 receives the feature with the largest strictly positive score
 * returned by calcWeightedMutualInformation against classColumn. For every
 * later slot, each feature's partial score (kept in classMI) is lowered to
 * the smallest value returned so far by
 * calcWeightedConditionalMutualInformation when conditioning on the already
 * selected features, and the feature with the largest strictly positive
 * partial score is selected. lastUsedFeature[j] records how many selected
 * features have already been applied to feature j, so conditioning work is
 * only done while j can still beat the current best score.
 *
 * k               number of features to select; both output arrays must hold
 *                 at least k entries
 * noOfSamples     vector length handed to the MI routines
 * noOfFeatures    number of rows in featureMatrix
 * featureMatrix   featureMatrix[f] is the sample vector of feature f
 * classColumn     class label vector handed to the MI routines
 * weightVector    per-sample weights handed unchanged to the MI routines
 * outputFeatures  receives the selected feature indices; a slot holds
 *                 (uint) -1 when no feature with a positive score was found
 * featureScores   receives the score of each selection; 0.0 for empty slots
 *
 * Returns outputFeatures.
 */
uint* weightedCMIM(uint k, uint noOfSamples, uint noOfFeatures, uint **featureMatrix, uint *classColumn, double *weightVector, uint *outputFeatures, double *featureScores) {
    /*holds the class MI values
     **the class MI doubles as the partial score from the CMIM paper
     */
    double *classMI;
    /*in the CMIM paper, m = lastUsedFeature*/
    uint *lastUsedFeature;

    double score, conditionalInfo;
    uint currentFeature;

    /*starts at zero, so slot 0 stays empty unless some MI is strictly positive*/
    double maxMI = 0.0;
    int maxMICounter = -1;

    /*unsigned indices, matching the unsigned bounds they are compared with*/
    uint i, j;

    /*
     * Every slot starts out empty, as in weightedCondMI, so that slots for
     * which no feature is found do not keep stale caller data.
     */
    for (j = 0; j < k; j++) {
        outputFeatures[j] = (uint) -1;
        featureScores[j] = 0.0;
    }

    /*
     * With k == 0 there is no slot 0 to write, and with no features there is
     * nothing to rank, so stop before anything is allocated or indexed.
     */
    if (k == 0 || noOfFeatures == 0) {
        return outputFeatures;
    }

    classMI = (double *) checkedCalloc(noOfFeatures,sizeof(double));
    lastUsedFeature = (uint *) checkedCalloc(noOfFeatures,sizeof(uint));

    for (i = 0; i < noOfFeatures; i++) {
        classMI[i] = calcWeightedMutualInformation(featureMatrix[i],classColumn,weightVector,noOfSamples);

        if (classMI[i] > maxMI) {
            maxMI = classMI[i];
            maxMICounter = (int) i;
        }/*if bigger than current maximum*/
    }/*for noOfFeatures - filling classMI*/

    /*slot 0 is already marked empty when no feature had a positive MI*/
    if (maxMICounter >= 0) {
        outputFeatures[0] = (uint) maxMICounter;
        featureScores[0] = maxMI;
    }

    /*****************************************************************************
     ** We have populated the classMI array, and selected the highest
     ** MI feature as the first output feature
     ** Now we move into the CMIM algorithm
     *****************************************************************************/

    for (i = 1; i < k; i++) {
        score = 0.0;

        for (j = 0; j < noOfFeatures; j++) {
            /*
             * Only reached while classMI[j] is strictly positive. An empty
             * slot before i implies that no partial score is positive any
             * more, so outputFeatures[lastUsedFeature[j]] is always a real
             * feature index here.
             */
            while ((classMI[j] > score) && (lastUsedFeature[j] < i)) {
                /*double calcWeightedConditionalMutualInformation(uint *firstVector, uint *targetVector, uint *conditionVector, double *weightVector, int vectorLength);*/
                currentFeature = outputFeatures[lastUsedFeature[j]];
                conditionalInfo = calcWeightedConditionalMutualInformation(featureMatrix[j], classColumn, featureMatrix[currentFeature], weightVector, noOfSamples);
                if (classMI[j] > conditionalInfo) {
                    classMI[j] = conditionalInfo;
                }/*reset classMI*/
                /*moved due to C indexing from 0 rather than 1*/
                lastUsedFeature[j] += 1;
            }/*while partial score greater than score & not reached last feature*/
            if (classMI[j] > score) {
                score = classMI[j];
                featureScores[i] = score;
                outputFeatures[i] = j;
            }/*if partial score still greater than score*/
        }/*for number of features*/
    }/*for the number of features to select*/

    FREE_FUNC(classMI);
    FREE_FUNC(lastUsedFeature);

    classMI = NULL;
    lastUsedFeature = NULL;

    return outputFeatures;
}/*weightedCMIM(uint,uint,uint,uint[][],uint[],double[],uint[],double[])*/