C++ (Cpp) ClassificationData::getClassProbabilities Beispiele

Programmiersprache: C++ (Cpp)

Klasse / Typ: ClassificationData

Methode / Funktion: getClassProbabilities

Beispiele auf hotexamples.com: 3

C++ (Cpp) ClassificationData::getClassProbabilities - 3 Beispiele gefunden. Dies sind die am besten bewerteten C++ (Cpp) Beispiele für die ClassificationData::getClassProbabilities, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

getNumSamples(30)

getNumDimensions(26)

load(17)

getNumClasses(17)

getRanges(16)

loadDatasetFromFile(13)

addSample(9)

getClassTracker(8)

getClassData(5)

getClassLabels(4)

addClass(4)

getDataAsMatrixDouble(4)

getDataAsMatrixFloat(4)

getBootstrappedDataset(3)

getClassProbabilities(3)

loadDatasetFromCSVFile(3)

GetAvailableLabels(2)

getMean(1)

getInfoText(1)

getTestFoldData(1)

getTrainingFoldData(1)

getDatasetName(1)

CalculateNormalizedHistogram(1)

getCovarianceMatrix(1)

clear(1)

Partition(1)

NewBag(1)

LoadChunkForLabel(1)

GetMinMax(1)

GetInformationGain(1)

GetFeatures(1)

GetDimensions(1)

GetCountPerLabel(1)

GetCount(1)

getClassMean(1)

Beispiel #1

Datei anzeigen

Datei: DecisionTreeClusterNode.cpp Projekt: nickgillian/grt

bool DecisionTreeClusterNode::computeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){

    const UINT M = trainingData.getNumSamples();
    const UINT N = features.getSize();
    const UINT K = classLabels.getSize();

    if( N == 0 ) return false;
    if( K == 0 ) return false;

    minError = grt_numeric_limits< Float >::max();
    Random random;
    UINT bestFeatureIndex = 0;
    Float bestThreshold = 0;
    Float error = 0;
    Vector< UINT > groupIndex(M);
    Vector< MinMax > ranges = trainingData.getRanges();
    MatrixDouble data(M,1); //This will store our temporary data for each dimension

    //Randomly select which features we want to use
    UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
    Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures );

    //Loop over each random feature and try and find the best split point
    for(UINT n=0; n<numRandomFeatures; n++){

        featureIndex = features[ randomFeatures[n] ];

        //Use the data in this feature dimension to create a sum dataset
        for(UINT i=0; i<M; i++){
            data[i][0] = trainingData[i][featureIndex];
        }

        if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
            //Store the best threshold and feature index
            if( error < minError ){
                minError = error;
                bestThreshold = threshold;
                bestFeatureIndex = featureIndex;
            }
        }
     }

     //Set the best feature index that will be returned to the DecisionTree that called this function
     featureIndex = bestFeatureIndex;

     //Store the node size, feature index, best threshold and class probabilities for this node
     set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );

     return true;
}

Beispiel #2

Datei anzeigen

Datei: DecisionTreeThresholdNode.cpp Projekt: nickgillian/grt

bool DecisionTreeThresholdNode::computeBestSplitBestRandomSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
    
    const UINT M = trainingData.getNumSamples();
    const UINT N = (UINT)features.size();
    const UINT K = (UINT)classLabels.size();
    
    if( N == 0 ) return false;
    
    minError = grt_numeric_limits< Float >::max();
    UINT bestFeatureIndex = 0;
    Float bestThreshold = 0;
    Float error = 0;
    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    Random random;
    Vector< UINT > groupIndex(M);
    VectorFloat groupCounter(2,0);
    
    MatrixFloat classProbabilities(K,2);

    //Loop over each feature and try and find the best split point
    UINT m,n;
    const UINT numFeatures = features.getSize();
    for(m=0; m<numSplittingSteps; m++){
        //Chose a random feature
        n = random.getRandomNumberInt(0,numFeatures);
        featureIndex = features[n];
        
        //Randomly choose the threshold, the threshold is based on a randomly selected sample with some random scaling
        threshold = trainingData[ random.getRandomNumberInt(0,M) ][ featureIndex ] * random.getRandomNumberUniform(0.8,1.2);
        
        //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group
        groupCounter[0] = groupCounter[1] = 0;
        classProbabilities.setAllValues(0);
        for(UINT i=0; i<M; i++){
            groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
            groupCounter[ groupIndex[i] ]++;
            classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
        }
        
        //Compute the class probabilities for the lhs group and rhs group
        for(UINT k=0; k<K; k++){
            classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
            classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
        }
        
        //Compute the Gini index for the lhs and rhs groups
        giniIndexL = giniIndexR = 0;
        for(UINT k=0; k<K; k++){
            giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
            giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
        }
        weightL = groupCounter[0]/M;
        weightR = groupCounter[1]/M;
        error = (giniIndexL*weightL) + (giniIndexR*weightR);
        
        //Store the best threshold and feature index
        if( error < minError ){
            minError = error;
            bestThreshold = threshold;
            bestFeatureIndex = featureIndex;
        }
    }
    
    //Set the best feature index that will be returned to the DecisionTree that called this function
    featureIndex = bestFeatureIndex;
    
    //Store the node size, feature index, best threshold and class probabilities for this node
    set(M,featureIndex,bestThreshold,trainingData.getClassProbabilities(classLabels));
    
    return true;
}

Beispiel #3

Datei anzeigen

Datei: DecisionTreeThresholdNode.cpp Projekt: nickgillian/grt

bool DecisionTreeThresholdNode::computeBestSplitBestIterativeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){
    
    const UINT M = trainingData.getNumSamples();
    const UINT N = features.getSize();
    const UINT K = classLabels.getSize();
    
    if( N == 0 ) return false;
    
    minError = grt_numeric_limits< Float >::max();
    UINT bestFeatureIndex = 0;
    Float bestThreshold = 0;
    Float error = 0;
    Float minRange = 0;
    Float maxRange = 0;
    Float step = 0;
    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    Vector< UINT > groupIndex(M);
    VectorFloat groupCounter(2,0);
    Vector< MinMax > ranges = trainingData.getRanges();
    
    MatrixFloat classProbabilities(K,2);
    
    //Loop over each feature and try and find the best split point
    for(UINT n=0; n<N; n++){
        minRange = ranges[n].minValue;
        maxRange = ranges[n].maxValue;
        step = (maxRange-minRange)/Float(numSplittingSteps);
        threshold = minRange;
        featureIndex = features[n];
        while( threshold <= maxRange ){
            
            //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group
            groupCounter[0] = groupCounter[1] = 0;
            classProbabilities.setAllValues(0);
            for(UINT i=0; i<M; i++){
                groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
                groupCounter[ groupIndex[i] ]++;
                classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
            }
            
            //Compute the class probabilities for the lhs group and rhs group
            for(UINT k=0; k<K; k++){
                classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
                classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
            }
            
            //Compute the Gini index for the lhs and rhs groups
            giniIndexL = giniIndexR = 0;
            for(UINT k=0; k<K; k++){
                giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
                giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
            }
            weightL = groupCounter[0]/M;
            weightR = groupCounter[1]/M;
            error = (giniIndexL*weightL) + (giniIndexR*weightR);
            
            //Store the best threshold and feature index
            if( error < minError ){
                minError = error;
                bestThreshold = threshold;
                bestFeatureIndex = featureIndex;
            }
            
            //Update the threshold
            threshold += step;
        }
    }
    
    //Set the best feature index that will be returned to the DecisionTree that called this function
    featureIndex = bestFeatureIndex;
    
    //Store the node size, feature index, best threshold and class probabilities for this node
    set(M,featureIndex,bestThreshold,trainingData.getClassProbabilities(classLabels));
    
    return true;
}