bool KMeansQuantizer::train(MatrixDouble &trainingData){
    
    if( !initialized ){
        errorLog << "train(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }
    
    //Reset any previous model
    quantizerTrained = false;
    featureDataReady = false;
    clusters.clear();
    quantizationDistances.clear();
    
    //Train the KMeans model
    KMeans kmeans;
    kmeans.setNumClusters(numClusters);
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
	kmeans.setMaxNumEpochs( 10000 );
    
    if( !kmeans.trainInplace(trainingData) ){
        errorLog << "train(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }
    
    //Save the clusters from the KMeans model
    clusters = kmeans.getClusters();
    quantizationDistances.resize(numClusters,0);
    quantizerTrained = true;
    
    return true;
}
示例#2
0
bool KMeansQuantizer::train_(MatrixDouble &trainingData){
    
    //Clear any previous model
    clear();
    
    //Train the KMeans model
    KMeans kmeans;
    kmeans.setNumClusters(numClusters);
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( minChange );
    kmeans.setMinNumEpochs( minNumEpochs );
	kmeans.setMaxNumEpochs( maxNumEpochs );
    
    if( !kmeans.train_(trainingData) ){
        errorLog << "train_(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }
    
    trained = true;
    initialized = true;
    numInputDimensions = trainingData.getNumCols();
    numOutputDimensions = 1; //This is always 1 for the KMeansQuantizer
    featureVector.resize(numOutputDimensions,0);
    clusters = kmeans.getClusters();
    quantizationDistances.resize(numClusters,0);
    
    return true;
}
示例#3
0
int main (int argc, const char * argv[])
{
    //Create a new KMeans instance
    KMeans kmeans;
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
	kmeans.setMaxNumEpochs( 10000 );

	//There are a number of ways of training the KMeans algorithm, depending on what you need the KMeans for
	//These are:
	//- with labelled training data (in the ClassificationData format)
	//- with unlablled training data (in the UnlabelledData format)
	//- with unlabelled training data (in a simple MatrixDouble format)
	
	//This example shows you how to train the algorithm with ClassificationData
	
	//Load some training data to train the KMeans algorithm
    ClassificationData trainingData;
    
    if( !trainingData.load("LabelledClusterData.csv") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
	
    //Train the KMeans algorithm - K will automatically be set to the number of classes in the training dataset
    if( !kmeans.train( trainingData ) ){
        cout << "Failed to train model!\n";
        return EXIT_FAILURE;
    }
	
	//Get the K clusters from the KMeans instance and print them
	cout << "\nClusters:\n";
	MatrixFloat clusters = kmeans.getClusters();
    for(unsigned int k=0; k<clusters.getNumRows(); k++){
		for(unsigned int n=0; n<clusters.getNumCols(); n++){
			cout << clusters[k][n] << "\t";
		}cout << endl;
	}
	
    return EXIT_SUCCESS;
}
bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){

    error = 0;
    threshold = 0;

    const UINT M = trainingData.getNumSamples();
    const UINT K = (UINT)classLabels.size();

    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    VectorFloat groupCounter(2,0);
    MatrixFloat classProbabilities(K,2);

    //Use this data to train a KMeans cluster with 2 clusters
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-5 );
    kmeans.setMinNumEpochs( 1 );
    kmeans.setMaxNumEpochs( 100 );

    //Disable the logging to clean things up
    kmeans.setTrainingLoggingEnabled( false );

    if( !kmeans.train_( data ) ){
        errorLog << __GRT_LOG__ << " Failed to train KMeans model for feature: " << featureIndex << std::endl;
        return false;
    }

    //Set the split threshold as the mid point between the two clusters
    const MatrixFloat &clusters = kmeans.getClusters();
    threshold = 0;
    for(UINT i=0; i<clusters.getNumRows(); i++){
        threshold += clusters[i][0];
    }
    threshold /= clusters.getNumRows();

    //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
    groupCounter[0] = groupCounter[1] = 0;
    classProbabilities.setAllValues(0);
    for(UINT i=0; i<M; i++){
        groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
        groupCounter[ groupIndex[i] ]++;
        classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
    }

    //Compute the class probabilities for the lhs group and rhs group
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
        classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
    }

    //Compute the Gini index for the lhs and rhs groups
    giniIndexL = giniIndexR = 0;
    for(UINT k=0; k<K; k++){
        giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
        giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
    }
    weightL = groupCounter[0]/M;
    weightR = groupCounter[1]/M;
    error = (giniIndexL*weightL) + (giniIndexR*weightR);

    return true;
}
示例#5
0
bool KMeansFeatures::train_(MatrixDouble &trainingData){
    
    if( !initialized ){
        errorLog << "train_(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }
    
    //Reset any previous model
    featureDataReady = false;
    
    const UINT M = trainingData.getNumRows();
    const UINT N = trainingData.getNumCols();
    
    numInputDimensions = N;
    numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.size()-1 ];
    
    //Scale the input data if needed
    ranges = trainingData.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<N; j++){
                trainingData[i][j] = scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1.0);
            }
        }
    }
    
    //Train the KMeans model at each layer
    const UINT K = (UINT)numClustersPerLayer.size();
    for(UINT k=0; k<K; k++){
        KMeans kmeans;
        kmeans.setNumClusters( numClustersPerLayer[k] );
        kmeans.setComputeTheta( true );
        kmeans.setMinChange( minChange );
        kmeans.setMinNumEpochs( minNumEpochs );
        kmeans.setMaxNumEpochs( maxNumEpochs );
        
        trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << endl;
        if( !kmeans.train_( trainingData ) ){
            errorLog << "train_(MatrixDouble &trainingData) - Failed to train kmeans model at layer: " << k << endl;
            return false;
        }
        
        //Save the clusters
        clusters.push_back( kmeans.getClusters() );
        
        //Project the data through the current layer to use as training data for the next layer
        if( k+1 != K ){
            MatrixDouble data( M, numClustersPerLayer[k] );
            VectorDouble input( trainingData.getNumCols() );
            VectorDouble output( data.getNumCols() );
            
            for(UINT i=0; i<M; i++){
                
                //Copy the data into the sample
                for(UINT j=0; j<input.size(); j++){
                    input[j] = trainingData[i][j];
                }
                
                //Project the sample through the current layer
                if( !projectDataThroughLayer( input, output, k ) ){
                    errorLog << "train_(MatrixDouble &trainingData) - Failed to project sample through layer: " << k << endl;
                    return false;
                }
                
                //Copy the result into the training data for the next layer
                for(UINT j=0; j<output.size(); j++){
                    data[i][j] = output[j];
                }
            }
            
            //Swap the data for the next layer
            trainingData = data;
            
        }
        
    }
    
    //Flag that the kmeans model has been trained
    trained = true;
    featureVector.resize( numOutputDimensions, 0 );
    
    return true;
}