/**
 Trains the quantizer: runs a KMeans model over the supplied data and stores the resulting clusters.

 The quantizerTrained and featureDataReady flags are reset before training starts, so a failed
 training run leaves the quantizer flagged as untrained.

 @param trainingData the data handed to KMeans::trainInplace. It is taken by non-const reference;
        presumably the in-place trainer may modify it - TODO confirm against KMeans.
 @return true if the KMeans model was trained and its clusters were saved, false if the quantizer
         has not been initialized or the KMeans training failed
*/
bool KMeansQuantizer::train(MatrixDouble &trainingData){

    //Training is only allowed once the quantizer has been initialized
    if( !initialized ){
        errorLog << "train(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }

    //Reset any previous model
    quantizerTrained = false;
    featureDataReady = false;

    //Train the KMeans model
    //NOTE(review): no cluster count is set on this KMeans instance, so it trains with whatever
    //number of clusters KMeans defaults to - confirm that a setNumClusters call is not missing here
    KMeans kmeans;
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
    kmeans.setMaxNumEpochs( 10000 );

    if( !kmeans.trainInplace(trainingData) ){
        errorLog << "train(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }

    //Save the clusters from the KMeans model
    clusters = kmeans.getClusters();

    //Only flag the quantizer as trained once the clusters have been stored
    quantizerTrained = true;

    return true;
}
Ejemplo n.º 2
/**
 Trains the quantizer: runs a KMeans model over the supplied data, using the minChange,
 minNumEpochs and maxNumEpochs settings of this instance, and stores the resulting clusters.

 On success the instance is flagged as both trained and initialized, the number of input
 dimensions is set to the number of columns in the training data and the number of output
 dimensions is set to 1.

 @param trainingData the data handed to KMeans::train_ (taken by non-const reference)
 @return true if the KMeans model was trained and its clusters were saved, false if the KMeans
         training failed
*/
bool KMeansQuantizer::train_(MatrixDouble &trainingData){

    //Clear any previous model
    //NOTE(review): no statement actually clears the previous model at this point, so the old
    //state stays in place if training fails - confirm whether a reset call is missing here

    //Train the KMeans model
    //NOTE(review): no cluster count is set on this KMeans instance, so it trains with whatever
    //number of clusters KMeans defaults to - confirm that a setNumClusters call is not missing here
    KMeans kmeans;
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( minChange );
    kmeans.setMinNumEpochs( minNumEpochs );
    kmeans.setMaxNumEpochs( maxNumEpochs );

    if( !kmeans.train_(trainingData) ){
        errorLog << "train_(MatrixDouble &trainingData) - Failed to train quantizer!" << endl;
        return false;
    }

    //Flag the model as ready and record its dimensions
    trained = true;
    initialized = true;
    numInputDimensions = trainingData.getNumCols();
    numOutputDimensions = 1; //This is always 1 for the KMeansQuantizer

    //Save the clusters from the KMeans model
    clusters = kmeans.getClusters();

    return true;
}
Ejemplo n.º 3
/**
 Computes a split threshold for one feature and the weighted Gini error of the resulting split.

 A KMeans model with 2 clusters is trained on data and the threshold is set to the mean of the
 first column of the cluster matrix. Each training sample is then placed in the lhs group (0) or
 the rhs group (1) by comparing its featureIndex value against the threshold, and the error is the
 Gini index of each group weighted by the fraction of samples in that group.

 @param trainingData the labelled samples that are split by the threshold
 @param data the data handed to KMeans::train_ to find the two clusters (taken by non-const reference)
 @param classLabels the class labels, used to map each sample's label to a row of the class probabilities
 @param ranges not used by this function
 @param groupIndex scratch storage for the group of each sample; it must hold at least one element per
        training sample. It is passed by value, so the assignments made here are not visible to the caller.
 @param featureIndex the index of the feature that the samples are split on
 @param threshold set to the computed split threshold (reset to 0 on entry)
 @param error set to the weighted Gini error of the split (reset to 0 on entry)
 @return true if the threshold and error were computed, false if the KMeans model could not be trained
*/
bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){

    error = 0;
    threshold = 0;

    const UINT M = trainingData.getNumSamples();
    const UINT K = (UINT)classLabels.size();

    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    VectorFloat groupCounter(2,0);
    MatrixFloat classProbabilities(K,2);

    //Use this data to train a KMeans cluster with 2 clusters
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-5 );
    kmeans.setMinNumEpochs( 1 );
    kmeans.setMaxNumEpochs( 100 );

    //Disable the logging to clean things up
    kmeans.setTrainingLoggingEnabled( false );

    if( !kmeans.train_( data ) ){
        errorLog << __GRT_LOG__ << " Failed to train KMeans model for feature: " << featureIndex << std::endl;
        return false;
    }

    //Set the split threshold as the mid point between the two clusters
    const MatrixFloat &clusters = kmeans.getClusters();
    threshold = 0;
    for(UINT i=0; i<clusters.getNumRows(); i++){
        threshold += clusters[i][0];
    }
    threshold /= clusters.getNumRows();

    //Zero the per-class counters before accumulating into them, the counts below are built up
    //with ++ so this must not rely on the matrix constructor initializing its storage
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = 0;
        classProbabilities[k][1] = 0;
    }

    //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
    groupCounter[0] = groupCounter[1] = 0;
    for(UINT i=0; i<M; i++){
        groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
        groupCounter[ groupIndex[i] ]++;
        classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
    }

    //Compute the class probabilities for the lhs group and rhs group, an empty group gets a probability of 0
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
        classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
    }

    //Compute the Gini index for the lhs and rhs groups
    giniIndexL = giniIndexR = 0;
    for(UINT k=0; k<K; k++){
        giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
        giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
    }

    //Weight each group's Gini index by the fraction of the samples that fell into that group
    weightL = groupCounter[0]/M;
    weightR = groupCounter[1]/M;
    error = (giniIndexL*weightL) + (giniIndexR*weightR);

    return true;
}
Ejemplo n.º 4
/**
 Trains one KMeans model per layer, using the cluster counts in numClustersPerLayer.

 The first layer is trained on the (optionally scaled) input data. For every layer except the last,
 each sample is projected through the layer that was just trained and the projected data becomes
 the training data for the next layer. The clusters of every layer are appended to clusters.

 @param trainingData the data to train on. It is modified by this function: it is scaled in place
        when useScaling is enabled and it is overwritten with the projected data after each layer
        except the last.
 @return true if every layer was trained, false if the instance has not been initialized, if no
         layers have been configured, if a KMeans model failed to train or if a sample could not be
         projected through a layer
*/
bool KMeansFeatures::train_(MatrixDouble &trainingData){

    if( !initialized ){
        errorLog << "train_(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }

    //Without at least one layer there is nothing to train, and the size()-1 index below would wrap around
    if( numClustersPerLayer.size() == 0 ){
        errorLog << "train_(MatrixDouble &trainingData) - The number of clusters per layer has not been set!" << endl;
        return false;
    }

    //Reset any previous model
    //NOTE(review): clusters is only ever appended to below and is not reset here, so training twice
    //would keep the layers of the previous model at the front - confirm it is cleared elsewhere
    featureDataReady = false;

    const UINT M = trainingData.getNumRows();
    const UINT N = trainingData.getNumCols();

    //The output has one dimension per cluster in the last layer
    numInputDimensions = N;
    numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.size()-1 ];

    //Scale the input data if needed
    ranges = trainingData.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<N; j++){
                trainingData[i][j] = scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1.0);
            }
        }
    }

    //Train the KMeans model at each layer
    const UINT K = (UINT)numClustersPerLayer.size();
    for(UINT k=0; k<K; k++){
        KMeans kmeans;
        kmeans.setNumClusters( numClustersPerLayer[k] );
        kmeans.setComputeTheta( true );
        kmeans.setMinChange( minChange );
        kmeans.setMinNumEpochs( minNumEpochs );
        kmeans.setMaxNumEpochs( maxNumEpochs );

        trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << endl;

        if( !kmeans.train_( trainingData ) ){
            errorLog << "train_(MatrixDouble &trainingData) - Failed to train kmeans model at layer: " << k << endl;
            return false;
        }

        //Save the clusters
        clusters.push_back( kmeans.getClusters() );

        //Project the data through the current layer to use as training data for the next layer
        if( k+1 != K ){
            MatrixDouble data( M, numClustersPerLayer[k] );
            VectorDouble input( trainingData.getNumCols() );
            VectorDouble output( data.getNumCols() );

            for(UINT i=0; i<M; i++){

                //Copy the data into the sample
                for(UINT j=0; j<input.size(); j++){
                    input[j] = trainingData[i][j];
                }

                //Project the sample through the current layer
                if( !projectDataThroughLayer( input, output, k ) ){
                    errorLog << "train_(MatrixDouble &trainingData) - Failed to project sample through layer: " << k << endl;
                    return false;
                }

                //Copy the result into the training data for the next layer
                for(UINT j=0; j<output.size(); j++){
                    data[i][j] = output[j];
                }
            }

            //Swap the data for the next layer
            trainingData = data;
        }
    }

    //Flag that the kmeans model has been trained
    trained = true;
    featureVector.resize( numOutputDimensions, 0 );

    return true;
}