Example #1
bool KMeansFeatures::train_(MatrixDouble &trainingData){
    
    if( !initialized ){
        errorLog << "train_(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }
    
    //Reset any previous model
    featureDataReady = false;
    
    const UINT M = trainingData.getNumRows();
    const UINT N = trainingData.getNumCols();
    
    numInputDimensions = N;
    numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.size()-1 ];
    
    //Scale the input data if needed
    ranges = trainingData.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<N; j++){
                trainingData[i][j] = scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1.0);
            }
        }
    }
    
    //Train the KMeans model at each layer
    const UINT K = (UINT)numClustersPerLayer.size();
    for(UINT k=0; k<K; k++){
        KMeans kmeans;
        kmeans.setNumClusters( numClustersPerLayer[k] );
        kmeans.setComputeTheta( true );
        kmeans.setMinChange( minChange );
        kmeans.setMinNumEpochs( minNumEpochs );
        kmeans.setMaxNumEpochs( maxNumEpochs );
        
        trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << endl;
        if( !kmeans.train_( trainingData ) ){
            errorLog << "train_(MatrixDouble &trainingData) - Failed to train kmeans model at layer: " << k << endl;
            return false;
        }
        
        //Save the clusters
        clusters.push_back( kmeans.getClusters() );
        
        //Project the data through the current layer to use as training data for the next layer
        if( k+1 != K ){
            MatrixDouble data( M, numClustersPerLayer[k] );
            VectorDouble input( trainingData.getNumCols() );
            VectorDouble output( data.getNumCols() );
            
            for(UINT i=0; i<M; i++){
                
                //Copy the data into the sample
                for(UINT j=0; j<input.size(); j++){
                    input[j] = trainingData[i][j];
                }
                
                //Project the sample through the current layer
                if( !projectDataThroughLayer( input, output, k ) ){
                    errorLog << "train_(MatrixDouble &trainingData) - Failed to project sample through layer: " << k << endl;
                    return false;
                }
                
                //Copy the result into the training data for the next layer
                for(UINT j=0; j<output.size(); j++){
                    data[i][j] = output[j];
                }
            }
            
            //Use the projected data as the training data for the next layer
            trainingData = data;
            
        }
        
    }
    
    //Flag that the kmeans model has been trained
    trained = true;
    featureVector.resize( numOutputDimensions, 0 );
    
    return true;
}
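
For context, here is a minimal usage sketch for the quantizer trained by the function above. It assumes the standard GRT header and namespace, and that the KMeansFeatures constructor accepts the per-layer cluster counts suggested by the numClustersPerLayer member; the layer sizes and data dimensions are arbitrary illustration values.

#include <GRT/GRT.h>
using namespace GRT;

int main(){
    //Two quantization layers: 32 clusters feeding into 16 (arbitrary sizes)
    vector< UINT > numClustersPerLayer;
    numClustersPerLayer.push_back( 32 );
    numClustersPerLayer.push_back( 16 );
    KMeansFeatures kMeansFeatures( numClustersPerLayer );
    
    //Fill a dummy training matrix (500 samples, 3 dimensions) with random values in [0 1]
    MatrixDouble trainingData( 500, 3 );
    Random random;
    for(UINT i=0; i<trainingData.getNumRows(); i++){
        for(UINT j=0; j<trainingData.getNumCols(); j++){
            trainingData[i][j] = random.getRandomNumberUniform( 0, 1 );
        }
    }
    
    //Train the quantizer; this runs the layer-by-layer KMeans training shown above
    if( !kMeansFeatures.train_( trainingData ) ){
        cout << "Failed to train the KMeansFeatures quantizer!" << endl;
        return EXIT_FAILURE;
    }
    
    return EXIT_SUCCESS;
}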
Example #2
int main (int argc, const char * argv[])
{
    //Create an empty MatrixDouble
    MatrixDouble matrix;
    
    //Resize the matrix
    matrix.resize( 100, 2 );
    
    //Set all the values in the matrix to zero
    matrix.setAllValues( 0 );
    
    //Loop over the data and set each value to an incrementing counter
    UINT counter = 0;
    for(UINT i=0; i<matrix.getNumRows(); i++){
        for(UINT j=0; j<matrix.getNumCols(); j++){
            matrix[i][j] = counter++;
        }
    }
    
    //Add a new row at the very end of the matrix
    VectorDouble newRow(2);
    newRow[0] = 1000;
    newRow[1] = 2000;
    matrix.push_back( newRow );
    
    //Print the values
    cout << "Matrix Data: \n";
    for(UINT i=0; i<matrix.getNumRows(); i++){
        for(UINT j=0; j<matrix.getNumCols(); j++){
            cout << matrix[i][j] << "\t";
        }
        cout << endl;
    }
    cout << endl;
    
    //Get the second row as a vector
    VectorDouble rowVector = matrix.getRowVector( 1 );
    
    cout << "Row Vector Data: \n";
    for(UINT i=0; i<rowVector.size(); i++){
        cout << rowVector[i] << "\t";
    }
    cout << endl;
    
    //Get the second column as a vector
    VectorDouble colVector = matrix.getColVector( 1 );
    
    cout << "Column Vector Data: \n";
    for(UINT i=0; i<colVector.size(); i++){
        cout << colVector[i] << "\n";
    }
    cout << endl;
    
    //Get the mean of each column
    VectorDouble mean = matrix.getMean();
    
    cout << "Mean: \n";
    for(UINT i=0; i<mean.size(); i++){
        cout << mean[i] << "\n";
    }
    cout << endl;
    
    //Get the standard deviation of each column
    VectorDouble stdDev = matrix.getStdDev();
    
    cout << "StdDev: \n";
    for(UINT i=0; i<stdDev.size(); i++){
        cout << stdDev[i] << "\n";
    }
    cout << endl;
    
    //Get the covariance matrix
    MatrixDouble cov = matrix.getCovarianceMatrix();
    
    cout << "Covariance Matrix: \n";
    for(UINT i=0; i<cov.getNumRows(); i++){
        for(UINT j=0; j<cov.getNumCols(); j++){
            cout << cov[i][j] << "\t";
        }
        cout << endl;
    }
    cout << endl;
    
    //Get the minimum and maximum values of each column
    vector< MinMax > ranges = matrix.getRanges();
    
    cout << "Ranges: \n";
    for(UINT i=0; i<ranges.size(); i++){
        cout << "i: " << i << "\tMinValue: " << ranges[i].minValue << "\tMaxValue: " << ranges[i].maxValue << "\n";
    }
    cout << endl;
    
    //Save the matrix data to a csv file
    matrix.save( "data.csv" );
    
    //Load the matrix data from a csv file
    matrix.load( "data.csv" );
    
    return EXIT_SUCCESS;
}
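
One convenience worth noting: the per-column scaling that the train_() functions in these examples implement by hand is also available directly on MatrixDouble, as the data.scale(0,1) call in KMeans::trainModel below shows. A one-line sketch:

//Scale every column of the matrix into the range [0 1] in place, using the per-column min/max ranges
matrix.scale( 0, 1 );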
Example #3
bool GaussianMixtureModels::train_(MatrixDouble &data){
    
    trained = false;
    
    //Clear any previous training results
    det.clear();
    invSigma.clear();
    numTrainingIterationsToConverge = 0;
    
    if( data.getNumRows() == 0 ){
        errorLog << "train_(MatrixDouble &data) - Training Failed! Training data is empty!" << endl;
        return false;
    }
    
    //Resize the variables
    numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    
    //Resize mu and resp
    mu.resize(numClusters,numInputDimensions);
    resp.resize(numTrainingSamples,numClusters);
    
    //Resize sigma
    sigma.resize(numClusters);
    for(UINT k=0; k<numClusters; k++){
        sigma[k].resize(numInputDimensions,numInputDimensions);
    }
    
    //Resize frac and lndets
    frac.resize(numClusters);
    lndets.resize(numClusters);
    
    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }
    
    //Pick K random starting points for the initial guesses of Mu
    Random random;
    vector< UINT > randomIndexs(numTrainingSamples);
    for(UINT i=0; i<numTrainingSamples; i++) randomIndexs[i] = i;
    for(UINT i=0; i<numClusters; i++){
        SWAP(randomIndexs[ i ],randomIndexs[ random.getRandomNumberInt(0,numTrainingSamples) ]);
    }
    for(UINT k=0; k<numClusters; k++){
        for(UINT n=0; n<numInputDimensions; n++){
            mu[k][n] = data[ randomIndexs[k] ][n];
        }
    }
    
    //Setup sigma and the uniform prior on P(k)
    for(UINT k=0; k<numClusters; k++){
        frac[k] = 1.0/double(numClusters);
        for(UINT i=0; i<numInputDimensions; i++){
            for(UINT j=0; j<numInputDimensions; j++) sigma[k][i][j] = 0;
            sigma[k][i][i] = 1.0e-2;   //Set the diagonal to a small number
        }
    }
    
    loglike = 0;
    bool keepGoing = true;
    double change = 99.9e99;
    UINT numIterationsNoChange = 0;
    VectorDouble u(numInputDimensions);
    VectorDouble v(numInputDimensions);
    
    while( keepGoing ){
        
        //Run the estep
        if( estep( data, u, v, change ) ){
            
            //Run the mstep
            mstep( data );
        
            //Check for convergence
            if( fabs( change ) < minChange ){
                if( ++numIterationsNoChange >= minNumEpochs ){
                    keepGoing = false;
                }
            }else numIterationsNoChange = 0;
            if( ++numTrainingIterationsToConverge >= maxNumEpochs ) keepGoing = false;
            
        }else{
            errorLog << "train_(MatrixDouble &data) - Estep failed at iteration " << numTrainingIterationsToConverge << endl;
            return false;
        }
    }
    
    //Compute the inverse of sigma and the determinants for prediction
    if( !computeInvAndDet() ){
        det.clear();
        invSigma.clear();
        errorLog << "train_(MatrixDouble &data) - Failed to compute inverse and determinat!" << endl;
        return false;
    }
    
    //Flag that the model was trained
    trained = true;
    
    return true;
}
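
A minimal usage sketch for this clusterer. It assumes the setNumClusters setter from the GRT Clusterer base class; the cluster count and stopping criteria are arbitrary illustration values, and the other setters match those used on KMeans in Example #1.

GaussianMixtureModels gmm;
gmm.setNumClusters( 3 );      //Arbitrary number of Gaussians (assumed setter)
gmm.setMinChange( 1.0e-5 );   //Convergence threshold on the log-likelihood change
gmm.setMinNumEpochs( 10 );
gmm.setMaxNumEpochs( 1000 );

MatrixDouble data;
//...fill data with one training sample per row...

if( !gmm.train_( data ) ){
    cout << "Failed to train the GMM!" << endl;
}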
Example #4
bool KMeans::trainModel(MatrixDouble &data){
    
    if( numClusters == 0 ){
        errorLog << "trainModel(MatrixDouble &data) - Failed to train model. NumClusters is zero!" << endl;
		return false;
	}
    
    if( clusters.getNumRows() != numClusters ){
        errorLog << "trainModel(MatrixDouble &data) - Failed to train model. The number of rows in the cluster matrix does not match the number of clusters! You should need to initalize the clusters matrix first before calling this function!" << endl;
		return false;
	}
    
    if( clusters.getNumCols() != numInputDimensions ){
        errorLog << "trainModel(MatrixDouble &data) - Failed to train model. The number of columns in the cluster matrix does not match the number of input dimensions! You should need to initalize the clusters matrix first before calling this function!" << endl;
		return false;
	}

    Timer timer;
	UINT currentIter = 0;
    UINT numChanged = 0;
	bool keepTraining = true;
    double theta = 0;
    double lastTheta = 0;
    double delta = 0;
    double startTime = 0;
    thetaTracker.clear();
    finalTheta = 0;
    numTrainingIterationsToConverge = 0;
    trained = false;
    converged = false;
    
    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        data.scale(0,1);
    }

    //Init the assign and count vectors
    //Assign is set to K+1 so that the nChanged values in the eStep at the first iteration will be updated correctly
    for(UINT m=0; m<numTrainingSamples; m++) assign[m] = numClusters+1;
	for(UINT k=0; k<numClusters; k++) count[k] = 0;

    //Run the training loop
    timer.start();
	while( keepTraining ){
        startTime = timer.getMilliSeconds();

		//Compute the E step
		numChanged = estep( data );

        //Compute the M step
        mstep( data );

        //Update the iteration counter
		currentIter++;

		//Compute theta if needed
		if( computeTheta ){
            theta = calculateTheta(data);
            delta = lastTheta - theta;
            lastTheta = theta;
        }else theta = delta = 0;
        
        //Check for convergence
		if( numChanged == 0 && currentIter > minNumEpochs ){ converged = true; keepTraining = false; }
		if( currentIter >= maxNumEpochs ){ keepTraining = false; }
		if( fabs( delta ) < minChange && computeTheta && currentIter > minNumEpochs ){ converged = true; keepTraining = false; }
        if( computeTheta )  thetaTracker.push_back( theta );
        
        trainingLog << "Epoch: " << currentIter << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Theta: " << theta << " Delta: " << delta << endl;
	}
    trainingLog << "Model Trained at epoch: " << currentIter << " with a theta value of: " << theta << endl;

    finalTheta = theta;
    numTrainingIterationsToConverge = currentIter;
	trained = true;
	
	return true;
}
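
Note that trainModel() expects the clusters matrix to be initialized before it is called, as the guard clauses above enforce; the public train_() entry point (the one KMeansFeatures calls in Example #1) performs that initialization and then runs this function. A minimal sketch using only the setters seen in Example #1, with arbitrary parameter values:

KMeans kmeans;
kmeans.setNumClusters( 5 );        //Arbitrary K
kmeans.setComputeTheta( true );    //Track theta so the minChange convergence test applies
kmeans.setMinChange( 1.0e-10 );
kmeans.setMinNumEpochs( 5 );
kmeans.setMaxNumEpochs( 100 );

MatrixDouble data;
//...fill data with one training sample per row...

if( kmeans.train_( data ) ){
    MatrixDouble clusters = kmeans.getClusters(); //One cluster center per row
}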
Example #5
bool BernoulliRBM::train_(MatrixDouble &data){
    
    const UINT numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    numOutputDimensions = numHiddenUnits;
    numVisibleUnits = numInputDimensions;
    
    trainingLog << "NumInputDimensions: " << numInputDimensions << endl;
    trainingLog << "NumOutputDimensions: " << numOutputDimensions << endl;
    
    if( randomizeWeightsForTraining ){
    
        //Init the weights matrix
        weightsMatrix.resize(numHiddenUnits, numVisibleUnits);
        
        double a = 1.0 / numVisibleUnits;
        for(UINT i=0; i<numHiddenUnits; i++) {
            for(UINT j=0; j<numVisibleUnits; j++) {
                weightsMatrix[i][j] = rand.getRandomNumberUniform(-a, a);
            }
        }

        //Init the bias units
        visibleLayerBias.resize( numVisibleUnits );
        hiddenLayerBias.resize( numHiddenUnits );
        std::fill(visibleLayerBias.begin(),visibleLayerBias.end(),0);
        std::fill(hiddenLayerBias.begin(),hiddenLayerBias.end(),0);
        
    }else{
        if( weightsMatrix.getNumRows() != numHiddenUnits ){
            errorLog << "train_(MatrixDouble &data) - Weights matrix row size does not match the number of hidden units!" << endl;
            return false;
        }
        if( weightsMatrix.getNumCols() != numVisibleUnits ){
            errorLog << "train_(MatrixDouble &data) - Weights matrix row size does not match the number of visible units!" << endl;
            return false;
        }
        if( visibleLayerBias.size() != numVisibleUnits ){
            errorLog << "train_(MatrixDouble &data) - Visible layer bias size does not match the number of visible units!" << endl;
            return false;
        }
        if( hiddenLayerBias.size() != numHiddenUnits ){
            errorLog << "train_(MatrixDouble &data) - Hidden layer bias size does not match the number of hidden units!" << endl;
            return false;
        }
    }
    
    //Flag that the model has been trained in case the user wants to save the model during a training iteration using an observer
    trained = true;
    
    //Make sure the data is scaled between [0 1]
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j], ranges[j].minValue, ranges[j].maxValue, 0, 1);
            }
        }
    }
    
    const UINT numBatches = (UINT)ceil( numTrainingSamples / double(batchSize) ); //Divide as doubles, otherwise the integer division never rounds up
    
    //Setup the batch indices
    vector< BatchIndexs > batchIndexs( numBatches );
    UINT startIndex = 0;
    for(UINT i=0; i<numBatches; i++){
        batchIndexs[i].startIndex = startIndex;
        batchIndexs[i].endIndex = startIndex + batchSize;
        
        //Make sure the last batch end index is not larger than the number of training examples
        if( batchIndexs[i].endIndex >= numTrainingSamples ){
            batchIndexs[i].endIndex = numTrainingSamples;
        }
        
        //Get the batch size
        batchIndexs[i].batchSize = batchIndexs[i].endIndex - batchIndexs[i].startIndex;
        
        //Set the start index for the next batch
        startIndex = batchIndexs[i].endIndex;
    }
    
    Timer timer;
    UINT i,j,n,epoch,noChangeCounter = 0;
    double startTime = 0;
    double alpha = learningRate;
    double error = 0;
    double err = 0;
    double delta = 0;
    double lastError = 0;
    vector< UINT > indexList(numTrainingSamples);
    TrainingResult trainingResult;
    MatrixDouble wT( numVisibleUnits, numHiddenUnits );       //Stores a transposed copy of the weights vector
    MatrixDouble vW( numHiddenUnits, numVisibleUnits );       //Stores the weight velocity updates
    MatrixDouble tmpW( numHiddenUnits, numVisibleUnits );     //Stores the weight values that will be used to update the main weights matrix at each batch update
    MatrixDouble v1( batchSize, numVisibleUnits );            //Stores the real batch data during a batch update
    MatrixDouble v2( batchSize, numVisibleUnits );            //Stores the sampled batch data during a batch update
    MatrixDouble h1( batchSize, numHiddenUnits );             //Stores the hidden states given v1 and the current weightsMatrix
    MatrixDouble h2( batchSize, numHiddenUnits );             //Stores the sampled hidden states given v2 and the current weightsMatrix
    MatrixDouble c1( numHiddenUnits, numVisibleUnits );       //Stores h1' * v1
    MatrixDouble c2( numHiddenUnits, numVisibleUnits );       //Stores h2' * v2
    MatrixDouble vDiff( batchSize, numVisibleUnits );         //Stores the difference between v1-v2
    MatrixDouble hDiff( batchSize, numHiddenUnits );          //Stores the difference between h1-h2
    MatrixDouble cDiff( numHiddenUnits, numVisibleUnits );    //Stores the difference between c1-c2
    VectorDouble vDiffSum( numVisibleUnits );                 //Stores the column sum of vDiff
    VectorDouble hDiffSum( numHiddenUnits );                  //Stores the column sum of hDiff
    VectorDouble visibleLayerBiasVelocity( numVisibleUnits ); //Stores the velocity update of the visibleLayerBias
    VectorDouble hiddenLayerBiasVelocity( numHiddenUnits );   //Stores the velocity update of the hiddenLayerBias
    
    //Set all the velocity weights to zero
    vW.setAllValues( 0 );
    std::fill(visibleLayerBiasVelocity.begin(),visibleLayerBiasVelocity.end(),0);
    std::fill(hiddenLayerBiasVelocity.begin(),hiddenLayerBiasVelocity.end(),0);
    
    //Randomize the order that the training samples will be used in
    for(UINT i=0; i<numTrainingSamples; i++) indexList[i] = i;
    if( randomiseTrainingOrder ){
        std::random_shuffle(indexList.begin(), indexList.end());
    }
    
    //Start the main training loop
    timer.start();
    for(epoch=0; epoch<maxNumEpochs; epoch++) {
        startTime = timer.getMilliSeconds();
        error = 0;
        
        //Randomize the batch order
        std::random_shuffle(batchIndexs.begin(),batchIndexs.end());
        
        //Run each of the batch updates
        for(UINT k=0; k<numBatches; k+=batchStepSize){
            
            //Resize the data matrices; they will only be reallocated if the rows/cols are different
            v1.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h1.resize( batchIndexs[k].batchSize, numHiddenUnits );
            v2.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h2.resize( batchIndexs[k].batchSize, numHiddenUnits );
            
            //Setup the data pointers, using data pointers saves a few ms on large matrix updates
            double **w_p = weightsMatrix.getDataPointer();
            double **wT_p = wT.getDataPointer();
            double **vW_p = vW.getDataPointer();
            double **data_p = data.getDataPointer();
            double **v1_p = v1.getDataPointer();
            double **v2_p = v2.getDataPointer();
            double **h1_p = h1.getDataPointer();
            double **h2_p = h2.getDataPointer();
            double *vlb_p = &visibleLayerBias[0];
            double *hlb_p = &hiddenLayerBias[0];
            
            //Get the batch data
            UINT index = 0;
            for(i=batchIndexs[k].startIndex; i<batchIndexs[k].endIndex; i++){
                for(j=0; j<numVisibleUnits; j++){
                    v1_p[index][j] = data_p[ indexList[i] ][j];
                }
                index++;
            }
            
            //Copy a transposed version of the weights matrix, this is used to compute h1 and h2
            for(i=0; i<numHiddenUnits; i++)
                for(j=0; j<numVisibleUnits; j++)
                    wT_p[j][i] = w_p[i][j];
            
            //Compute h1
            h1.multiple(v1, wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h1_p[n][i] = sigmoidRandom( h1_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute v2
            v2.multiple(h1, weightsMatrix);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numVisibleUnits; i++){
                    v2_p[n][i] = sigmoidRandom( v2_p[n][i] + vlb_p[i] );
                }
            }
            
            //Compute h2
            h2.multiple(v2,wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h2_p[n][i] = sigmoid( h2_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute c1, c2 and the difference between v1-v2
            c1.multiple(h1,v1,true);
            c2.multiple(h2,v2,true);
            vDiff.subtract(v1, v2);
            
            //Compute the sum of vdiff
            for(j=0; j<numVisibleUnits; j++){
                vDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    vDiffSum[j] += vDiff[i][j];
                }
            }
            
            //Compute the difference between h1 and h2
            hDiff.subtract(h1, h2);
            for(j=0; j<numHiddenUnits; j++){
                hDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    hDiffSum[j] += hDiff[i][j];
                }
            }
            
            //Compute the difference between c1 and c2
            cDiff.subtract(c1,c2);
            
            //Update the weight velocities
            for(i=0; i<numHiddenUnits; i++){
                for(j=0; j<numVisibleUnits; j++){
                    vW_p[i][j] = ((momentum * vW_p[i][j]) + (alpha * cDiff[i][j])) / batchIndexs[k].batchSize;
                }
            }
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBiasVelocity[i] = ((momentum * visibleLayerBiasVelocity[i]) + (alpha * vDiffSum[i])) / batchIndexs[k].batchSize;
            }
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBiasVelocity[i] = ((momentum * hiddenLayerBiasVelocity[i]) + (alpha * hDiffSum[i])) / batchIndexs[k].batchSize;
            }
            
            //Update the weights
            weightsMatrix.add( vW );
            
            //Update the bias for the visible layer
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBias[i] += visibleLayerBiasVelocity[i];
            }
            
            //Update the bias for the hidden layer
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBias[i] += hiddenLayerBiasVelocity[i];
            }
            
            //Compute the reconstruction error
            err = 0;
            for(i=0; i<batchIndexs[k].batchSize; i++){
                for(j=0; j<numVisibleUnits; j++){
                    err += SQR( v1[i][j] - v2[i][j] );
                }
            }
            
            error += err / batchIndexs[k].batchSize;
        }
        error /= numBatches;
        delta = lastError - error;
        lastError = error;
        
        trainingLog << "Epoch: " << epoch+1 << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Learning rate: " << alpha;
        trainingLog << " Momentum: " << momentum;
        trainingLog << " Average reconstruction error: " << error;
        trainingLog << " Delta: " << delta << endl;
        
        //Update the learning rate
        alpha *= learningRateUpdate;
        
        trainingResult.setClassificationResult(epoch, error, this);
        trainingResults.push_back(trainingResult);
        trainingResultsObserverManager.notifyObservers( trainingResult );
        
        //Check for convergance
        if( fabs(delta) < minChange ){
            if( ++noChangeCounter >= minNumEpochs ){
                trainingLog << "Stopping training. MinChange limit reached!" << endl;
                break;
            }
        }else noChangeCounter = 0;
        
    }
    trainingLog << "Training complete after " << epoch << " epochs. Total training time: " << timer.getMilliSeconds()/1000.0 << " seconds" << endl;
    
    trained = true;
    
    return true;
}
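
A minimal usage sketch for the RBM trainer above. The setNumHiddenUnits, setLearningRate, and enableScaling setters are assumed (they mirror the numHiddenUnits, learningRate, and useScaling members used in the function); the unit count, learning rate, and epoch limits are arbitrary illustration values.

BernoulliRBM rbm;
rbm.setNumHiddenUnits( 64 );   //Arbitrary number of hidden units (assumed setter)
rbm.setLearningRate( 0.1 );    //Assumed setter for the learningRate member above
rbm.setMinChange( 1.0e-5 );
rbm.setMaxNumEpochs( 100 );
rbm.enableScaling( true );     //Assumed setter; triggers the [0 1] scaling branch above

MatrixDouble data;
//...fill data with one training sample per row...

if( !rbm.train_( data ) ){
    cout << "Failed to train the RBM!" << endl;
}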