bool KMeansFeatures::train_(MatrixDouble &trainingData){
    
    //Trains a stack of KMeans quantizers, one per entry in numClustersPerLayer.
    //Each layer is trained on the (projected) output of the previous layer.
    //NOTE: trainingData is taken by reference and is modified in place — it is
    //scaled (if useScaling) and then overwritten with each layer's projection.
    //Returns true if every layer trains successfully, false otherwise.
    
    if( !initialized ){
        errorLog << "train_(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
        return false;
    }
    
    //Reset any previous model
    featureDataReady = false;
    
    const UINT M = trainingData.getNumRows();
    const UINT N = trainingData.getNumCols();
    
    numInputDimensions = N;
    //The output dimensionality is the cluster count of the final layer
    numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.size()-1 ];
    
    //Scale the input data if needed, mapping each column into [0 1]
    ranges = trainingData.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<N; j++){
                trainingData[i][j] = scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1.0);
            }
        }
    }
    
    //Train the KMeans model at each layer
    const UINT K = (UINT)numClustersPerLayer.size();
    for(UINT k=0; k<K; k++){
        
        //Each layer gets a fresh KMeans instance configured from this object's training settings
        KMeans kmeans;
        kmeans.setNumClusters( numClustersPerLayer[k] );
        kmeans.setComputeTheta( true );
        kmeans.setMinChange( minChange );
        kmeans.setMinNumEpochs( minNumEpochs );
        kmeans.setMaxNumEpochs( maxNumEpochs );
        
        trainingLog << "Layer " << k+1 << "/" << K << " NumClusters: " << numClustersPerLayer[k] << endl;
        if( !kmeans.train_( trainingData ) ){
            errorLog << "train_(MatrixDouble &trainingData) - Failed to train kmeans model at layer: " << k << endl;
            return false;
        }
        
        //Save the clusters
        clusters.push_back( kmeans.getClusters() );
        
        //Project the data through the current layer to use as training data for the next layer
        //(skipped on the final layer, since there is no next layer to feed)
        if( k+1 != K ){
            MatrixDouble data( M, numClustersPerLayer[k] );
            VectorDouble input( trainingData.getNumCols() );
            VectorDouble output( data.getNumCols() );
            for(UINT i=0; i<M; i++){
                
                //Copy the data into the sample
                for(UINT j=0; j<input.size(); j++){
                    input[j] = trainingData[i][j];
                }
                
                //Project the sample through the current layer
                if( !projectDataThroughLayer( input, output, k ) ){
                    errorLog << "train_(MatrixDouble &trainingData) - Failed to project sample through layer: " << k << endl;
                    return false;
                }
                
                //Copy the result into the training data for the next layer
                for(UINT j=0; j<output.size(); j++){
                    data[i][j] = output[j];
                }
            }
            
            //Swap the data for the next layer (this overwrites the caller's matrix)
            trainingData = data;
        }
    }
    
    //Flag that the kmeans model has been trained
    trained = true;
    featureVector.resize( numOutputDimensions, 0 );
    return true;
}
int main (int argc, const char * argv[]) { //Create an empty matrix double MatrixDouble matrix; //Resize the matrix matrix.resize( 100, 2 ); //Set all the values in the matrix to zero matrix.setAllValues( 0 ); //Loop over the data and set the values to random values UINT counter = 0; for(UINT i=0; i<matrix.getNumRows(); i++){ for(UINT j=0; j<matrix.getNumCols(); j++){ matrix[i][j] = counter++; } } //Add a new row at the very end of the matrix VectorDouble newRow(2); newRow[0] = 1000; newRow[1] = 2000; matrix.push_back( newRow ); //Print the values cout << "Matrix Data: \n"; for(UINT i=0; i<matrix.getNumRows(); i++){ for(UINT j=0; j<matrix.getNumCols(); j++){ cout << matrix[i][j] << "\t"; } cout << endl; } cout << endl; //Get the second row as a vector VectorDouble rowVector = matrix.getRowVector( 1 ); cout << "Row Vector Data: \n"; for(UINT i=0; i<rowVector.size(); i++){ cout << rowVector[i] << "\t"; } cout << endl; //Get the second column as a vector VectorDouble colVector = matrix.getColVector( 1 ); cout << "Column Vector Data: \n"; for(UINT i=0; i<colVector.size(); i++){ cout << colVector[i] << "\n"; } cout << endl; //Get the mean of each column VectorDouble mean = matrix.getMean(); cout << "Mean: \n"; for(UINT i=0; i<mean.size(); i++){ cout << mean[i] << "\n"; } cout << endl; //Get the Standard Deviation of each column VectorDouble stdDev = matrix.getStdDev(); cout << "StdDev: \n"; for(UINT i=0; i<stdDev.size(); i++){ cout << stdDev[i] << "\n"; } cout << endl; //Get the covariance matrix MatrixDouble cov = matrix.getCovarianceMatrix(); cout << "Covariance Matrix: \n"; for(UINT i=0; i<cov.getNumRows(); i++){ for(UINT j=0; j<cov.getNumCols(); j++){ cout << cov[i][j] << "\t"; } cout << endl; } cout << endl; vector< MinMax > ranges = matrix.getRanges(); cout << "Ranges: \n"; for(UINT i=0; i<ranges.size(); i++){ cout << "i: " << i << "\tMinValue: " << ranges[i].minValue << "\tMaxValue:" << ranges[i].maxValue << "\n"; } cout << endl; //Save the matrix data to a csv 
file matrix.save( "data.csv" ); //load the matrix data from a csv file matrix.load( "data.csv" ); return EXIT_SUCCESS; }
bool GaussianMixtureModels::train_(MatrixDouble &data){
    
    //Fits a Gaussian Mixture Model to the rows of data using EM (estep/mstep),
    //starting from numClusters randomly chosen samples as the initial means.
    //NOTE: data is taken by reference and is scaled in place when useScaling is set.
    //Returns true on success, false if the data is empty, the estep fails, or
    //the covariance inverses/determinants cannot be computed.
    
    trained = false;
    
    //Clear any previous training results
    det.clear();
    invSigma.clear();
    numTrainingIterationsToConverge = 0;
    
    if( data.getNumRows() == 0 ){
        errorLog << "train_(MatrixDouble &data) - Training Failed! Training data is empty!" << endl;
        return false;
    }
    
    //Resize the variables
    numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    
    //Resize mu and resp (resp holds the per-sample cluster responsibilities)
    mu.resize(numClusters,numInputDimensions);
    resp.resize(numTrainingSamples,numClusters);
    
    //Resize sigma, one covariance matrix per cluster
    sigma.resize(numClusters);
    for(UINT k=0; k<numClusters; k++){
        sigma[k].resize(numInputDimensions,numInputDimensions);
    }
    
    //Resize frac and lndets
    frac.resize(numClusters);
    lndets.resize(numClusters);
    
    //Scale the data if needed, mapping each column into [0 1]
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }
    
    //Pick K random starting points for the inital guesses of Mu
    //(partial shuffle: only the first numClusters slots need randomizing)
    //NOTE(review): assumes getRandomNumberInt(0,numTrainingSamples) returns an
    //index strictly less than numTrainingSamples — confirm against Random's API,
    //otherwise this can index one past the end of randomIndexs
    Random random;
    vector< UINT > randomIndexs(numTrainingSamples);
    for(UINT i=0; i<numTrainingSamples; i++) randomIndexs[i] = i;
    for(UINT i=0; i<numClusters; i++){
        SWAP(randomIndexs[ i ],randomIndexs[ random.getRandomNumberInt(0,numTrainingSamples) ]);
    }
    for(UINT k=0; k<numClusters; k++){
        for(UINT n=0; n<numInputDimensions; n++){
            mu[k][n] = data[ randomIndexs[k] ][n];
        }
    }
    
    //Setup sigma and the uniform prior on P(k)
    for(UINT k=0; k<numClusters; k++){
        frac[k] = 1.0/double(numClusters);
        for(UINT i=0; i<numInputDimensions; i++){
            for(UINT j=0; j<numInputDimensions; j++) sigma[k][i][j] = 0;
            sigma[k][i][i] = 1.0e-2; //Set the diagonal to a small number
        }
    }
    
    loglike = 0;
    bool keepGoing = true;
    double change = 99.9e99; //Start with a huge change so the first convergence test cannot pass
    UINT numIterationsNoChange = 0;
    VectorDouble u(numInputDimensions);
    VectorDouble v(numInputDimensions);
    
    //Main EM loop: alternate estep/mstep until the log-likelihood change stays
    //below minChange for minNumEpochs iterations, or maxNumEpochs is reached
    while( keepGoing ){
        
        //Run the estep
        if( estep( data, u, v, change ) ){
            
            //Run the mstep
            mstep( data );
            
            //Check for convergance
            if( fabs( change ) < minChange ){
                if( ++numIterationsNoChange >= minNumEpochs ){
                    keepGoing = false;
                }
            }else numIterationsNoChange = 0;
            
            if( ++numTrainingIterationsToConverge >= maxNumEpochs ) keepGoing = false;
            
        }else{
            errorLog << "train_(MatrixDouble &data) - Estep failed at iteration " << numTrainingIterationsToConverge << endl;
            return false;
        }
    }
    
    //Compute the inverse of sigma and the determinants for prediction
    if( !computeInvAndDet() ){
        det.clear();
        invSigma.clear();
        errorLog << "train_(MatrixDouble &data) - Failed to compute inverse and determinat!" << endl;
        return false;
    }
    
    //Flag that the model was trained
    trained = true;
    return true;
}
bool KMeans::trainModel(MatrixDouble &data){
    
    //Runs the k-means EM loop on data, starting from the clusters matrix that
    //must already be initialized by the caller.
    //NOTE(review): numTrainingSamples, assign and count are not set/sized here —
    //presumably the caller (e.g. train_) prepares them before invoking this;
    //verify before calling trainModel directly.
    //NOTE: data is taken by reference and is scaled in place when useScaling is set.
    //Returns true when the loop finishes (converged or maxNumEpochs reached).
    
    if( numClusters == 0 ){
        errorLog << "trainModel(MatrixDouble &data) - Failed to train model. NumClusters is zero!" << endl;
        return false;
    }
    
    //The clusters matrix must already have one row per cluster...
    if( clusters.getNumRows() != numClusters ){
        errorLog << "trainModel(MatrixDouble &data) - Failed to train model. The number of rows in the cluster matrix does not match the number of clusters! You should need to initalize the clusters matrix first before calling this function!" << endl;
        return false;
    }
    
    //...and one column per input dimension
    if( clusters.getNumCols() != numInputDimensions ){
        errorLog << "trainModel(MatrixDouble &data) - Failed to train model. The number of columns in the cluster matrix does not match the number of input dimensions! You should need to initalize the clusters matrix first before calling this function!" << endl;
        return false;
    }
    
    Timer timer;
    UINT currentIter = 0;
    UINT numChanged = 0;
    bool keepTraining = true;
    double theta = 0;
    double lastTheta = 0;
    double delta = 0;
    double startTime = 0;
    thetaTracker.clear();
    finalTheta = 0;
    numTrainingIterationsToConverge = 0;
    trained = false;
    converged = false;
    
    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        data.scale(0,1);
    }
    
    //Init the assign and count vectors
    //Assign is set to K+1 so that the nChanged values in the eStep at the first iteration will be updated correctly
    for(UINT m=0; m<numTrainingSamples; m++) assign[m] = numClusters+1;
    for(UINT k=0; k<numClusters; k++) count[k] = 0;
    
    //Run the training loop
    timer.start();
    while( keepTraining ){
        startTime = timer.getMilliSeconds();
        
        //Compute the E step (reassign samples; returns how many changed cluster)
        numChanged = estep( data );
        
        //Compute the M step (recompute the cluster centers)
        mstep( data );
        
        //Update the iteration counter
        currentIter++;
        
        //Compute theta if needed (delta is the change in theta since last epoch)
        if( computeTheta ){
            theta = calculateTheta(data);
            delta = lastTheta - theta;
            lastTheta = theta;
        }else theta = delta = 0;
        
        //Check convergance: no assignment changes after the minimum epoch count
        if( numChanged == 0 && currentIter > minNumEpochs ){ converged = true; keepTraining = false; }
        
        //Hard stop at the maximum epoch count
        if( currentIter >= maxNumEpochs ){ keepTraining = false; }
        
        //Alternative convergence test: theta stopped changing (only meaningful when computeTheta is on)
        if( fabs( delta ) < minChange && computeTheta && currentIter > minNumEpochs ){ converged = true; keepTraining = false; }
        
        //Update the theta tracker
        if( computeTheta ) thetaTracker.push_back( theta );
        
        trainingLog << "Epoch: " << currentIter << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Theta: " << theta << " Delta: " << delta << endl;
    }
    trainingLog << "Model Trained at epoch: " << currentIter << " with a theta value of: " << theta << endl;
    
    finalTheta = theta;
    numTrainingIterationsToConverge = currentIter;
    trained = true;
    
    return true;
}
bool BernoulliRBM::train_(MatrixDouble &data){
    
    //Trains a Bernoulli RBM on the rows of data using single-step contrastive
    //divergence (CD-1) with momentum-based mini-batch weight updates.
    //NOTE: data is taken by reference and is scaled in place when useScaling is set.
    //Returns true when training completes, false if a user-supplied weights
    //matrix or bias vector has the wrong dimensions.
    
    const UINT numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    numOutputDimensions = numHiddenUnits;
    numVisibleUnits = numInputDimensions;
    
    trainingLog << "NumInputDimensions: " << numInputDimensions << endl;
    trainingLog << "NumOutputDimensions: " << numOutputDimensions << endl;
    
    if( randomizeWeightsForTraining ){
        
        //Init the weights matrix with small uniform values, scaled by the visible layer size
        weightsMatrix.resize(numHiddenUnits, numVisibleUnits);
        double a = 1.0 / numVisibleUnits;
        for(UINT i=0; i<numHiddenUnits; i++) {
            for(UINT j=0; j<numVisibleUnits; j++) {
                weightsMatrix[i][j] = rand.getRandomNumberUniform(-a, a);
            }
        }
        
        //Init the bias units to zero
        visibleLayerBias.resize( numVisibleUnits );
        hiddenLayerBias.resize( numHiddenUnits );
        std::fill(visibleLayerBias.begin(),visibleLayerBias.end(),0);
        std::fill(hiddenLayerBias.begin(),hiddenLayerBias.end(),0);
        
    }else{
        //The caller supplied the weights and biases, so validate their dimensions
        if( weightsMatrix.getNumRows() != numHiddenUnits ){
            errorLog << "train_(MatrixDouble &data) - Weights matrix row size does not match the number of hidden units!" << endl;
            return false;
        }
        if( weightsMatrix.getNumCols() != numVisibleUnits ){
            errorLog << "train_(MatrixDouble &data) - Weights matrix row size does not match the number of visible units!" << endl;
            return false;
        }
        if( visibleLayerBias.size() != numVisibleUnits ){
            errorLog << "train_(MatrixDouble &data) - Visible layer bias size does not match the number of visible units!" << endl;
            return false;
        }
        if( hiddenLayerBias.size() != numHiddenUnits ){
            errorLog << "train_(MatrixDouble &data) - Hidden layer bias size does not match the number of hidden units!" << endl;
            return false;
        }
    }
    
    //Flag the model has been trained encase the user wants to save the model during a training iteration using an observer
    trained = true;
    
    //Make sure the data is scaled between [0 1]
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j], ranges[j].minValue, ranges[j].maxValue, 0, 1);
            }
        }
    }
    
    //BUGFIX: the division must be performed in floating point; the previous
    //integer division truncated before ceil() ran, silently dropping any
    //partial final batch (e.g. 100 samples / 32 gave 3 batches, not 4)
    const UINT numBatches = (UINT)ceil( numTrainingSamples/double(batchSize) );
    
    //Setup the batch indexs
    vector< BatchIndexs > batchIndexs( numBatches );
    UINT startIndex = 0;
    for(UINT i=0; i<numBatches; i++){
        batchIndexs[i].startIndex = startIndex;
        batchIndexs[i].endIndex = startIndex + batchSize;
        
        //Make sure the last batch end index is not larger than the number of training examples
        if( batchIndexs[i].endIndex >= numTrainingSamples ){
            batchIndexs[i].endIndex = numTrainingSamples;
        }
        
        //Get the batch size
        batchIndexs[i].batchSize = batchIndexs[i].endIndex - batchIndexs[i].startIndex;
        
        //Set the start index for the next batch
        startIndex = batchIndexs[i].endIndex;
    }
    
    Timer timer;
    UINT i = 0, j = 0, n = 0, epoch = 0, noChangeCounter = 0;
    double startTime = 0;
    double alpha = learningRate;
    double error = 0;
    double err = 0;
    double delta = 0;
    double lastError = 0;
    vector< UINT > indexList(numTrainingSamples);
    TrainingResult trainingResult;
    MatrixDouble wT( numVisibleUnits, numHiddenUnits );           //Stores a transposed copy of the weights vector
    MatrixDouble vW( numHiddenUnits, numVisibleUnits );           //Stores the weight velocity updates
    MatrixDouble tmpW( numHiddenUnits, numVisibleUnits );         //Stores the weight values that will be used to update the main weights matrix at each batch update
    MatrixDouble v1( batchSize, numVisibleUnits );                //Stores the real batch data during a batch update
    MatrixDouble v2( batchSize, numVisibleUnits );                //Stores the sampled batch data during a batch update
    MatrixDouble h1( batchSize, numHiddenUnits );                 //Stores the hidden states given v1 and the current weightsMatrix
    MatrixDouble h2( batchSize, numHiddenUnits );                 //Stores the sampled hidden states given v2 and the current weightsMatrix
    MatrixDouble c1( numHiddenUnits, numVisibleUnits );           //Stores h1' * v1
    MatrixDouble c2( numHiddenUnits, numVisibleUnits );           //Stores h2' * v2
    MatrixDouble vDiff( batchSize, numVisibleUnits );             //Stores the difference between v1-v2
    //BUGFIX: hDiff stores h1-h2, which have numHiddenUnits columns; it was
    //previously constructed with numVisibleUnits columns, which is wrong
    //whenever the hidden and visible layer sizes differ
    MatrixDouble hDiff( batchSize, numHiddenUnits );              //Stores the difference between h1-h2
    MatrixDouble cDiff( numHiddenUnits, numVisibleUnits );        //Stores the difference between c1-c2
    VectorDouble vDiffSum( numVisibleUnits );                     //Stores the column sum of vDiff
    VectorDouble hDiffSum( numHiddenUnits );                      //Stores the column sum of hDiff
    VectorDouble visibleLayerBiasVelocity( numVisibleUnits );     //Stores the velocity update of the visibleLayerBias
    VectorDouble hiddenLayerBiasVelocity( numHiddenUnits );       //Stores the velocity update of the hiddenLayerBias
    
    //Set all the velocity weights to zero
    vW.setAllValues( 0 );
    std::fill(visibleLayerBiasVelocity.begin(),visibleLayerBiasVelocity.end(),0);
    std::fill(hiddenLayerBiasVelocity.begin(),hiddenLayerBiasVelocity.end(),0);
    
    //Randomize the order that the training samples will be used in
    for(i=0; i<numTrainingSamples; i++) indexList[i] = i;
    if( randomiseTrainingOrder ){
        std::random_shuffle(indexList.begin(), indexList.end());
    }
    
    //Start the main training loop
    timer.start();
    for(epoch=0; epoch<maxNumEpochs; epoch++) {
        startTime = timer.getMilliSeconds();
        error = 0;
        
        //Randomize the batch order
        std::random_shuffle(batchIndexs.begin(),batchIndexs.end());
        
        //Run each of the batch updates
        //NOTE(review): when batchStepSize > 1 some batches are skipped but the
        //error is still averaged over numBatches — confirm this is intentional
        for(UINT k=0; k<numBatches; k+=batchStepSize){
            
            //Resize the data matrices, the matrices will only be resized if the rows cols are different
            v1.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h1.resize( batchIndexs[k].batchSize, numHiddenUnits );
            v2.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h2.resize( batchIndexs[k].batchSize, numHiddenUnits );
            
            //Setup the data pointers, using data pointers saves a few ms on large matrix updates
            double **w_p = weightsMatrix.getDataPointer();
            double **wT_p = wT.getDataPointer();
            double **vW_p = vW.getDataPointer();
            double **data_p = data.getDataPointer();
            double **v1_p = v1.getDataPointer();
            double **v2_p = v2.getDataPointer();
            double **h1_p = h1.getDataPointer();
            double **h2_p = h2.getDataPointer();
            double *vlb_p = &visibleLayerBias[0];
            double *hlb_p = &hiddenLayerBias[0];
            
            //Get the batch data
            UINT index = 0;
            for(i=batchIndexs[k].startIndex; i<batchIndexs[k].endIndex; i++){
                for(j=0; j<numVisibleUnits; j++){
                    v1_p[index][j] = data_p[ indexList[i] ][j];
                }
                index++;
            }
            
            //Copy a transposed version of the weights matrix, this is used to compute h1 and h2
            for(i=0; i<numHiddenUnits; i++)
                for(j=0; j<numVisibleUnits; j++)
                    wT_p[j][i] = w_p[i][j];
            
            //Compute h1 (hidden activations sampled from the real data)
            h1.multiple(v1, wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h1_p[n][i] = sigmoidRandom( h1_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute v2 (visible reconstruction sampled from h1)
            v2.multiple(h1, weightsMatrix);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numVisibleUnits; i++){
                    v2_p[n][i] = sigmoidRandom( v2_p[n][i] + vlb_p[i] );
                }
            }
            
            //Compute h2 (hidden probabilities from the reconstruction; not sampled)
            h2.multiple(v2,wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h2_p[n][i] = sigmoid( h2_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute c1, c2 and the difference between v1-v2
            c1.multiple(h1,v1,true);
            c2.multiple(h2,v2,true);
            vDiff.subtract(v1, v2);
            
            //Compute the sum of vdiff
            for(j=0; j<numVisibleUnits; j++){
                vDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    vDiffSum[j] += vDiff[i][j];
                }
            }
            
            //Compute the difference between h1 and h2
            hDiff.subtract(h1, h2);
            for(j=0; j<numHiddenUnits; j++){
                hDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    hDiffSum[j] += hDiff[i][j];
                }
            }
            
            //Compute the difference between c1 and c2
            cDiff.subtract(c1,c2);
            
            //Update the weight velocities (momentum term plus the CD-1 gradient, averaged over the batch)
            for(i=0; i<numHiddenUnits; i++){
                for(j=0; j<numVisibleUnits; j++){
                    vW_p[i][j] = ((momentum * vW_p[i][j]) + (alpha * cDiff[i][j])) / batchIndexs[k].batchSize;
                }
            }
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBiasVelocity[i] = ((momentum * visibleLayerBiasVelocity[i]) + (alpha * vDiffSum[i])) / batchIndexs[k].batchSize;
            }
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBiasVelocity[i] = ((momentum * hiddenLayerBiasVelocity[i]) + (alpha * hDiffSum[i])) / batchIndexs[k].batchSize;
            }
            
            //Update the weights
            weightsMatrix.add( vW );
            
            //Update the bias for the visible layer
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBias[i] += visibleLayerBiasVelocity[i];
            }
            
            //Update the bias for the hidden layer
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBias[i] += hiddenLayerBiasVelocity[i];
            }
            
            //Compute the reconstruction error (squared difference between the data and its reconstruction)
            err = 0;
            for(i=0; i<batchIndexs[k].batchSize; i++){
                for(j=0; j<numVisibleUnits; j++){
                    err += SQR( v1[i][j] - v2[i][j] );
                }
            }
            
            error += err / batchIndexs[k].batchSize;
        }
        error /= numBatches;
        delta = lastError - error;
        lastError = error;
        
        trainingLog << "Epoch: " << epoch+1 << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Learning rate: " << alpha;
        trainingLog << " Momentum: " << momentum;
        trainingLog << " Average reconstruction error: " << error;
        trainingLog << " Delta: " << delta << endl;
        
        //Update the learning rate
        alpha *= learningRateUpdate;
        
        trainingResult.setClassificationResult(epoch, error, this);
        trainingResults.push_back(trainingResult);
        trainingResultsObserverManager.notifyObservers( trainingResult );
        
        //Check for convergance: the error change stayed below minChange for minNumEpochs epochs
        if( fabs(delta) < minChange ){
            if( ++noChangeCounter >= minNumEpochs ){
                trainingLog << "Stopping training. MinChange limit reached!" << endl;
                break;
            }
        }else noChangeCounter = 0;
        
    }
    trainingLog << "Training complete after " << epoch << " epochs. Total training time: " << timer.getMilliSeconds()/1000.0 << " seconds" << endl;
    
    trained = true;
    
    return true;
}