bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));
    
    //Init the model
    model.init( classLabel, N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        UINT m = 0;
        while( m < M ){
            
            //Get the batch data for this update
            UINT roundSize = m+batchSize < M ? batchSize : M-m;
            batchMean.fill(0.0);
            for(UINT i=0; i<roundSize; i++){
                for(UINT j=0; j<N; j++){
                    batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
                    batchMean[j] += batchData[i][j];
                }
            }
            for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;
            
            //Compute the error on this batch, given the current weights
            error = 0.0;
            for(UINT i=0; i<roundSize; i++){
                error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
            }
            error /= roundSize;
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate * error * batchMean[j];
            }
            model.w0 += learningRate * error;
            
            m += roundSize;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
        
        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }
    
    return true;
}
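//For comparison with the batch update above, which multiplies the mean batch error by the mean
//batch input vector, the sketch below accumulates the exact averaged per-sample gradient,
//sum_i( error_i * x_i ) / batchSize. This is a minimal standalone sketch, not part of GRT; the
//function and parameter names are illustrative only.
#include <vector>

void accumulateBatchGradient( const std::vector< std::vector<double> > &batchX,
                              const std::vector<double> &batchError,
                              std::vector<double> &gradient ){
    const size_t batchSize = batchX.size();
    const size_t N = gradient.size();
    for(size_t j=0; j<N; j++) gradient[j] = 0.0;
    
    //Accumulate error_i * x_i over every sample in the batch
    for(size_t i=0; i<batchSize; i++){
        for(size_t j=0; j<N; j++){
            gradient[j] += batchError[i] * batchX[i][j];
        }
    }
    
    //Average over the batch
    for(size_t j=0; j<N; j++) gradient[j] /= double(batchSize);
}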
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    Vector< UINT > randomTrainingOrder(M);
    
    //Init the model
    model.init( classLabel, N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate * error * data[i][j];
            }
            model.w0 += learningRate * error;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
    }
    
    return true;
}
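//A minimal usage sketch showing how the per-class softmax models above are trained indirectly
//through the public classifier API, assuming the standard GRT MLBase/Classifier interface
//(setLearningRate, setMinChange, setMaxNumEpochs, train, predict, getPredictedClassLabel).
//The dataset file name is illustrative only.
#include <GRT/GRT.h>
#include <cstdlib>
#include <iostream>
using namespace GRT;

int main(){
    //Load some labelled training data (file name is illustrative)
    ClassificationData trainingData;
    if( !trainingData.load( "TrainingData.grt" ) ) return EXIT_FAILURE;
    
    //Configure and train the softmax classifier; trainSoftmaxModel is called internally,
    //once per class, to fit the one-vs-rest models
    Softmax softmax;
    softmax.setLearningRate( 0.01 );
    softmax.setMinChange( 1.0e-5 );
    softmax.setMaxNumEpochs( 500 );
    if( !softmax.train( trainingData ) ) return EXIT_FAILURE;
    
    //Classify a new sample (here just a zero vector of the right size)
    VectorFloat sample( trainingData.getNumDimensions(), 0.0 );
    if( softmax.predict( sample ) ){
        std::cout << "Predicted class label: " << softmax.getPredictedClassLabel() << std::endl;
    }
    
    return EXIT_SUCCESS;
}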
bool SelfOrganizingMap::train_( MatrixFloat &data ){
    
    //Clear any previous models
    clear();
    
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    numInputDimensions = N;
    numOutputDimensions = numClusters*numClusters;
    Random rand;
    
    //Setup the neurons
    neurons.resize( numClusters, numClusters );
    
    if( neurons.getSize() != numClusters*numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons matrix, there might not be enough memory!" << std::endl;
        return false;
    }
    
    //Init the neurons
    for(UINT i=0; i<numClusters; i++){
        for(UINT j=0; j<numClusters; j++){
            neurons[i][j].init( N, 0.5, SOM_MIN_TARGET, SOM_MAX_TARGET );
        }
    }
    
    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,SOM_MIN_TARGET,SOM_MAX_TARGET);
            }
        }
    }
    
    Float error = 0;
    Float lastError = 0;
    Float trainingSampleError = 0;
    Float delta = 0;
    Float minChange = 0;
    Float weightUpdate = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    Float neuronWeightFunction = 0;
    Float gamma = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Enter the main training loop
    while( keepTraining ){
        
        //Update alpha based on the current iteration
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);
        
        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT m=0; m<M; m++){
            
            trainingSampleError = 0;
            
            //Get the m'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[m] );
            
            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndexRow = 0;
            UINT bestIndexCol = 0;
            for(UINT i=0; i<numClusters; i++){
                for(UINT j=0; j<numClusters; j++){
                    dist = neurons[i][j].getSquaredWeightDistance( trainingSample );
                    if( dist < bestDist ){
                        bestDist = dist;
                        bestIndexRow = i;
                        bestIndexCol = j;
                    }
                }
            }
            error += bestDist;
            
            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights updated more
            const Float bir = bestIndexRow;
            const Float bic = bestIndexCol;
            for(UINT i=0; i<numClusters; i++){
                for(UINT j=0; j<numClusters; j++){
                    
                    //Update the weights for all the neurons, pulling them a little closer to the input example
                    neuronDiff = 0;
                    gamma = 2.0 * grt_sqr( numClusters * sigmaWeight );
                    neuronWeightFunction = exp( -grt_sqr(bir-i)/gamma ) * exp( -grt_sqr(bic-j)/gamma );
                    //std::cout << "best index: " << bestIndexRow << " " << bestIndexCol << " bestDist: " << bestDist << " pos: " << i << " " << j << " neuronWeightFunction: " << neuronWeightFunction << std::endl;
                    for(UINT n=0; n<N; n++){
                        neuronDiff = trainingSample[n] - neurons[i][j][n];
                        weightUpdate = neuronWeightFunction * alpha * neuronDiff;
                        neurons[i][j][n] += weightUpdate;
                    }
                }
            }
        }
        
        error = error / M;
        
        trainingLog << "iter: " << iter << " average error: " << error << std::endl;
        
        //Compute the error
        delta = fabs( error-lastError );
        lastError = error;
        
        //Check to see if we should stop
        //Note: the '&& false' disables the early-stopping check, so training always runs for maxNumEpochs
        if( delta <= minChange && false ){
            converged = true;
            keepTraining = false;
        }
        
        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is INF!" << std::endl;
            return false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }
    
    numTrainingIterationsToConverge = iter;
    trained = true;
    
    return true;
}
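//The per-neuron weighting used in the update above is a Gaussian function of the neuron's grid
//distance from the best matching unit, with gamma = 2 * (numClusters * sigmaWeight)^2. The small
//standalone sketch below isolates that neighbourhood function; it is not part of GRT and the
//function and parameter names are illustrative only.
#include <cmath>

double neighbourhoodWeight( const unsigned int row, const unsigned int col,
                            const unsigned int bmuRow, const unsigned int bmuCol,
                            const unsigned int numClusters, const double sigmaWeight ){
    //gamma controls how quickly the influence of the winning neuron falls off across the grid
    const double gamma = 2.0 * (numClusters * sigmaWeight) * (numClusters * sigmaWeight);
    const double dRow = double(bmuRow) - double(row);
    const double dCol = double(bmuCol) - double(col);
    //Separable Gaussian over the row and column distances: exp(-dRow^2/gamma) * exp(-dCol^2/gamma)
    return std::exp( -(dRow*dRow)/gamma ) * std::exp( -(dCol*dCol)/gamma );
}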
bool LinearRegression::train(LabelledRegressionData &trainingData){
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();
    trained = false;
    
    if( M == 0 ){
        errorLog << "train(LabelledRegressionData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( K == 0 ){
        errorLog << "train(LabelledRegressionData &trainingData) - Training data has zero target dimensions!" << endl;
        return false;
    }
    
    numFeatures = N;
    numOutputDimensions = 1; //Linear Regression will have 1 output
    inputVectorRanges.clear();
    targetVectorRanges.clear();
    
    //Scale the training and validation data, if needed
    if( useScaling ){
        //Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();
        
        //Find the ranges for the target data
        targetVectorRanges = trainingData.getTargetRanges();
        
        //Scale the training data
        trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
    }
    
    //Reset the weights
    Random rand;
    w0 = rand.getRandomNumberUniform(-0.1,0.1);
    w.resize(N);
    for(UINT j=0; j<N; j++){
        w[j] = rand.getRandomNumberUniform(-0.1,0.1);
    }
    
    double error = 0;
    double errorSum = 0;
    double lastErrorSum = 0;
    double delta = 0;
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    vector< UINT > randomTrainingOrder(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            //Note: unlike logistic regression, linear regression uses the raw linear prediction (no sigmoid)
            VectorDouble x = trainingData[i].getInputVector();
            VectorDouble y = trainingData[i].getTargetVector();
            double h = w0;
            for(UINT j=0; j<N; j++){
                h += x[j] * w[j];
            }
            error = y[0] - h;
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                w[j] += learningRate * error * x[j];
            }
            w0 += learningRate * error;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumIterations ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << endl;
    }
    
    //Flag that the algorithm has been trained
    regressionData.resize(1,0);
    trained = true;
    return trained;
}
bool LogisticRegression::train(LabelledRegressionData trainingData){
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();
    trained = false;
    trainingResults.clear();
    
    if( M == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( K == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - Training data has zero target dimensions!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = 1; //Logistic Regression will have 1 output
    inputVectorRanges.clear();
    targetVectorRanges.clear();
    
    //Scale the training and validation data, if needed
    if( useScaling ){
        //Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();
        
        //Find the ranges for the target data
        targetVectorRanges = trainingData.getTargetRanges();
        
        //Scale the training data
        trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
    }
    
    //Reset the weights
    Random rand;
    w0 = rand.getRandomNumberUniform(-0.1,0.1);
    w.resize(N);
    for(UINT j=0; j<N; j++){
        w[j] = rand.getRandomNumberUniform(-0.1,0.1);
    }
    
    double error = 0;
    double lastSquaredError = 0;
    double delta = 0;
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    vector< UINT > randomTrainingOrder(M);
    TrainingResult result;
    trainingResults.reserve(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        totalSquaredTrainingError = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            VectorDouble x = trainingData[i].getInputVector();
            VectorDouble y = trainingData[i].getTargetVector();
            double h = w0;
            for(UINT j=0; j<N; j++){
                h += x[j] * w[j];
            }
            error = y[0] - sigmoid( h );
            totalSquaredTrainingError += SQR(error);
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                w[j] += learningRate * error * x[j];
            }
            w0 += learningRate * error;
        }
        
        //Compute the error
        delta = fabs( totalSquaredTrainingError-lastSquaredError );
        lastSquaredError = totalSquaredTrainingError;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        if( isinf( totalSquaredTrainingError ) || isnan( totalSquaredTrainingError ) ){
            errorLog << "train(LabelledRegressionData &trainingData) - Training failed! Total squared error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << endl;
            return false;
        }
        
        //Store the training results
        rootMeanSquaredTrainingError = sqrt( totalSquaredTrainingError / double(M) );
        result.setRegressionResult(iter,totalSquaredTrainingError,rootMeanSquaredTrainingError);
        trainingResults.push_back( result );
        
        //Notify any observers of the new training data
        trainingResultsObserverManager.notifyObservers( result );
        
        trainingLog << "Epoch: " << iter << " SSE: " << totalSquaredTrainingError << " Delta: " << delta << endl;
    }
    
    //Flag that the algorithm has been trained
    regressionData.resize(1,0);
    trained = true;
    return trained;
}
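//The core of the logistic regression loop above, distilled into a single-sample update: compute
//the linear activation h = w0 + w.x, pass it through the sigmoid, and nudge each weight by
//learningRate * error * x[j]. This is a minimal standalone sketch, not part of GRT; the function
//and parameter names are illustrative only.
#include <cmath>
#include <vector>

void logisticSGDUpdate( const std::vector<double> &x, const double y,
                        std::vector<double> &w, double &w0, const double learningRate ){
    //Linear activation
    double h = w0;
    for(size_t j=0; j<x.size(); j++) h += x[j] * w[j];
    
    //Prediction error through the logistic (sigmoid) function
    const double error = y - 1.0/( 1.0 + std::exp(-h) );
    
    //Gradient-style weight update
    for(size_t j=0; j<x.size(); j++) w[j] += learningRate * error * x[j];
    w0 += learningRate * error;
}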
bool SelfOrganizingMap::train_( MatrixFloat &data ){
    
    //Clear any previous models
    clear();
    
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    numInputDimensions = N;
    numOutputDimensions = numClusters;
    Random rand;
    
    //Setup the neurons
    neurons.resize( numClusters );
    
    if( neurons.size() != numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons Vector, there might not be enough memory!" << std::endl;
        return false;
    }
    
    for(UINT j=0; j<numClusters; j++){
        
        //Init the neuron
        neurons[j].init( N, 0.5 );
        
        //Set the weights as a random training example
        neurons[j].weights = data.getRowVector( rand.getRandomNumberInt(0, M) );
    }
    
    //Setup the network weights
    switch( networkTypology ){
        case RANDOM_NETWORK:
            networkWeights.resize(numClusters, numClusters);
            
            //Set the diagonal weights as 1 (as i==j)
            for(UINT i=0; i<numClusters; i++){
                networkWeights[i][i] = 1;
            }
            
            //Randomize the other weights
            UINT indexA = 0;
            UINT indexB = 0;
            Float weight = 0;
            for(UINT i=0; i<numClusters*numClusters; i++){
                indexA = rand.getRandomNumberInt(0, numClusters);
                indexB = rand.getRandomNumberInt(0, numClusters);
                
                //Make sure the two random indices are not the same (the diagonal weights have already been set to 1)
                if( indexA != indexB ){
                    //Pick a random weight between these two neurons
                    weight = rand.getRandomNumberUniform(0,1);
                    
                    //The weight between neurons a and b is mirrored
                    networkWeights[indexA][indexB] = weight;
                    networkWeights[indexB][indexA] = weight;
                }
            }
            break;
    }
    
    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }
    
    Float error = 0;
    Float lastError = 0;
    Float trainingSampleError = 0;
    Float delta = 0;
    Float minChange = 0;
    Float weightUpdate = 0;
    Float weightUpdateSum = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Enter the main training loop
    while( keepTraining ){
        
        //Update alpha based on the current iteration
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);
        
        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT i=0; i<M; i++){
            
            trainingSampleError = 0;
            
            //Get the i'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[i] );
            
            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndex = 0;
            for(UINT j=0; j<numClusters; j++){
                dist = neurons[j].getSquaredWeightDistance( trainingSample );
                if( dist < bestDist ){
                    bestDist = dist;
                    bestIndex = j;
                }
            }
            
            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights updated more
            for(UINT j=0; j<numClusters; j++){
                
                //Update the weights for the j'th neuron
                weightUpdateSum = 0;
                neuronDiff = 0;
                for(UINT n=0; n<N; n++){
                    neuronDiff = trainingSample[n] - neurons[j][n];
                    weightUpdate = networkWeights[bestIndex][j] * alpha * neuronDiff;
                    neurons[j][n] += weightUpdate;
                    weightUpdateSum += neuronDiff;
                }
                trainingSampleError += grt_sqr( weightUpdateSum );
            }
            error += grt_sqrt( trainingSampleError / numClusters );
        }
        
        //Compute the error
        delta = fabs( error-lastError );
        lastError = error;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            converged = true;
            keepTraining = false;
        }
        
        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is INF!" << std::endl;
            return false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }
    
    numTrainingIterationsToConverge = iter;
    trained = true;
    
    return true;
}