bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){

    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));

    //Init the model
    model.init( classLabel, N );

    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0.0;
    }

    //In most cases, the training data is grouped by class (e.g., 100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This ordering can be a problem for the stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order
    //of the training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());

    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using mini-batch stochastic gradient descent
        errorSum = 0;
        UINT m = 0;
        while( m < M ){

            //Get the batch data for this update (the final batch may be smaller than batchSize)
            UINT roundSize = m+batchSize < M ? batchSize : M-m;
            batchMean.fill(0.0);
            for(UINT i=0; i<roundSize; i++){
                for(UINT j=0; j<N; j++){
                    batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
                    batchMean[j] += batchData[i][j];
                }
            }
            for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;

            //Compute the average error on this batch, given the current weights
            error = 0.0;
            for(UINT i=0; i<roundSize; i++){
                error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
            }
            error /= roundSize;
            errorSum += error;

            //Update the weights using the batch mean and the average batch error
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate * error * batchMean[j];
            }
            model.w0 += learningRate * error;

            m += roundSize;
        }

        //Compute the change in error between this epoch and the last
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop training
        if( delta <= minChange ){
            keepTraining = false;
        }

        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }

        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;

        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }

    return true;
}
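//The weight update above is the standard gradient step for a logistic model: if model.compute
//returns the logistic sigmoid of (w.x + w0), then moving each weight by
//learningRate * (target - prediction) * input follows the negative gradient of the per-sample error.
//The sketch below is a minimal, standalone illustration of that same update rule; it uses plain
//std::vector<double> instead of the GRT SoftmaxModel and VectorFloat types, and the sigmoid form of
//compute() is an assumption for illustration, not a copy of the library's implementation.
#include <cmath>
#include <vector>

struct SimpleLogisticModel {
    std::vector<double> w; //One weight per input dimension
    double w0 = 0.0;       //Bias term

    //Assumed equivalent of SoftmaxModel::compute: the logistic sigmoid of the weighted sum
    double compute(const std::vector<double> &x) const {
        double z = w0;
        for (size_t j = 0; j < w.size(); j++) z += w[j] * x[j];
        return 1.0 / (1.0 + std::exp(-z));
    }

    //One stochastic gradient step, mirroring the update inside trainSoftmaxModel:
    //w[j] += learningRate * (y - prediction) * x[j], with the bias updated by the error alone
    void update(const std::vector<double> &x, double y, double learningRate) {
        const double error = y - compute(x);
        for (size_t j = 0; j < w.size(); j++) w[j] += learningRate * error * x[j];
        w0 += learningRate * error;
    }
};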
//Per-sample variant of trainSoftmaxModel: the weights are updated after every training sample
//(effectively a batch size of 1) rather than after each mini-batch.
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){

    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    Vector< UINT > randomTrainingOrder(M);

    //Init the model
    model.init( classLabel, N );

    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0.0;
    }

    //In most cases, the training data is grouped by class (e.g., 100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This ordering can be a problem for the stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order
    //of the training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){

            //Select the next sample in the shuffled order
            UINT i = randomTrainingOrder[m];

            //Compute the error for this sample, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;

            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate * error * data[i][j];
            }
            model.w0 += learningRate * error;
        }

        //Compute the change in error between this epoch and the last
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop training
        if( delta <= minChange ){
            keepTraining = false;
        }

        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }

        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
    }

    return true;
}
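//A hedged sketch of how trainSoftmaxModel might be driven by a one-vs-rest training loop:
//one SoftmaxModel is trained per class label, each treating its own class as the positive
//samples and every other class as negative. This helper and the member names it uses
//(models, classLabels) are illustrative assumptions rather than a verbatim part of the
//Softmax class, and the ClassificationData accessors below are assumed from its public interface.
bool Softmax::trainOneVsRestModels(ClassificationData &trainingData){ //Hypothetical helper

    const UINT K = trainingData.getNumClasses();
    models.resize( K );
    classLabels.resize( K );

    for(UINT k=0; k<K; k++){
        //Assumed accessor: the unique label of the k'th class in the dataset
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;

        //Train one binary model that separates class k from all other classes
        if( !trainSoftmaxModel( classLabels[k], models[k], trainingData ) ){
            return false; //Training failed for this class
        }
    }

    return true;
}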