Exemple #1
0
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        UINT m=0;
        while( m < M ){
          //Get the batch data for this update
          UINT roundSize = m+batchSize < M ? batchSize : M-m;
          batchMean.fill(0.0);
          for(UINT i=0; i<roundSize; i++){
            for(UINT j=0; j<N; j++){
              batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
              batchMean[j] += batchData[i][j];
            }
          }

          for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;

          //Compute the error on this batch, given the current weights
          error = 0.0;
          for(UINT i=0; i<roundSize; i++){
            error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
          }
          error /= roundSize;
          errorSum += error;

          //Update the weights
          for(UINT j=0; j<N; j++){
            model.w[j] += learningRate  * error * batchMean[j];
          }
          model.w0 += learningRate  * error;

          m += roundSize;
        }

        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }
    
    return true;
}
bool LogisticRegression::train(LabelledRegressionData trainingData){
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();
    trained = false;
    trainingResults.clear();
    
    if( M == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( K == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - The number of target dimensions is not 1!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = 1; //Logistic Regression will have 1 output
    inputVectorRanges.clear();
    targetVectorRanges.clear();
    
    //Scale the training and validation data, if needed
	if( useScaling ){
		//Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();
        
        //Find the ranges for the target data
		targetVectorRanges = trainingData.getTargetRanges();
        
		//Scale the training data
		trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
	}
    
    //Reset the weights
    Random rand;
    w0 = rand.getRandomNumberUniform(-0.1,0.1);
    w.resize(N);
    for(UINT j=0; j<N; j++){
        w[j] = rand.getRandomNumberUniform(-0.1,0.1);
    }

    double error = 0;
    double lastSquaredError = 0;
    double delta = 0;
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    vector< UINT > randomTrainingOrder(M);
    TrainingResult result;
    trainingResults.reserve(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        totalSquaredTrainingError = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            VectorDouble x = trainingData[i].getInputVector();
            VectorDouble y = trainingData[i].getTargetVector();
            double h = w0;
            for(UINT j=0; j<N; j++){
                h += x[j] * w[j];
            }
            error = y[0] - sigmoid( h );
            totalSquaredTrainingError += SQR(error);
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                w[j] += learningRate * error * x[j];
            }
            w0 += learningRate * error;
        }
        
        //Compute the error
        delta = fabs( totalSquaredTrainingError-lastSquaredError );
        lastSquaredError = totalSquaredTrainingError;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        if( isinf( totalSquaredTrainingError ) || isnan( totalSquaredTrainingError ) ){
            errorLog << "train(LabelledRegressionData &trainingData) - Training failed! Total squared error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << endl;
            return false;
        }
        
        //Store the training results
        rootMeanSquaredTrainingError = sqrt( totalSquaredTrainingError / double(M) );
        result.setRegressionResult(iter,totalSquaredTrainingError,rootMeanSquaredTrainingError);
        trainingResults.push_back( result );
        
        //Notify any observers of the new training data
        trainingResultsObserverManager.notifyObservers( result );
        
        trainingLog << "Epoch: " << iter << " SSE: " << totalSquaredTrainingError << " Delta: " << delta << endl;
    }
    
    //Flag that the algorithm has been trained
    regressionData.resize(1,0);
    trained = true;
    return trained;
}
Exemple #3
0
bool AdaBoost::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    if( trainingData.getNumSamples() <= 1 ){
        errorLog << "train_(ClassificationData &trainingData) - There are not enough training samples to train a model! Number of samples: " << trainingData.getNumSamples()  << endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    const UINT M = trainingData.getNumSamples();
    const UINT POSITIVE_LABEL = WEAK_CLASSIFIER_POSITIVE_CLASS_LABEL;
    const UINT NEGATIVE_LABEL = WEAK_CLASSIFIER_NEGATIVE_CLASS_LABEL;
    double alpha = 0;
    const double beta = 0.001;
    double epsilon = 0;
    TrainingResult trainingResult;
    
    const UINT K = (UINT)weakClassifiers.size();
    if( K == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - No weakClassifiers have been set. You need to set at least one weak classifier first." << endl;
        return false;
    }

    classLabels.resize(numClasses);
    models.resize(numClasses);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        trainingData.scale(ranges,0,1);
    }
    
    //Create the weights vector
    VectorDouble weights(M);
    
    //Create the error matrix
    MatrixDouble errorMatrix(K,M);
    
    for(UINT classIter=0; classIter<numClasses; classIter++){
        
        //Get the class label for the current class
        classLabels[classIter] = trainingData.getClassLabels()[classIter];
        
        //Set the class label of the current model
        models[ classIter ].setClassLabel( classLabels[classIter] );
        
        //Setup the labels for this class, POSITIVE_LABEL == 1, NEGATIVE_LABEL == 2
        ClassificationData classData;
        classData.setNumDimensions(trainingData.getNumDimensions());
        for(UINT i=0; i<M; i++){
            UINT label = trainingData[i].getClassLabel()==classLabels[classIter] ? POSITIVE_LABEL : NEGATIVE_LABEL;
            VectorDouble trainingSample = trainingData[i].getSample();
            classData.addSample(label,trainingSample);
        }
        
        //Setup the initial training sample weights
        std::fill(weights.begin(),weights.end(),1.0/M);
        
        //Run the boosting loop
        bool keepBoosting = true;
        UINT t = 0;
        
        while( keepBoosting ){
            
            //Pick the classifier from the family of classifiers that minimizes the total error
            UINT bestClassifierIndex = 0;
            double minError = numeric_limits<double>::max();
            for(UINT k=0; k<K; k++){
                //Get the k'th possible classifier
                WeakClassifier *weakLearner = weakClassifiers[k];
                
                //Train the current classifier
                if( !weakLearner->train(classData,weights) ){
                    errorLog << "Failed to train weakLearner!" << endl;
                    return false;
                }
                
                //Compute the weighted error for this clasifier
                double e = 0;
                double positiveLabel = weakLearner->getPositiveClassLabel();
                double numCorrect = 0;
                double numIncorrect = 0;
                for(UINT i=0; i<M; i++){
                    //Only penalize errors
                    double prediction = weakLearner->predict( classData[i].getSample() );
                    
                    if( (prediction == positiveLabel && classData[i].getClassLabel() != POSITIVE_LABEL) ||        //False positive
                        (prediction != positiveLabel && classData[i].getClassLabel() == POSITIVE_LABEL) ){       //False negative
                        e += weights[i]; //Increase the error proportional to the weight of the example
                        errorMatrix[k][i] = 1; //Flag that there was an error
                        numIncorrect++;
                    }else{
                        errorMatrix[k][i] = 0; //Flag that there was no error
                        numCorrect++;
                    }
                }
                
                trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Classifier: " << k << " WeightedError: " << e << " NumCorrect: " << numCorrect/M << " NumIncorrect: " <<numIncorrect/M << endl;
                
                if( e < minError ){
                    minError = e;
                    bestClassifierIndex = k;
                }
                
            }
  
            epsilon = minError;
            
            //Set alpha, using the M1 weight value, small weights (close to 0) will receive a strong weight in the final classifier
            alpha = 0.5 * log( (1.0-epsilon)/epsilon );
            
            trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Best Classifier Index: " << bestClassifierIndex << " MinError: " << minError << " Alpha: " << alpha << endl;
            
            if( isinf(alpha) ){ keepBoosting = false; trainingLog << "Alpha is INF. Stopping boosting for current class" << endl; }
            if( 0.5 - epsilon <= beta ){ keepBoosting = false; trainingLog << "Epsilon <= Beta. Stopping boosting for current class" << endl; }
            if( ++t >= numBoostingIterations ) keepBoosting = false;

            trainingResult.setClassificationResult(t, minError, this);
            trainingResults.push_back(trainingResult);
            trainingResultsObserverManager.notifyObservers( trainingResult );
            
            if( keepBoosting ){
                
                //Add the best weak classifier to the committee
                models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                
                //Update the weights for the next boosting iteration
                double reWeight = (1.0 - epsilon) / epsilon;
                double oldSum = 0;
                double newSum = 0;
                for(UINT i=0; i<M; i++){
                    oldSum += weights[i];
                    //Only update the weights that resulted in an incorrect prediction
                    if( errorMatrix[bestClassifierIndex][i] == 1 ) weights[i] *= reWeight;
                    newSum += weights[i];
                }
                
                //Normalize all the weights
                //This results to increasing the weights of the samples that were incorrectly labelled
                //While decreasing the weights of the samples that were correctly classified
                reWeight = oldSum/newSum;
                for(UINT i=0; i<M; i++){
                    weights[i] *= reWeight;
                }
                
            }else{
                trainingLog << "Stopping boosting training at iteration : " << t-1 << " with an error of " << epsilon << endl;
                if( t-1 == 0 ){
                    //Add the best weak classifier to the committee (we have to add it as this is the first iteration)
                    if( isinf(alpha) ){ alpha = 1; } //If alpha is infinite then the first classifier got everything correct
                    models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                }
            }
            
        }
    }
    
    //Normalize the weights
    for(UINT k=0; k<numClasses; k++){
        models[k].normalizeWeights();
    }
    
    //Flag that the model has been trained
    trained = true;
    
    //Setup the data for prediction
    predictedClassLabel = 0;
    maxLikelihood = 0;
    classLikelihoods.resize(numClasses);
    classDistances.resize(numClasses);
    
    return true;
}
Exemple #4
0
bool BernoulliRBM::train_(MatrixFloat &data){
    
    const UINT numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    numOutputDimensions = numHiddenUnits;
    numVisibleUnits = numInputDimensions;
    
    trainingLog << "NumInputDimensions: " << numInputDimensions << std::endl;
    trainingLog << "NumOutputDimensions: " << numOutputDimensions << std::endl;
    
    if( randomizeWeightsForTraining ){
    
        //Init the weights matrix
        weightsMatrix.resize(numHiddenUnits, numVisibleUnits);
        
        Float a = 1.0 / numVisibleUnits;
        for(UINT i=0; i<numHiddenUnits; i++) {
            for(UINT j=0; j<numVisibleUnits; j++) {
                weightsMatrix[i][j] = rand.getRandomNumberUniform(-a, a);
            }
        }

        //Init the bias units
        visibleLayerBias.resize( numVisibleUnits );
        hiddenLayerBias.resize( numHiddenUnits );
        std::fill(visibleLayerBias.begin(),visibleLayerBias.end(),0);
        std::fill(hiddenLayerBias.begin(),hiddenLayerBias.end(),0);
        
    }else{
        if( weightsMatrix.getNumRows() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix row size does not match the number of hidden units!" << std::endl;
            return false;
        }
        if( weightsMatrix.getNumCols() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix row size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( visibleLayerBias.size() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Visible layer bias size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( hiddenLayerBias.size() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Hidden layer bias size does not match the number of hidden units!" << std::endl;
            return false;
        }
    }
    
    //Flag the model has been trained encase the user wants to save the model during a training iteration using an observer
    trained = true;
    
    //Make sure the data is scaled between [0 1]
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = grt_scale(data[i][j], ranges[j].minValue, ranges[j].maxValue, 0.0, 1.0);
            }
        }
    }
    

    const UINT numBatches = static_cast<UINT>( ceil( Float(numTrainingSamples)/batchSize ) );
    
    //Setup the batch indexs
    Vector< BatchIndexs > batchIndexs( numBatches );
    UINT startIndex = 0;
    for(UINT i=0; i<numBatches; i++){
        batchIndexs[i].startIndex = startIndex;
        batchIndexs[i].endIndex = startIndex + batchSize;
        
        //Make sure the last batch end index is not larger than the number of training examples
        if( batchIndexs[i].endIndex >= numTrainingSamples ){
            batchIndexs[i].endIndex = numTrainingSamples;
        }
        
        //Get the batch size
        batchIndexs[i].batchSize = batchIndexs[i].endIndex - batchIndexs[i].startIndex;
        
        //Set the start index for the next batch
        startIndex = batchIndexs[i].endIndex;
    }
    
    Timer timer;
    UINT i,j,n,epoch,noChangeCounter = 0;
    Float startTime = 0;
    Float alpha = learningRate;
    Float error = 0;
    Float err = 0;
    Float delta = 0;
    Float lastError = 0;
    Vector< UINT > indexList(numTrainingSamples);
    TrainingResult trainingResult;
    MatrixFloat wT( numVisibleUnits, numHiddenUnits );       //Stores a transposed copy of the weights vector
    MatrixFloat vW( numHiddenUnits, numVisibleUnits );       //Stores the weight velocity updates
    MatrixFloat tmpW( numHiddenUnits, numVisibleUnits );     //Stores the weight values that will be used to update the main weights matrix at each batch update
    MatrixFloat v1( batchSize, numVisibleUnits );            //Stores the real batch data during a batch update
    MatrixFloat v2( batchSize, numVisibleUnits );            //Stores the sampled batch data during a batch update
    MatrixFloat h1( batchSize, numHiddenUnits );             //Stores the hidden states given v1 and the current weightsMatrix
    MatrixFloat h2( batchSize, numHiddenUnits );             //Stores the sampled hidden states given v2 and the current weightsMatrix
    MatrixFloat c1( numHiddenUnits, numVisibleUnits );       //Stores h1' * v1
    MatrixFloat c2( numHiddenUnits, numVisibleUnits );       //Stores h2' * v2
    MatrixFloat vDiff( batchSize, numVisibleUnits );         //Stores the difference between v1-v2
    MatrixFloat hDiff( batchSize, numVisibleUnits );         //Stores the difference between h1-h2
    MatrixFloat cDiff( numHiddenUnits, numVisibleUnits );    //Stores the difference between c1-c2
    VectorFloat vDiffSum( numVisibleUnits );                 //Stores the column sum of vDiff
    VectorFloat hDiffSum( numHiddenUnits );                  //Stores the column sum of hDiff
    VectorFloat visibleLayerBiasVelocity( numVisibleUnits ); //Stores the velocity update of the visibleLayerBias
    VectorFloat hiddenLayerBiasVelocity( numHiddenUnits );   //Stores the velocity update of the hiddenLayerBias
    
    //Set all the velocity weights to zero
    vW.setAllValues( 0 );
    std::fill(visibleLayerBiasVelocity.begin(),visibleLayerBiasVelocity.end(),0);
    std::fill(hiddenLayerBiasVelocity.begin(),hiddenLayerBiasVelocity.end(),0);
    
    //Randomize the order that the training samples will be used in
    for(UINT i=0; i<numTrainingSamples; i++) indexList[i] = i;
    if( randomiseTrainingOrder ){
        std::random_shuffle(indexList.begin(), indexList.end());
    }
    
    //Start the main training loop
    timer.start();
    for(epoch=0; epoch<maxNumEpochs; epoch++) {
        startTime = timer.getMilliSeconds();
        error = 0;
        
        //Randomize the batch order
        std::random_shuffle(batchIndexs.begin(),batchIndexs.end());
        
        //Run each of the batch updates
        for(UINT k=0; k<numBatches; k+=batchStepSize){
            
            //Resize the data matrices, the matrices will only be resized if the rows cols are different
            v1.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h1.resize( batchIndexs[k].batchSize, numHiddenUnits );
            v2.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h2.resize( batchIndexs[k].batchSize, numHiddenUnits );
            
            //Setup the data pointers, using data pointers saves a few ms on large matrix updates
            Float **w_p = weightsMatrix.getDataPointer();
            Float **wT_p = wT.getDataPointer();
            Float **vW_p = vW.getDataPointer();
            Float **data_p = data.getDataPointer();
            Float **v1_p = v1.getDataPointer();
            Float **v2_p = v2.getDataPointer();
            Float **h1_p = h1.getDataPointer();
            Float **h2_p = h2.getDataPointer();
            Float *vlb_p = &visibleLayerBias[0];
            Float *hlb_p = &hiddenLayerBias[0];
            
            //Get the batch data
            UINT index = 0;
            for(i=batchIndexs[k].startIndex; i<batchIndexs[k].endIndex; i++){
                for(j=0; j<numVisibleUnits; j++){
                    v1_p[index][j] = data_p[ indexList[i] ][j];
                }
                index++;
            }
            
            //Copy a transposed version of the weights matrix, this is used to compute h1 and h2
            for(i=0; i<numHiddenUnits; i++)
                for(j=0; j<numVisibleUnits; j++)
                    wT_p[j][i] = w_p[i][j];
            
            //Compute h1
            h1.multiple(v1, wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h1_p[n][i] = sigmoidRandom( h1_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute v2
            v2.multiple(h1, weightsMatrix);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numVisibleUnits; i++){
                    v2_p[n][i] = sigmoidRandom( v2_p[n][i] + vlb_p[i] );
                }
            }
            
            //Compute h2
            h2.multiple(v2,wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h2_p[n][i] = grt_sigmoid( h2_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute c1, c2 and the difference between v1-v2
            c1.multiple(h1,v1,true);
            c2.multiple(h2,v2,true);
            vDiff.subtract(v1, v2);
            
            //Compute the sum of vdiff
            for(j=0; j<numVisibleUnits; j++){
                vDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    vDiffSum[j] += vDiff[i][j];
                }
            }
            
            //Compute the difference between h1 and h2
            hDiff.subtract(h1, h2);
            for(j=0; j<numHiddenUnits; j++){
                hDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    hDiffSum[j] += hDiff[i][j];
                }
            }
            
            //Compute the difference between c1 and c2
            cDiff.subtract(c1,c2);
            
            //Update the weight velocities
            for(i=0; i<numHiddenUnits; i++){
                for(j=0; j<numVisibleUnits; j++){
                    vW_p[i][j] = ((momentum * vW_p[i][j]) + (alpha * cDiff[i][j])) / batchIndexs[k].batchSize;
                }
            }
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBiasVelocity[i] = ((momentum * visibleLayerBiasVelocity[i]) + (alpha * vDiffSum[i])) / batchIndexs[k].batchSize;
            }
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBiasVelocity[i] = ((momentum * hiddenLayerBiasVelocity[i]) + (alpha * hDiffSum[i])) / batchIndexs[k].batchSize;
            }
            
            //Update the weights
            weightsMatrix.add( vW );
            
            //Update the bias for the visible layer
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBias[i] += visibleLayerBiasVelocity[i];
            }
            
            //Update the bias for the visible layer
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBias[i] += hiddenLayerBiasVelocity[i];
            }
            
            //Compute the reconstruction error
            err = 0;
            for(i=0; i<batchIndexs[k].batchSize; i++){
                for(j=0; j<numVisibleUnits; j++){
                    err += SQR( v1[i][j] - v2[i][j] );
                }
            }
            
            error += err / batchIndexs[k].batchSize;
        }
        error /= numBatches;
        delta = lastError - error;
        lastError = error;
        
        trainingLog << "Epoch: " << epoch+1 << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Learning rate: " << alpha;
        trainingLog << " Momentum: " << momentum;
        trainingLog << " Average reconstruction error: " << error;
        trainingLog << " Delta: " << delta << std::endl;
        
        //Update the learning rate
        alpha *= learningRateUpdate;
        
        trainingResult.setClassificationResult(epoch, error, this);
        trainingResults.push_back(trainingResult);
        trainingResultsObserverManager.notifyObservers( trainingResult );
        
        //Check for convergance
        if( fabs(delta) < minChange ){
            if( ++noChangeCounter >= minNumEpochs ){
                trainingLog << "Stopping training. MinChange limit reached!" << std::endl;
                break;
            }
        }else noChangeCounter = 0;
        
    }
    trainingLog << "Training complete after " << epoch << " epochs. Total training time: " << timer.getMilliSeconds()/1000.0 << " seconds" << std::endl;
    
    trained = true;
    
    return true;
}