Example #1
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));
    
    //Init the model
    model.init( classLabel, N );
    
    //Set up the target vector: samples matching the class label are relabelled as positive (1.0), all other samples as negative (0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0.0;
    }
    
    //In most cases, the training data is grouped by class (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for the stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then reused at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        UINT m=0;
        while( m < M ){
          //Get the batch data for this update
          UINT roundSize = m+batchSize < M ? batchSize : M-m;
          batchMean.fill(0.0);
          for(UINT i=0; i<roundSize; i++){
            for(UINT j=0; j<N; j++){
              batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
              batchMean[j] += batchData[i][j];
            }
          }

          for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;

          //Compute the error on this batch, given the current weights
          error = 0.0;
          for(UINT i=0; i<roundSize; i++){
            error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
          }
          error /= roundSize;
          errorSum += error;

          //Update the weights using the mean error over the batch and the batch mean of the inputs
          for(UINT j=0; j<N; j++){
            model.w[j] += learningRate * error * batchMean[j];
          }
          model.w0 += learningRate * error;

          m += roundSize;
        }

        //Compute the change in the error since the last epoch
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }
    
    return true;
}
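
The function above trains a single one-vs-all logistic model for classLabel using mini-batch gradient descent over the shuffled training order. Below is a minimal usage sketch, assuming the GRT-style Softmax classifier API (the file name and parameter values are hypothetical); the classifier's own train method is expected to call trainSoftmaxModel once per class internally.

#include <cstdlib>
#include <GRT/GRT.h>
using namespace GRT;

int main(){
    //Load some labelled training data (the file name is a placeholder)
    ClassificationData trainingData;
    if( !trainingData.load( "TrainingData.grt" ) ) return EXIT_FAILURE;

    //Configure and train the softmax classifier; training fits one
    //one-vs-all model per class, as in the examples on this page
    Softmax softmax;
    softmax.setLearningRate( 0.01 );
    softmax.setMinChange( 1.0e-5 );
    softmax.setMaxNumEpochs( 500 );
    if( !softmax.train( trainingData ) ) return EXIT_FAILURE;

    return EXIT_SUCCESS;
}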
Example #2
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    Vector< UINT > randomTrainingOrder(M);
    
    //Init the model
    model.init( classLabel, N );
    
    //Set up the target vector: samples matching the class label are relabelled as positive (1.0), all other samples as negative (0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0.0;
    }
    
    //In most cases, the training data is grouped by class (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for the stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then reused at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the next sample from the shuffled training order
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate * error * data[i][j];
            }
            model.w0 += learningRate * error;
        }
        
        //Compute the change in the error since the last epoch
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
    }
    
    return true;
}
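
Both examples rely on model.compute to map an input vector to a value in the range [0,1], which is then compared against the 0/1 target. The sketch below shows the assumed behaviour: a logistic (sigmoid) response over the bias plus the weighted sum of the inputs. The function name is hypothetical and the weights are passed in explicitly for illustration only.

#include <cmath>
#include <GRT/GRT.h>
using namespace GRT;

//Assumed equivalent of SoftmaxModel::compute for a model with weights w and bias w0
Float computeSketch( const VectorFloat &w, const Float w0, const VectorFloat &x ){
    const UINT N = (UINT)w.size();
    Float sum = w0;
    for(UINT j=0; j<N; j++){
        sum += w[j] * x[j];
    }
    return 1.0 / ( 1.0 + exp( -sum ) );   //squashes the score into the range [0,1]
}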