예제 #1
0
파일: Softmax.cpp 프로젝트: nickgillian/grt
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        UINT m=0;
        while( m < M ){
          //Get the batch data for this update
          UINT roundSize = m+batchSize < M ? batchSize : M-m;
          batchMean.fill(0.0);
          for(UINT i=0; i<roundSize; i++){
            for(UINT j=0; j<N; j++){
              batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
              batchMean[j] += batchData[i][j];
            }
          }

          for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;

          //Compute the error on this batch, given the current weights
          error = 0.0;
          for(UINT i=0; i<roundSize; i++){
            error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
          }
          error /= roundSize;
          errorSum += error;

          //Update the weights
          for(UINT j=0; j<N; j++){
            model.w[j] += learningRate  * error * batchMean[j];
          }
          model.w0 += learningRate  * error;

          m += roundSize;
        }

        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }
    
    return true;
}
예제 #2
0
파일: Softmax.cpp 프로젝트: BryanBo-Cao/grt
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    Vector< UINT > randomTrainingOrder(M);
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate  * error * data[i][j];
            }
            model.w0 += learningRate  * error;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
    }
    
    return true;
}
예제 #3
0
bool SelfOrganizingMap::train_( MatrixFloat &data ){
    
    //Clear any previous models
    clear();
    
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    numInputDimensions = N;
    numOutputDimensions = numClusters*numClusters;
    Random rand;
    
    //Setup the neurons
    neurons.resize( numClusters, numClusters );
    
    if( neurons.getSize() != numClusters*numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons matrix, there might not be enough memory!" << std::endl;
        return false;
    }
    
    //Init the neurons
    for(UINT i=0; i<numClusters; i++){
        for(UINT j=0; j<numClusters; j++){
            neurons[i][j].init( N, 0.5, SOM_MIN_TARGET, SOM_MAX_TARGET );
        }
    }
    
    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,SOM_MIN_TARGET,SOM_MAX_TARGET);
            }
        }
    }
    
    Float error = 0;
    Float lastError = 0;
    Float trainingSampleError = 0;
    Float delta = 0;
    Float minChange = 0;
    Float weightUpdate = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    Float neuronWeightFunction = 0;
    Float gamma = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Enter the main training loop
    while( keepTraining ){
        
        //Update alpha based on the current iteration
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);
        
        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT m=0; m<M; m++){
            
            trainingSampleError = 0;
            
            //Get the i'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[m] );
            
            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndexRow = 0;
            UINT bestIndexCol = 0;
            for(UINT i=0; i<numClusters; i++){
                for(UINT j=0; j<numClusters; j++){
                    dist = neurons[i][j].getSquaredWeightDistance( trainingSample );
                    if( dist < bestDist ){
                        bestDist = dist;
                        bestIndexRow = i;
                        bestIndexCol = j;
                    }
                }
            }
            error += bestDist;
            
            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights update more
            const Float bir = bestIndexRow;
            const Float bic = bestIndexCol;
            for(UINT i=0; i<numClusters; i++){  
                for(UINT j=0; j<numClusters; j++){
                
                    //Update the weights for all the neurons, pulling them a little closer to the input example
                    neuronDiff = 0;
                    gamma = 2.0 * grt_sqr( numClusters * sigmaWeight );
                    neuronWeightFunction = exp( -grt_sqr(bir-i)/gamma ) * exp( -grt_sqr(bic-j)/gamma );
                    //std::cout << "best index: " << bestIndexRow << " " << bestIndexCol << " bestDist: " << bestDist << " pos: " << i << " " << j << " neuronWeightFunction: " << neuronWeightFunction << std::endl;
                    for(UINT n=0; n<N; n++){
                        neuronDiff = trainingSample[n] - neurons[i][j][n];
                        weightUpdate = neuronWeightFunction * alpha * neuronDiff;
                        neurons[i][j][n] += weightUpdate;
                    }
                }
            }
        }

        error = error / M;

        trainingLog << "iter: " << iter << " average error: " << error << std::endl;
        
        //Compute the error
        delta = fabs( error-lastError );
        lastError = error;
        
        //Check to see if we should stop
        if( delta <= minChange && false ){
            converged = true;
            keepTraining = false;
        }
        
        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is NAN!" << std::endl;
            return false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }
    
    numTrainingIterationsToConverge = iter;
    trained = true;
    
    return true;
}
예제 #4
0
bool LinearRegression::train(LabelledRegressionData &trainingData){
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();
    trained = false;
    
    if( M == 0 ){
        errorLog << "train(LabelledRegressionData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( K == 0 ){
        errorLog << "train(LabelledRegressionData &trainingData) - The number of target dimensions is not 1!" << endl;
        return false;
    }
    
    numFeatures = N;
    numOutputDimensions = 1; //Logistic Regression will have 1 output
    inputVectorRanges.clear();
    targetVectorRanges.clear();
    
    //Scale the training and validation data, if needed
	if( useScaling ){
		//Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();
        
        //Find the ranges for the target data
		targetVectorRanges = trainingData.getTargetRanges();
        
		//Scale the training data
		trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
	}
    
    //Reset the weights
    Random rand;
    w0 = rand.getRandomNumberUniform(-0.1,0.1);
    w.resize(N);
    for(UINT j=0; j<N; j++){
        w[j] = rand.getRandomNumberUniform(-0.1,0.1);
    }

    double error = 0;
    double errorSum = 0;
    double lastErrorSum = 0;
    double delta = 0;
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    vector< UINT > randomTrainingOrder(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            VectorDouble x = trainingData[i].getInputVector();
            VectorDouble y = trainingData[i].getTargetVector();
            double h = w0;
            for(UINT j=0; j<N; j++){
                h += x[j] * w[j];
            }
            error = y[0] - sigmoid( h );
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                w[j] += learningRate  * error * x[j];
            }
            w0 += learningRate * error;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumIterations ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << endl;
    }
    
    //Flag that the algorithm has been trained
    regressionData.resize(1,0);
    trained = true;
    return trained;
}
bool LogisticRegression::train(LabelledRegressionData trainingData){
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();
    trained = false;
    trainingResults.clear();
    
    if( M == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( K == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - The number of target dimensions is not 1!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = 1; //Logistic Regression will have 1 output
    inputVectorRanges.clear();
    targetVectorRanges.clear();
    
    //Scale the training and validation data, if needed
	if( useScaling ){
		//Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();
        
        //Find the ranges for the target data
		targetVectorRanges = trainingData.getTargetRanges();
        
		//Scale the training data
		trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
	}
    
    //Reset the weights
    Random rand;
    w0 = rand.getRandomNumberUniform(-0.1,0.1);
    w.resize(N);
    for(UINT j=0; j<N; j++){
        w[j] = rand.getRandomNumberUniform(-0.1,0.1);
    }

    double error = 0;
    double lastSquaredError = 0;
    double delta = 0;
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    vector< UINT > randomTrainingOrder(M);
    TrainingResult result;
    trainingResults.reserve(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        totalSquaredTrainingError = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            VectorDouble x = trainingData[i].getInputVector();
            VectorDouble y = trainingData[i].getTargetVector();
            double h = w0;
            for(UINT j=0; j<N; j++){
                h += x[j] * w[j];
            }
            error = y[0] - sigmoid( h );
            totalSquaredTrainingError += SQR(error);
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                w[j] += learningRate * error * x[j];
            }
            w0 += learningRate * error;
        }
        
        //Compute the error
        delta = fabs( totalSquaredTrainingError-lastSquaredError );
        lastSquaredError = totalSquaredTrainingError;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        if( isinf( totalSquaredTrainingError ) || isnan( totalSquaredTrainingError ) ){
            errorLog << "train(LabelledRegressionData &trainingData) - Training failed! Total squared error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << endl;
            return false;
        }
        
        //Store the training results
        rootMeanSquaredTrainingError = sqrt( totalSquaredTrainingError / double(M) );
        result.setRegressionResult(iter,totalSquaredTrainingError,rootMeanSquaredTrainingError);
        trainingResults.push_back( result );
        
        //Notify any observers of the new training data
        trainingResultsObserverManager.notifyObservers( result );
        
        trainingLog << "Epoch: " << iter << " SSE: " << totalSquaredTrainingError << " Delta: " << delta << endl;
    }
    
    //Flag that the algorithm has been trained
    regressionData.resize(1,0);
    trained = true;
    return trained;
}
예제 #6
0
bool SelfOrganizingMap::train_( MatrixFloat &data ){
    
    //Clear any previous models
    clear();
    
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    numInputDimensions = N;
    numOutputDimensions = numClusters;
    Random rand;
    
    //Setup the neurons
    neurons.resize( numClusters );
    
    if( neurons.size() != numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons Vector, there might not be enough memory!" << std::endl;
        return false;
    }
    
    for(UINT j=0; j<numClusters; j++){
        
        //Init the neuron
        neurons[j].init( N, 0.5 );
        
        //Set the weights as a random training example
        neurons[j].weights = data.getRowVector( rand.getRandomNumberInt(0, M) );
    }
    
    //Setup the network weights
    switch( networkTypology ){
        case RANDOM_NETWORK:
            networkWeights.resize(numClusters, numClusters);
            
            //Set the diagonal weights as 1 (as i==j)
            for(UINT i=0; i<numClusters; i++){
                networkWeights[i][i] = 1;
            }
            
            //Randomize the other weights
            UINT indexA = 0;
            UINT indexB = 0;
            Float weight = 0;
            for(UINT i=0; i<numClusters*numClusters; i++){
                indexA = rand.getRandomNumberInt(0, numClusters);
                indexB = rand.getRandomNumberInt(0, numClusters);
                
                //Make sure the two random indexs are the same (as this is a diagonal and should be 1)
                if( indexA != indexB ){
                    //Pick a random weight between these two neurons
                    weight = rand.getRandomNumberUniform(0,1);
                    
                    //The weight betwen neurons a and b is the mirrored
                    networkWeights[indexA][indexB] = weight;
                    networkWeights[indexB][indexA] = weight;
                }
            }
            break;
    }
    
    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }
    
    Float error = 0;
    Float lastError = 0;
    Float trainingSampleError = 0;
    Float delta = 0;
    Float minChange = 0;
    Float weightUpdate = 0;
    Float weightUpdateSum = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Enter the main training loop
    while( keepTraining ){
        
        //Update alpha based on the current iteration
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);
        
        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT i=0; i<M; i++){
            
            trainingSampleError = 0;
            
            //Get the i'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[i] );
            
            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndex = 0;
            for(UINT j=0; j<numClusters; j++){
                dist = neurons[j].getSquaredWeightDistance( trainingSample );
                if( dist < bestDist ){
                    bestDist = dist;
                    bestIndex = j;
                }
            }
            
            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights update more
            for(UINT j=0; j<numClusters; j++){
                
                //Update the weights for the j'th neuron
                weightUpdateSum = 0;
                neuronDiff = 0;
                for(UINT n=0; n<N; n++){
                    neuronDiff = trainingSample[n] - neurons[j][n];
                    weightUpdate = networkWeights[bestIndex][j] * alpha * neuronDiff;
                    neurons[j][n] += weightUpdate;
                    weightUpdateSum += neuronDiff;
                }
                
                trainingSampleError += grt_sqr( weightUpdateSum );
            }
            
            error += grt_sqrt( trainingSampleError / numClusters );
        }
        
        //Compute the error
        delta = fabs( error-lastError );
        lastError = error;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            converged = true;
            keepTraining = false;
        }
        
        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is NAN!" << std::endl;
            return false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }
    
    numTrainingIterationsToConverge = iter;
    trained = true;
    
    return true;
}