Пример #1
0
//Trains the mixture model on the rows of data by alternating estep() and mstep() calls.
//data: one training sample per row, one input dimension per column. When useScaling is true the
//      matrix is rescaled IN PLACE to the range [0 1] using the per-column ranges of the data.
//Returns true when training succeeded. Returns false (after logging to errorLog) when the data is
//empty, numClusters is zero or larger than the number of samples, estep() fails, or
//computeInvAndDet() fails.
bool GaussianMixtureModels::train_(MatrixFloat &data){
    
    trained = false;
    
    //Clear any previous training results
    det.clear();
    invSigma.clear();
    numTrainingIterationsToConverge = 0;
    
    if( data.getNumRows() == 0 ){
        errorLog << "train_(MatrixFloat &data) - Training Failed! Training data is empty!" << std::endl;
        return false;
    }
    
    //A model with no clusters cannot be trained
    if( numClusters == 0 ){
        errorLog << "train_(MatrixFloat &data) - Training Failed! NumClusters is zero!" << std::endl;
        return false;
    }
    
    //Each initial mean is taken from a different training sample (see below), so there must be at
    //least as many samples as clusters, otherwise the index list would be read out of bounds
    if( numClusters > data.getNumRows() ){
        errorLog << "train_(MatrixFloat &data) - Training Failed! The number of clusters is larger than the number of training samples!" << std::endl;
        return false;
    }
    
    //Resize the variables
    numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    
    //Resize mu and resp
    mu.resize(numClusters,numInputDimensions);
    resp.resize(numTrainingSamples,numClusters);
    
    //Resize sigma
    sigma.resize(numClusters);
    for(UINT k=0; k<numClusters; k++){
        sigma[k].resize(numInputDimensions,numInputDimensions);
    }
    
    //Resize frac and lndets
    frac.resize(numClusters);
    lndets.resize(numClusters);
    
    //Scale the data if needed, the ranges are always stored even if scaling is disabled
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }
    
    //Pick K random starting points for the initial guesses of Mu.
    //The index list is a permutation of the sample indices, so the first K entries are distinct.
    //NOTE(review): assumes getRandomNumberInt(0,numTrainingSamples) never returns numTrainingSamples - confirm
    Random random;
    Vector< UINT > randomIndexs(numTrainingSamples);
    for(UINT i=0; i<numTrainingSamples; i++) randomIndexs[i] = i;
    for(UINT i=0; i<numClusters; i++){
        SWAP(randomIndexs[ i ],randomIndexs[ random.getRandomNumberInt(0,numTrainingSamples) ]);
    }
    for(UINT k=0; k<numClusters; k++){
        for(UINT n=0; n<numInputDimensions; n++){
            mu[k][n] = data[ randomIndexs[k] ][n];
        }
    }
    
    //Setup sigma and the uniform prior on P(k)
    for(UINT k=0; k<numClusters; k++){
        frac[k] = 1.0/Float(numClusters);
        for(UINT i=0; i<numInputDimensions; i++){
            for(UINT j=0; j<numInputDimensions; j++) sigma[k][i][j] = 0;
            sigma[k][i][i] = 1.0e-2;   //Set the diagonal to a small number
        }
    }
    
    loglike = 0;
    bool keepGoing = true;
    Float change = 99.9e99;
    UINT numIterationsNoChange = 0;
    VectorFloat u(numInputDimensions);
    VectorFloat v(numInputDimensions);
    
    while( keepGoing ){
        
        //Run the estep, u and v are working buffers and change receives the update for this iteration
        if( estep( data, u, v, change ) ){
            
            //Run the mstep
            mstep( data );
        
            //Check for convergence: stop once the change has stayed below minChange for
            //minNumEpochs consecutive iterations
            if( fabs( change ) < minChange ){
                if( ++numIterationsNoChange >= minNumEpochs ){
                    keepGoing = false;
                }
            }else numIterationsNoChange = 0;
            
            //Always stop once the maximum number of iterations is reached
            if( ++numTrainingIterationsToConverge >= maxNumEpochs ) keepGoing = false;
            
        }else{
            errorLog << "train_(MatrixFloat &data) - Estep failed at iteration " << numTrainingIterationsToConverge << std::endl;
            return false;
        }
    }
    
    //Compute the inverse of sigma and the determinants for prediction
    if( !computeInvAndDet() ){
        det.clear();
        invSigma.clear();
        errorLog << "train_(MatrixFloat &data) - Failed to compute inverse and determinat!" << std::endl;
        return false;
    }
    
    //Flag that the model was trained
    trained = true;
    
    //Setup the cluster labels, labels start at 1
    clusterLabels.resize(numClusters);
    for(UINT i=0; i<numClusters; i++){
        clusterLabels[i] = i+1;
    }
    clusterLikelihoods.resize(numClusters,0);
    clusterDistances.resize(numClusters,0);
    
    return true;
}
Пример #2
0
//Trains a numClusters x numClusters grid of neurons on the rows of data.
//data: one training sample per row. When useScaling is true the matrix is rescaled IN PLACE to
//      the range [SOM_MIN_TARGET SOM_MAX_TARGET] using the per-column ranges of the data.
//For every sample the closest neuron is found and all neurons are then pulled towards the sample,
//weighted by their grid distance to the closest neuron and by the current learning rate alpha.
//Returns true when training ran for maxNumEpochs epochs. Returns false (after logging to errorLog)
//when the data is empty, the neuron grid could not be resized, or the epoch error becomes infinite.
bool SelfOrganizingMap::train_( MatrixFloat &data ){
    
    //Clear any previous models
    clear();
    
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    
    //Without any samples the average error below would be 0/0, so there is nothing to train on
    if( M == 0 ){
        errorLog << "train_( MatrixFloat &data ) - Training failed! Training data is empty!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = numClusters*numClusters;
    
    //Setup the neurons
    neurons.resize( numClusters, numClusters );
    
    if( neurons.getSize() != numClusters*numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons matrix, there might not be enough memory!" << std::endl;
        return false;
    }
    
    //Init the neurons
    for(UINT i=0; i<numClusters; i++){
        for(UINT j=0; j<numClusters; j++){
            neurons[i][j].init( N, 0.5, SOM_MIN_TARGET, SOM_MAX_TARGET );
        }
    }
    
    //Scale the data if needed, the ranges are always stored even if scaling is disabled
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,SOM_MIN_TARGET,SOM_MAX_TARGET);
            }
        }
    }
    
    Float error = 0;
    Float lastError = 0;
    Float delta = 0;
    Float minChange = 0; //NOTE(review): this local presumably hides a class-level minChange setting - confirm
    Float weightUpdate = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    Float neuronWeightFunction = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);
    
    //The width of the neighbourhood function only depends on numClusters and sigmaWeight,
    //so it is computed once here instead of once per neuron per sample
    const Float gamma = 2.0 * grt_sqr( numClusters * sigmaWeight );
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Enter the main training loop
    while( keepTraining ){
        
        //Update alpha based on the current iteration, moving from alphaStart towards alphaEnd
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);
        
        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT m=0; m<M; m++){
            
            //Get the m'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[m] );
            
            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndexRow = 0;
            UINT bestIndexCol = 0;
            for(UINT i=0; i<numClusters; i++){
                for(UINT j=0; j<numClusters; j++){
                    dist = neurons[i][j].getSquaredWeightDistance( trainingSample );
                    if( dist < bestDist ){
                        bestDist = dist;
                        bestIndexRow = i;
                        bestIndexCol = j;
                    }
                }
            }
            error += bestDist;
            
            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights update more
            const Float bir = bestIndexRow;
            const Float bic = bestIndexCol;
            for(UINT i=0; i<numClusters; i++){  
                for(UINT j=0; j<numClusters; j++){
                
                    //Update the weights for all the neurons, pulling them a little closer to the input example
                    neuronWeightFunction = exp( -grt_sqr(bir-i)/gamma ) * exp( -grt_sqr(bic-j)/gamma );
                    for(UINT n=0; n<N; n++){
                        neuronDiff = trainingSample[n] - neurons[i][j][n];
                        weightUpdate = neuronWeightFunction * alpha * neuronDiff;
                        neurons[i][j][n] += weightUpdate;
                    }
                }
            }
        }

        //Average the best-matching-unit distance over the samples in this epoch
        error = error / M;

        trainingLog << "iter: " << iter << " average error: " << error << std::endl;
        
        //Compute the change in the error since the last epoch
        delta = fabs( error-lastError );
        lastError = error;
        
        //Check to see if we should stop
        //NOTE(review): the "&& false" disables this check, so training always runs for maxNumEpochs
        //epochs and converged is never set here. Kept as-is to preserve the existing behaviour.
        if( delta <= minChange && false ){
            converged = true;
            keepTraining = false;
        }
        
        //NOTE(review): the check is for an infinite error, although the message reports NAN
        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is NAN!" << std::endl;
            return false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }
    
    numTrainingIterationsToConverge = iter;
    trained = true;
    
    return true;
}
Пример #3
0
//Trains the RBM on the rows of data using mini-batch updates with momentum.
//data: one training sample per row, one visible unit per column. When useScaling is true the
//      matrix is rescaled IN PLACE to the range [0 1] using the per-column ranges of the data.
//When randomizeWeightsForTraining is false the existing weights and biases are reused, and their
//sizes must already match numHiddenUnits and the number of columns in data.
//Training runs for at most maxNumEpochs epochs and stops early once the change in the average
//reconstruction error has stayed below minChange for minNumEpochs consecutive epochs.
//Returns true when training completed. Returns false (after logging to errorLog) when the data is
//empty, batchSize or batchStepSize is zero, or the existing weights/biases have the wrong size.
bool BernoulliRBM::train_(MatrixFloat &data){
    
    const UINT numTrainingSamples = data.getNumRows();
    
    //There is nothing to train on without any samples
    if( numTrainingSamples == 0 ){
        errorLog << "train_(MatrixFloat &data) - Training data is empty!" << std::endl;
        return false;
    }
    
    //A zero batch size would cause a division by zero when computing the number of batches
    if( batchSize == 0 ){
        errorLog << "train_(MatrixFloat &data) - Batch size must be greater than zero!" << std::endl;
        return false;
    }
    
    //A zero step size would stop the batch loop from ever advancing
    if( batchStepSize == 0 ){
        errorLog << "train_(MatrixFloat &data) - Batch step size must be greater than zero!" << std::endl;
        return false;
    }
    
    numInputDimensions = data.getNumCols();
    numOutputDimensions = numHiddenUnits;
    numVisibleUnits = numInputDimensions;
    
    trainingLog << "NumInputDimensions: " << numInputDimensions << std::endl;
    trainingLog << "NumOutputDimensions: " << numOutputDimensions << std::endl;
    
    if( randomizeWeightsForTraining ){
    
        //Init the weights matrix with small random values in the range [-1/numVisibleUnits 1/numVisibleUnits]
        weightsMatrix.resize(numHiddenUnits, numVisibleUnits);
        
        Float a = 1.0 / numVisibleUnits;
        for(UINT i=0; i<numHiddenUnits; i++) {
            for(UINT j=0; j<numVisibleUnits; j++) {
                weightsMatrix[i][j] = rand.getRandomNumberUniform(-a, a);
            }
        }

        //Init the bias units
        visibleLayerBias.resize( numVisibleUnits );
        hiddenLayerBias.resize( numHiddenUnits );
        std::fill(visibleLayerBias.begin(),visibleLayerBias.end(),0);
        std::fill(hiddenLayerBias.begin(),hiddenLayerBias.end(),0);
        
    }else{
        //The existing model is being reused, so make sure it matches the training data
        if( weightsMatrix.getNumRows() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix row size does not match the number of hidden units!" << std::endl;
            return false;
        }
        if( weightsMatrix.getNumCols() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix column size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( visibleLayerBias.size() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Visible layer bias size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( hiddenLayerBias.size() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Hidden layer bias size does not match the number of hidden units!" << std::endl;
            return false;
        }
    }
    
    //Flag the model has been trained in case the user wants to save the model during a training iteration using an observer
    trained = true;
    
    //Make sure the data is scaled between [0 1]
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = grt_scale(data[i][j], ranges[j].minValue, ranges[j].maxValue, 0.0, 1.0);
            }
        }
    }
    

    const UINT numBatches = static_cast<UINT>( ceil( Float(numTrainingSamples)/batchSize ) );
    
    //Setup the batch indexs
    Vector< BatchIndexs > batchIndexs( numBatches );
    UINT startIndex = 0;
    for(UINT i=0; i<numBatches; i++){
        batchIndexs[i].startIndex = startIndex;
        batchIndexs[i].endIndex = startIndex + batchSize;
        
        //Make sure the last batch end index is not larger than the number of training examples
        if( batchIndexs[i].endIndex >= numTrainingSamples ){
            batchIndexs[i].endIndex = numTrainingSamples;
        }
        
        //Get the batch size
        batchIndexs[i].batchSize = batchIndexs[i].endIndex - batchIndexs[i].startIndex;
        
        //Set the start index for the next batch
        startIndex = batchIndexs[i].endIndex;
    }
    
    Timer timer;
    UINT i = 0;
    UINT j = 0;
    UINT n = 0;
    UINT epoch = 0;
    UINT noChangeCounter = 0;
    Float startTime = 0;
    Float alpha = learningRate;
    Float error = 0;
    Float err = 0;
    Float delta = 0;
    Float lastError = 0;
    Vector< UINT > indexList(numTrainingSamples);
    TrainingResult trainingResult;
    MatrixFloat wT( numVisibleUnits, numHiddenUnits );       //Stores a transposed copy of the weights vector
    MatrixFloat vW( numHiddenUnits, numVisibleUnits );       //Stores the weight velocity updates
    MatrixFloat tmpW( numHiddenUnits, numVisibleUnits );     //Stores the weight values that will be used to update the main weights matrix at each batch update
    MatrixFloat v1( batchSize, numVisibleUnits );            //Stores the real batch data during a batch update
    MatrixFloat v2( batchSize, numVisibleUnits );            //Stores the sampled batch data during a batch update
    MatrixFloat h1( batchSize, numHiddenUnits );             //Stores the hidden states given v1 and the current weightsMatrix
    MatrixFloat h2( batchSize, numHiddenUnits );             //Stores the sampled hidden states given v2 and the current weightsMatrix
    MatrixFloat c1( numHiddenUnits, numVisibleUnits );       //Stores h1' * v1
    MatrixFloat c2( numHiddenUnits, numVisibleUnits );       //Stores h2' * v2
    MatrixFloat vDiff( batchSize, numVisibleUnits );         //Stores the difference between v1-v2
    MatrixFloat hDiff( batchSize, numHiddenUnits );          //Stores the difference between h1-h2
    MatrixFloat cDiff( numHiddenUnits, numVisibleUnits );    //Stores the difference between c1-c2
    VectorFloat vDiffSum( numVisibleUnits );                 //Stores the column sum of vDiff
    VectorFloat hDiffSum( numHiddenUnits );                  //Stores the column sum of hDiff
    VectorFloat visibleLayerBiasVelocity( numVisibleUnits ); //Stores the velocity update of the visibleLayerBias
    VectorFloat hiddenLayerBiasVelocity( numHiddenUnits );   //Stores the velocity update of the hiddenLayerBias
    
    //Set all the velocity weights to zero
    vW.setAllValues( 0 );
    std::fill(visibleLayerBiasVelocity.begin(),visibleLayerBiasVelocity.end(),0);
    std::fill(hiddenLayerBiasVelocity.begin(),hiddenLayerBiasVelocity.end(),0);
    
    //Randomize the order that the training samples will be used in
    for(i=0; i<numTrainingSamples; i++) indexList[i] = i;
    if( randomiseTrainingOrder ){
        std::random_shuffle(indexList.begin(), indexList.end());
    }
    
    //Start the main training loop
    timer.start();
    for(epoch=0; epoch<maxNumEpochs; epoch++) {
        startTime = timer.getMilliSeconds();
        error = 0;
        
        //Counts the batches used in this epoch, this can be less than numBatches if batchStepSize is larger than 1
        UINT numBatchesProcessed = 0;
        
        //Randomize the batch order
        std::random_shuffle(batchIndexs.begin(),batchIndexs.end());
        
        //Run each of the batch updates
        for(UINT k=0; k<numBatches; k+=batchStepSize){
            
            //Resize the data matrices, the matrices will only be resized if the rows cols are different
            v1.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h1.resize( batchIndexs[k].batchSize, numHiddenUnits );
            v2.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h2.resize( batchIndexs[k].batchSize, numHiddenUnits );
            
            //Setup the data pointers, using data pointers saves a few ms on large matrix updates
            //The pointers are fetched after the resize calls above as a resize could invalidate them
            Float **w_p = weightsMatrix.getDataPointer();
            Float **wT_p = wT.getDataPointer();
            Float **vW_p = vW.getDataPointer();
            Float **data_p = data.getDataPointer();
            Float **v1_p = v1.getDataPointer();
            Float **v2_p = v2.getDataPointer();
            Float **h1_p = h1.getDataPointer();
            Float **h2_p = h2.getDataPointer();
            Float *vlb_p = &visibleLayerBias[0];
            Float *hlb_p = &hiddenLayerBias[0];
            
            //Get the batch data
            UINT index = 0;
            for(i=batchIndexs[k].startIndex; i<batchIndexs[k].endIndex; i++){
                for(j=0; j<numVisibleUnits; j++){
                    v1_p[index][j] = data_p[ indexList[i] ][j];
                }
                index++;
            }
            
            //Copy a transposed version of the weights matrix, this is used to compute h1 and h2
            for(i=0; i<numHiddenUnits; i++)
                for(j=0; j<numVisibleUnits; j++)
                    wT_p[j][i] = w_p[i][j];
            
            //Compute h1
            h1.multiple(v1, wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h1_p[n][i] = sigmoidRandom( h1_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute v2
            v2.multiple(h1, weightsMatrix);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numVisibleUnits; i++){
                    v2_p[n][i] = sigmoidRandom( v2_p[n][i] + vlb_p[i] );
                }
            }
            
            //Compute h2
            h2.multiple(v2,wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h2_p[n][i] = grt_sigmoid( h2_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute c1, c2 and the difference between v1-v2
            c1.multiple(h1,v1,true);
            c2.multiple(h2,v2,true);
            vDiff.subtract(v1, v2);
            
            //Compute the sum of vdiff
            for(j=0; j<numVisibleUnits; j++){
                vDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    vDiffSum[j] += vDiff[i][j];
                }
            }
            
            //Compute the difference between h1 and h2
            hDiff.subtract(h1, h2);
            for(j=0; j<numHiddenUnits; j++){
                hDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    hDiffSum[j] += hDiff[i][j];
                }
            }
            
            //Compute the difference between c1 and c2
            cDiff.subtract(c1,c2);
            
            //Update the weight velocities
            for(i=0; i<numHiddenUnits; i++){
                for(j=0; j<numVisibleUnits; j++){
                    vW_p[i][j] = ((momentum * vW_p[i][j]) + (alpha * cDiff[i][j])) / batchIndexs[k].batchSize;
                }
            }
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBiasVelocity[i] = ((momentum * visibleLayerBiasVelocity[i]) + (alpha * vDiffSum[i])) / batchIndexs[k].batchSize;
            }
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBiasVelocity[i] = ((momentum * hiddenLayerBiasVelocity[i]) + (alpha * hDiffSum[i])) / batchIndexs[k].batchSize;
            }
            
            //Update the weights
            weightsMatrix.add( vW );
            
            //Update the bias for the visible layer
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBias[i] += visibleLayerBiasVelocity[i];
            }
            
            //Update the bias for the hidden layer
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBias[i] += hiddenLayerBiasVelocity[i];
            }
            
            //Compute the reconstruction error
            err = 0;
            for(i=0; i<batchIndexs[k].batchSize; i++){
                for(j=0; j<numVisibleUnits; j++){
                    err += SQR( v1[i][j] - v2[i][j] );
                }
            }
            
            error += err / batchIndexs[k].batchSize;
            numBatchesProcessed++;
        }
        
        //Average the error over the batches that were actually used in this epoch
        error /= numBatchesProcessed;
        delta = lastError - error;
        lastError = error;
        
        trainingLog << "Epoch: " << epoch+1 << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Learning rate: " << alpha;
        trainingLog << " Momentum: " << momentum;
        trainingLog << " Average reconstruction error: " << error;
        trainingLog << " Delta: " << delta << std::endl;
        
        //Update the learning rate
        alpha *= learningRateUpdate;
        
        trainingResult.setClassificationResult(epoch, error, this);
        trainingResults.push_back(trainingResult);
        trainingResultsObserverManager.notifyObservers( trainingResult );
        
        //Check for convergence: stop once the change in error has stayed below minChange for
        //minNumEpochs consecutive epochs
        if( fabs(delta) < minChange ){
            if( ++noChangeCounter >= minNumEpochs ){
                trainingLog << "Stopping training. MinChange limit reached!" << std::endl;
                break;
            }
        }else noChangeCounter = 0;
        
    }
    trainingLog << "Training complete after " << epoch << " epochs. Total training time: " << timer.getMilliSeconds()/1000.0 << " seconds" << std::endl;
    
    trained = true;
    
    return true;
}
Пример #4
0
//Runs the k-means training loop on data, repeating estep() and mstep() until a stop condition is met.
//The clusters matrix must already be sized numClusters x numInputDimensions before this is called.
//data: the training samples; when useScaling is true it is rescaled IN PLACE via data.scale(0,1).
//The loop stops when:
//  - no assignment changed in the estep and more than minNumEpochs epochs have run (converged), or
//  - computeTheta is on, |delta theta| < minChange and more than minNumEpochs epochs have run (converged), or
//  - maxNumEpochs epochs have run.
//Returns false (after logging to errorLog) if numClusters is zero or the clusters matrix has the
//wrong size, otherwise true.
bool KMeans::trainModel(MatrixFloat &data){

    //Validate the configuration before touching any state
    if( numClusters == 0 ){
        errorLog << "trainModel(MatrixFloat &data) - Failed to train model. NumClusters is zero!" << std::endl;
        return false;
    }

    if( clusters.getNumRows() != numClusters ){
        errorLog << "trainModel(MatrixFloat &data) - Failed to train model. The number of rows in the cluster matrix does not match the number of clusters! You should need to initalize the clusters matrix first before calling this function!" << std::endl;
        return false;
    }

    if( clusters.getNumCols() != numInputDimensions ){
        errorLog << "trainModel(MatrixFloat &data) - Failed to train model. The number of columns in the cluster matrix does not match the number of input dimensions! You should need to initalize the clusters matrix first before calling this function!" << std::endl;
        return false;
    }

    //Working state for the training loop
    Timer epochTimer;
    UINT epochCount = 0;
    Float currentTheta = 0;
    Float previousTheta = 0;
    Float thetaChange = 0;
    bool finished = false;

    //Reset the results of any previous training run
    thetaTracker.clear();
    finalTheta = 0;
    numTrainingIterationsToConverge = 0;
    trained = false;
    converged = false;

    //Store the ranges and scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        data.scale(0,1);
    }

    //Every sample starts with the assignment K+1 so that the first estep
    //counts each sample as changed; the per-cluster counts start at zero
    for(UINT sample=0; sample<numTrainingSamples; sample++){
        assign[sample] = numClusters+1;
    }
    for(UINT cluster=0; cluster<numClusters; cluster++){
        count[cluster] = 0;
    }

    //Run the training loop, at least one epoch is always run
    epochTimer.start();
    do{
        const Float epochStart = epochTimer.getMilliSeconds();

        //E step followed by the M step
        const UINT numReassigned = estep( data );
        mstep( data );

        ++epochCount;

        //Compute theta and its change since the last epoch if needed
        if( computeTheta ){
            currentTheta = calculateTheta(data);
            thetaChange = previousTheta - currentTheta;
            previousTheta = currentTheta;
        }else{
            currentTheta = 0;
            thetaChange = 0;
        }

        //Check the stop conditions
        const bool pastMinEpochs = epochCount > minNumEpochs;
        if( pastMinEpochs && numReassigned == 0 ){
            converged = true;
            finished = true;
        }
        if( epochCount >= maxNumEpochs ){
            finished = true;
        }
        if( computeTheta ){
            if( pastMinEpochs && fabs( thetaChange ) < minChange ){
                converged = true;
                finished = true;
            }
            thetaTracker.push_back( currentTheta );
        }

        trainingLog << "Epoch: " << epochCount << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (epochTimer.getMilliSeconds()-epochStart)/1000.0 << " seconds";
        trainingLog << " Theta: " << currentTheta << " Delta: " << thetaChange << std::endl;

    }while( !finished );

    trainingLog << "Model Trained at epoch: " << epochCount << " with a theta value of: " << currentTheta << std::endl;

    //Store the results of the training
    finalTheta = currentTheta;
    numTrainingIterationsToConverge = epochCount;
    trained = true;

    //Setup the cluster labels, labels start at 1
    clusterLabels.resize(numClusters);
    for(UINT label=0; label<numClusters; label++){
        clusterLabels[label] = label+1;
    }
    clusterLikelihoods.resize(numClusters,0);
    clusterDistances.resize(numClusters,0);

    return true;
}
Пример #5
0
//Trains a network of numClusters neurons on the rows of data.
//data: one training sample per row. When useScaling is true the matrix is rescaled IN PLACE to
//      the range [0 1] using the per-column ranges of the data.
//Each neuron starts as a copy of a randomly chosen training sample. For every sample the closest
//neuron is found and all neurons are then pulled towards the sample, weighted by networkWeights
//and by the current learning rate alpha.
//Returns true when training stopped normally. Returns false (after logging to errorLog) when the
//data is empty, the neurons could not be resized, or the epoch error becomes infinite.
bool SelfOrganizingMap::train_( MatrixFloat &data ){
    
    //Clear any previous models
    clear();
    
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    
    //The neurons are initialized from random training samples, so at least one sample is needed
    if( M == 0 ){
        errorLog << "train_( MatrixFloat &data ) - Training failed! Training data is empty!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = numClusters;
    Random rand;
    
    //Setup the neurons
    neurons.resize( numClusters );
    
    if( neurons.size() != numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons Vector, there might not be enough memory!" << std::endl;
        return false;
    }
    
    for(UINT j=0; j<numClusters; j++){
        
        //Init the neuron
        neurons[j].init( N, 0.5 );
        
        //Set the weights as a random training example
        //NOTE(review): assumes getRandomNumberInt(0,M) never returns M - confirm
        neurons[j].weights = data.getRowVector( rand.getRandomNumberInt(0, M) );
    }
    
    //Setup the network weights
    //NOTE(review): only RANDOM_NETWORK sets up networkWeights here, any other typology would leave it
    //as clear() left it before it is read in the training loop below - confirm this is the only typology
    switch( networkTypology ){
        case RANDOM_NETWORK:
        {
            networkWeights.resize(numClusters, numClusters);
            
            //Start from zero so that any pair of neurons that is not picked below has a defined weight
            //NOTE(review): presumably resize does not initialize the new values - confirm
            networkWeights.setAllValues( 0 );
            
            //Set the diagonal weights as 1 (as i==j)
            for(UINT i=0; i<numClusters; i++){
                networkWeights[i][i] = 1;
            }
            
            //Randomize the other weights
            UINT indexA = 0;
            UINT indexB = 0;
            Float weight = 0;
            for(UINT i=0; i<numClusters*numClusters; i++){
                indexA = rand.getRandomNumberInt(0, numClusters);
                indexB = rand.getRandomNumberInt(0, numClusters);
                
                //Make sure the two random indexs are not the same (as this is a diagonal and should be 1)
                if( indexA != indexB ){
                    //Pick a random weight between these two neurons
                    weight = rand.getRandomNumberUniform(0,1);
                    
                    //The weight between neurons a and b is mirrored
                    networkWeights[indexA][indexB] = weight;
                    networkWeights[indexB][indexA] = weight;
                }
            }
            break;
        }
    }
    
    //Scale the data if needed, the ranges are always stored even if scaling is disabled
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }
    
    Float error = 0;
    Float lastError = 0;
    Float trainingSampleError = 0;
    Float delta = 0;
    Float minChange = 0; //NOTE(review): this local presumably hides a class-level minChange setting - confirm
    Float weightUpdate = 0;
    Float weightUpdateSum = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Enter the main training loop
    while( keepTraining ){
        
        //Update alpha based on the current iteration, moving from alphaStart towards alphaEnd
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);
        
        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT i=0; i<M; i++){
            
            trainingSampleError = 0;
            
            //Get the i'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[i] );
            
            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndex = 0;
            for(UINT j=0; j<numClusters; j++){
                dist = neurons[j].getSquaredWeightDistance( trainingSample );
                if( dist < bestDist ){
                    bestDist = dist;
                    bestIndex = j;
                }
            }
            
            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights update more
            for(UINT j=0; j<numClusters; j++){
                
                //Update the weights for the j'th neuron
                //Note that weightUpdateSum accumulates the raw difference to the sample, not the applied update
                weightUpdateSum = 0;
                neuronDiff = 0;
                for(UINT n=0; n<N; n++){
                    neuronDiff = trainingSample[n] - neurons[j][n];
                    weightUpdate = networkWeights[bestIndex][j] * alpha * neuronDiff;
                    neurons[j][n] += weightUpdate;
                    weightUpdateSum += neuronDiff;
                }
                
                trainingSampleError += grt_sqr( weightUpdateSum );
            }
            
            error += grt_sqrt( trainingSampleError / numClusters );
        }
        
        //Compute the change in the error since the last epoch
        delta = fabs( error-lastError );
        lastError = error;
        
        //Check to see if we should stop, with the local minChange of zero this only
        //triggers when the error is exactly the same as in the last epoch
        if( delta <= minChange ){
            converged = true;
            keepTraining = false;
        }
        
        //NOTE(review): the check is for an infinite error, although the message reports NAN
        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is NAN!" << std::endl;
            return false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }
    
    numTrainingIterationsToConverge = iter;
    trained = true;
    
    return true;
}