bool KMeans::predict_(VectorFloat &inputVector){

    if( !trained ){
        return false;
    }

    if( inputVector.getSize() != numInputDimensions ){
        return false;
    }

    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
        }
    }

    const Float sigma = 1.0;
    const Float gamma = 1.0 / (2.0*grt_sqr(sigma));
    Float sum = 0;
    Float dist = 0;
    UINT minIndex = 0;
    bestDistance = grt_numeric_limits< Float >::max();
    predictedClusterLabel = 0;
    maxLikelihood = 0;
    if( clusterLikelihoods.getSize() != numClusters )
        clusterLikelihoods.resize( numClusters );
    if( clusterDistances.getSize() != numClusters )
        clusterDistances.resize( numClusters );

    for(UINT i=0; i<numClusters; i++){

        //We don't need to compute the sqrt, as the argmin is unchanged without it and it is faster
        dist = 0;
        for(UINT j=0; j<numInputDimensions; j++){
            dist += grt_sqr( inputVector[j]-clusters[i][j] );
        }
        clusterDistances[i] = dist;

        //This will give us a value close to 1 for a dist of 0, and a value closer to 0 when the dist is large
        clusterLikelihoods[i] = exp( - grt_sqr(gamma * dist) );

        sum += clusterLikelihoods[i];

        if( dist < bestDistance ){
            bestDistance = dist;
            minIndex = i;
        }
    }

    //Normalize the likelihoods so they sum to 1
    for(UINT i=0; i<numClusters; i++){
        clusterLikelihoods[i] /= sum;
    }

    predictedClusterLabel = clusterLabels[ minIndex ];
    maxLikelihood = clusterLikelihoods[ minIndex ];

    return true;
}
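//A minimal standalone sketch (not part of GRT) of the distance-to-likelihood mapping used in
//KMeans::predict_ above: each squared distance d is mapped to exp(-(gamma*d)^2) and the results
//are normalized to sum to 1. The function name and the use of std::vector<double> in place of
//VectorFloat are illustrative assumptions.
#include <cmath>
#include <vector>

std::vector<double> distancesToLikelihoods( const std::vector<double> &squaredDistances, double sigma = 1.0 ){
    const double gamma = 1.0 / (2.0 * sigma * sigma);
    std::vector<double> likelihoods( squaredDistances.size() );
    double sum = 0.0;
    for( std::size_t i=0; i<squaredDistances.size(); i++ ){
        //A distance of 0 maps to 1; large distances decay towards 0
        likelihoods[i] = std::exp( -std::pow( gamma * squaredDistances[i], 2.0 ) );
        sum += likelihoods[i];
    }
    if( sum > 0.0 ){
        for( std::size_t i=0; i<likelihoods.size(); i++ ){
            likelihoods[i] /= sum; //Normalize so the likelihoods sum to 1
        }
    }
    return likelihoods;
}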
UINT KMeans::estep(const MatrixFloat &data) {

    UINT k,m,n,kmin;
    Float dmin,d;
    nchg = 0;
    kmin = 0;

    //Reset the cluster counts
    for (k=0; k < numClusters; k++)
        count[k] = 0;

    //Search for the closest center and reassign if needed
    for (m=0; m < numTrainingSamples; m++) {
        dmin = 9.99e+99; //Set dmin to a really big value
        for (k=0; k < numClusters; k++) {
            d = 0.0;
            for (n=0; n < numInputDimensions; n++)
                d += grt_sqr( data[m][n]-clusters[k][n] );
            if (d <= dmin){
                dmin = d;
                kmin = k;
            }
        }
        if ( kmin != assign[m] ){
            nchg++;
            assign[m] = kmin;
        }
        count[kmin]++;
    }

    return nchg;
}
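//A hedged sketch (not GRT code) of the M-step that pairs with the E-step above in Lloyd's
//algorithm: each centroid is recomputed as the mean of the samples currently assigned to it,
//using the per-cluster counts gathered during the E-step. Plain std::vector containers stand
//in for GRT's MatrixFloat; the function name and signature are illustrative assumptions.
#include <cstddef>
#include <vector>

void mstep( const std::vector< std::vector<double> > &data,
            const std::vector<std::size_t> &assign,
            const std::vector<std::size_t> &count,
            std::vector< std::vector<double> > &clusters ){
    const std::size_t K = clusters.size();
    const std::size_t N = K ? clusters[0].size() : 0;
    //Zero the centroids, then accumulate the samples assigned to each cluster
    for( std::size_t k=0; k<K; k++ )
        for( std::size_t n=0; n<N; n++ ) clusters[k][n] = 0.0;
    for( std::size_t m=0; m<data.size(); m++ )
        for( std::size_t n=0; n<N; n++ ) clusters[ assign[m] ][n] += data[m][n];
    //Divide by the counts (clusters with no assigned samples are left at zero)
    for( std::size_t k=0; k<K; k++ )
        if( count[k] > 0 )
            for( std::size_t n=0; n<N; n++ ) clusters[k][n] /= double( count[k] );
}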
Float HierarchicalClustering::computeClusterVariance( const ClusterInfo &cluster, const MatrixFloat &data ){

    VectorFloat mean(N,0);
    VectorFloat std(N,0);

    //Compute the mean
    UINT numSamples = cluster.getNumSamplesInCluster();
    for(UINT j=0; j<N; j++){
        for(UINT i=0; i<numSamples; i++){
            UINT index = cluster[i];
            mean[j] += data[ index ][j];
        }
        mean[j] /= Float( numSamples );
    }

    //Compute the std dev
    for(UINT j=0; j<N; j++){
        for(UINT i=0; i<numSamples; i++){
            std[j] += grt_sqr( data[ cluster[i] ][j] - mean[j] );
        }
        std[j] = grt_sqrt( std[j] / Float( numSamples-1 ) );
    }

    //Return the average standard deviation across the N dimensions
    Float variance = 0;
    for(UINT j=0; j<N; j++){
        variance += std[j];
    }
    return variance/N;
}
UINT KMeansQuantizer::quantize(const VectorFloat &inputVector){

    if( !trained ){
        errorLog << "quantize(const VectorFloat &inputVector) - The quantizer has not been trained!" << std::endl;
        return 0;
    }

    if( inputVector.getSize() != numInputDimensions ){
        errorLog << "quantize(const VectorFloat &inputVector) - The size of the inputVector (" << inputVector.getSize() << ") does not match that of the filter (" << numInputDimensions << ")!" << std::endl;
        return 0;
    }

    //Find the minimum cluster
    Float minDist = grt_numeric_limits< Float >::max();
    UINT quantizedValue = 0;

    for(UINT k=0; k<numClusters; k++){
        //Compute the squared Euclidean distance
        quantizationDistances[k] = 0;
        for(UINT i=0; i<numInputDimensions; i++){
            quantizationDistances[k] += grt_sqr( inputVector[i]-clusters[k][i] );
        }
        if( quantizationDistances[k] < minDist ){
            minDist = quantizationDistances[k];
            quantizedValue = k;
        }
    }

    featureVector[0] = quantizedValue;
    featureDataReady = true;

    return quantizedValue;
}
Float VectorFloat::getStdDev() const {

    Float mean = getMean();
    Float stdDev = 0.0;
    const size_type N = this->size();
    const Float *data = getData();

    for(size_type i=0; i<N; i++){
        stdDev += grt_sqr(data[i]-mean);
    }
    stdDev = grt_sqrt( stdDev / Float(N-1) );

    return stdDev;
}
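//For reference, a standalone one-pass (Welford) variant of the sample standard deviation
//computed above; it avoids the second pass over the data and is more numerically stable for
//large values. Not part of GRT; the plain double/std::vector types are assumptions.
#include <cmath>
#include <vector>

double welfordStdDev( const std::vector<double> &data ){
    double mean = 0.0, m2 = 0.0;
    std::size_t n = 0;
    for( double x : data ){
        n++;
        const double delta = x - mean;
        mean += delta / double(n);
        m2 += delta * (x - mean); //Accumulates the sum of squared deviations
    }
    return n > 1 ? std::sqrt( m2 / double(n-1) ) : 0.0; //Sample (N-1) standard deviation
}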
Float KMeans::calculateTheta(const MatrixFloat &data){

    Float theta = 0;
    Float sum = 0;
    UINT m,n,k = 0;
    for(m=0; m < numTrainingSamples; m++){
        k = assign[m];
        sum = 0;
        for(n=0; n < numInputDimensions; n++){
            sum += grt_sqr(clusters[k][n] - data[m][n]);
        }
        theta += grt_sqrt(sum);
    }
    theta /= numTrainingSamples;

    return theta;
}
bool SelfOrganizingMap::train_( MatrixFloat &data ){

    //Clear any previous models
    clear();

    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    numInputDimensions = N;
    numOutputDimensions = numClusters*numClusters;

    //Setup the neurons
    neurons.resize( numClusters, numClusters );

    if( neurons.getSize() != numClusters*numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons matrix, there might not be enough memory!" << std::endl;
        return false;
    }

    //Init the neurons
    for(UINT i=0; i<numClusters; i++){
        for(UINT j=0; j<numClusters; j++){
            neurons[i][j].init( N, 0.5, SOM_MIN_TARGET, SOM_MAX_TARGET );
        }
    }

    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,SOM_MIN_TARGET,SOM_MAX_TARGET);
            }
        }
    }

    Float error = 0;
    Float lastError = 0;
    Float delta = 0;
    Float minChange = 0;
    Float weightUpdate = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    Float neuronWeightFunction = 0;
    Float gamma = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);

    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for the stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());

    //The neighborhood scale is constant for a given grid size and sigma weight
    gamma = 2.0 * grt_sqr( numClusters * sigmaWeight );

    //Enter the main training loop
    while( keepTraining ){

        //Update alpha based on the current iteration
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);

        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT m=0; m<M; m++){

            //Get the m'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[m] );

            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndexRow = 0;
            UINT bestIndexCol = 0;
            for(UINT i=0; i<numClusters; i++){
                for(UINT j=0; j<numClusters; j++){
                    dist = neurons[i][j].getSquaredWeightDistance( trainingSample );
                    if( dist < bestDist ){
                        bestDist = dist;
                        bestIndexRow = i;
                        bestIndexCol = j;
                    }
                }
            }
            error += bestDist;

            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights updated more
            const Float bir = bestIndexRow;
            const Float bic = bestIndexCol;
            for(UINT i=0; i<numClusters; i++){
                for(UINT j=0; j<numClusters; j++){

                    //Update the weights for all the neurons, pulling them a little closer to the input example
                    neuronWeightFunction = exp( -grt_sqr(bir-i)/gamma ) * exp( -grt_sqr(bic-j)/gamma );
                    for(UINT n=0; n<N; n++){
                        neuronDiff = trainingSample[n] - neurons[i][j][n];
                        weightUpdate = neuronWeightFunction * alpha * neuronDiff;
                        neurons[i][j][n] += weightUpdate;
                    }
                }
            }
        }

        error = error / M;

        trainingLog << "iter: " << iter << " average error: " << error << std::endl;

        //Compute the error delta
        delta = fabs( error-lastError );
        lastError = error;

        //Check to see if we should stop
        if( delta <= minChange ){
            converged = true;
            keepTraining = false;
        }

        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is INF!" << std::endl;
            return false;
        }

        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }

        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }

    numTrainingIterationsToConverge = iter;
    trained = true;

    return true;
}
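//A minimal sketch (not GRT code) of the Gaussian neighborhood function used in the weight
//update above: neurons close to the best-matching unit (BMU) on the grid receive updates near
//alpha, and the influence decays with grid distance. The free function and its parameter
//names are illustrative.
#include <cmath>

double neighborhoodWeight( unsigned bmuRow, unsigned bmuCol,
                           unsigned row, unsigned col,
                           unsigned gridSize, double sigmaWeight ){
    const double gamma = 2.0 * std::pow( gridSize * sigmaWeight, 2.0 );
    const double dr = double(bmuRow) - double(row);
    const double dc = double(bmuCol) - double(col);
    //Separable Gaussian over the row and column distances to the BMU
    return std::exp( -(dr*dr)/gamma ) * std::exp( -(dc*dc)/gamma );
}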
bool SelfOrganizingMap::train_( MatrixFloat &data ){

    //Clear any previous models
    clear();

    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    numInputDimensions = N;
    numOutputDimensions = numClusters;
    Random rand;

    //Setup the neurons
    neurons.resize( numClusters );

    if( neurons.size() != numClusters ){
        errorLog << "train_( MatrixFloat &data ) - Failed to resize neurons Vector, there might not be enough memory!" << std::endl;
        return false;
    }

    for(UINT j=0; j<numClusters; j++){

        //Init the neuron
        neurons[j].init( N, 0.5 );

        //Set the weights as a random training example
        neurons[j].weights = data.getRowVector( rand.getRandomNumberInt(0, M) );
    }

    //Setup the network weights
    switch( networkTypology ){
        case RANDOM_NETWORK:
            {
                networkWeights.resize(numClusters, numClusters);

                //Zero the weights so any pair not visited by the random init below defaults to 0
                networkWeights.setAllValues( 0 );

                //Set the diagonal weights as 1 (as i==j)
                for(UINT i=0; i<numClusters; i++){
                    networkWeights[i][i] = 1;
                }

                //Randomize the other weights
                UINT indexA = 0;
                UINT indexB = 0;
                Float weight = 0;
                for(UINT i=0; i<numClusters*numClusters; i++){
                    indexA = rand.getRandomNumberInt(0, numClusters);
                    indexB = rand.getRandomNumberInt(0, numClusters);

                    //Make sure the two random indices are not the same (the diagonal should stay 1)
                    if( indexA != indexB ){
                        //Pick a random weight between these two neurons
                        weight = rand.getRandomNumberUniform(0,1);

                        //The weight between neurons a and b is mirrored, so the weight matrix stays symmetric
                        networkWeights[indexA][indexB] = weight;
                        networkWeights[indexB][indexA] = weight;
                    }
                }
            }
            break;
    }

    //Scale the data if needed
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<M; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
            }
        }
    }

    Float error = 0;
    Float lastError = 0;
    Float trainingSampleError = 0;
    Float delta = 0;
    Float minChange = 0;
    Float weightUpdate = 0;
    Float weightUpdateSum = 0;
    Float alpha = 1.0;
    Float neuronDiff = 0;
    UINT iter = 0;
    bool keepTraining = true;
    VectorFloat trainingSample;
    Vector< UINT > randomTrainingOrder(M);

    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for the stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());

    //Enter the main training loop
    while( keepTraining ){

        //Update alpha based on the current iteration
        alpha = Util::scale(iter,0,maxNumEpochs,alphaStart,alphaEnd);

        //Run one epoch of training using the online best-matching-unit algorithm
        error = 0;
        for(UINT i=0; i<M; i++){

            trainingSampleError = 0;

            //Get the i'th random training sample
            trainingSample = data.getRowVector( randomTrainingOrder[i] );

            //Find the best matching unit
            Float dist = 0;
            Float bestDist = grt_numeric_limits< Float >::max();
            UINT bestIndex = 0;
            for(UINT j=0; j<numClusters; j++){
                dist = neurons[j].getSquaredWeightDistance( trainingSample );
                if( dist < bestDist ){
                    bestDist = dist;
                    bestIndex = j;
                }
            }

            //Update the weights based on the distance to the winning neuron
            //Neurons closer to the winning neuron will have their weights updated more
            for(UINT j=0; j<numClusters; j++){

                //Update the weights for the j'th neuron
                weightUpdateSum = 0;
                neuronDiff = 0;
                for(UINT n=0; n<N; n++){
                    neuronDiff = trainingSample[n] - neurons[j][n];
                    weightUpdate = networkWeights[bestIndex][j] * alpha * neuronDiff;
                    neurons[j][n] += weightUpdate;
                    weightUpdateSum += neuronDiff;
                }

                trainingSampleError += grt_sqr( weightUpdateSum );
            }

            error += grt_sqrt( trainingSampleError / numClusters );
        }

        //Compute the error delta
        delta = fabs( error-lastError );
        lastError = error;

        //Check to see if we should stop
        if( delta <= minChange ){
            converged = true;
            keepTraining = false;
        }

        if( grt_isinf( error ) ){
            errorLog << "train_(MatrixFloat &data) - Training failed! Error is INF!" << std::endl;
            return false;
        }

        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }

        trainingLog << "Epoch: " << iter << " Squared Error: " << error << " Delta: " << delta << " Alpha: " << alpha << std::endl;
    }

    numTrainingIterationsToConverge = iter;
    trained = true;

    return true;
}
bool GMM::train_(ClassificationData &trainingData){

    //Clear any old models
    clear();

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data is empty!" << std::endl;
        return false;
    }

    //Set the number of features and number of classes and resize the models buffer
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.resize(numClasses);

    if( numInputDimensions >= 6 ){
        warningLog << "train_(ClassificationData &trainingData) - The number of features in your training data is high (" << numInputDimensions << "). The GMMClassifier does not work well with high dimensional data, you might get better results from one of the other classifiers." << std::endl;
    }

    //Get the ranges of the training data and scale it to the GMM range
    ranges = trainingData.getRanges();
    if( !trainingData.scale(GMM_MIN_SCALE_VALUE, GMM_MAX_SCALE_VALUE) ){
        errorLog << "train_(ClassificationData &trainingData) - Failed to scale training data!" << std::endl;
        return false;
    }

    //Fit a Mixture Model to each class (independently)
    for(UINT k=0; k<numClasses; k++){
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        ClassificationData classData = trainingData.getClassData( classLabel );

        //Train the Mixture Model for this class
        GaussianMixtureModels gaussianMixtureModel;
        gaussianMixtureModel.setNumClusters( numMixtureModels );
        gaussianMixtureModel.setMinChange( minChange );
        gaussianMixtureModel.setMaxNumEpochs( maxIter );

        if( !gaussianMixtureModel.train( classData.getDataAsMatrixFloat() ) ){
            errorLog << "train_(ClassificationData &trainingData) - Failed to train Mixture Model for class " << classLabel << std::endl;
            return false;
        }

        //Setup the model container
        models[k].resize( numMixtureModels );
        models[k].setClassLabel( classLabel );

        //Store the mixture model in the container
        for(UINT j=0; j<numMixtureModels; j++){
            models[k][j].mu = gaussianMixtureModel.getMu().getRowVector(j);
            models[k][j].sigma = gaussianMixtureModel.getSigma()[j];

            //Compute the determinant and invSigma for the real-time prediction
            LUDecomposition ludcmp( models[k][j].sigma );
            if( !ludcmp.inverse( models[k][j].invSigma ) ){
                models.clear();
                errorLog << "train_(ClassificationData &trainingData) - Failed to invert Matrix for class " << classLabel << "!" << std::endl;
                return false;
            }
            models[k][j].det = ludcmp.det();
        }

        //Compute the normalization factor
        models[k].recomputeNormalizationFactor();

        //Compute the rejection thresholds
        Float mu = 0;
        Float sigma = 0;
        VectorFloat predictionResults(classData.getNumSamples(),0);
        for(UINT i=0; i<classData.getNumSamples(); i++){
            VectorFloat sample = classData[i].getSample();
            predictionResults[i] = models[k].computeMixtureLikelihood( sample );
            mu += predictionResults[i];
        }

        //Update mu
        mu /= Float( classData.getNumSamples() );

        //Calculate the standard deviation
        for(UINT i=0; i<classData.getNumSamples(); i++)
            sigma += grt_sqr( (predictionResults[i]-mu) );
        sigma = grt_sqrt( sigma / (Float(classData.getNumSamples())-1.0) );

        //Set the model's training mu and sigma
        models[k].setTrainingMuAndSigma(mu,sigma);

        if( !models[k].recomputeNullRejectionThreshold(nullRejectionCoeff) && useNullRejection ){
            warningLog << "train_(ClassificationData &trainingData) - Failed to recompute rejection threshold for class " << classLabel << " - the nullRejectionCoeff value is too high!" << std::endl;
        }
    }

    //Reset the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = models[k].getClassLabel();
    }

    //Resize the rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        nullRejectionThresholds[k] = models[k].getNullRejectionThreshold();
    }

    //Flag that the models have been trained
    trained = true;

    return true;
}
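//A hedged sketch (not GRT code) of the rejection-threshold idea used above: score every
//training sample against its own class model, fit a Gaussian (mu, sigma) to those scores, and
//reject a runtime prediction whose likelihood falls below mu - coeff * sigma. The threshold
//convention and all names below are assumptions for illustration; GRT encapsulates this
//inside its model classes.
#include <cmath>
#include <vector>

struct RejectionModel {
    double mu = 0.0;
    double sigma = 0.0;
    double threshold( double coeff ) const { return mu - coeff * sigma; }
};

RejectionModel fitRejectionModel( const std::vector<double> &trainingScores ){
    RejectionModel model;
    const std::size_t n = trainingScores.size();
    if( n == 0 ) return model;
    for( double s : trainingScores ) model.mu += s;
    model.mu /= double(n);
    for( double s : trainingScores ) model.sigma += (s - model.mu) * (s - model.mu);
    model.sigma = n > 1 ? std::sqrt( model.sigma / double(n-1) ) : 0.0;
    return model;
}

bool acceptPrediction( double likelihood, const RejectionModel &model, double coeff ){
    //Accept only if the likelihood is at least the class rejection threshold
    return likelihood >= model.threshold( coeff );
}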
bool RegressionTree::computeBestSpiltBestIterativeSpilt( const RegressionData &trainingData, const Vector< UINT > &features, UINT &featureIndex, Float &threshold, Float &minError ){

    const UINT M = trainingData.getNumSamples();
    const UINT N = (UINT)features.size();

    if( N == 0 ) return false;

    minError = grt_numeric_limits< Float >::max();
    UINT bestFeatureIndex = 0;
    UINT groupID = 0;
    Float bestThreshold = 0;
    Float error = 0;
    Float minRange = 0;
    Float maxRange = 0;
    Float step = 0;
    Vector< UINT > groupIndex(M);
    VectorFloat groupCounter(2,0);
    VectorFloat groupMean(2,0);
    VectorFloat groupMSE(2,0);
    Vector< MinMax > ranges = trainingData.getInputRanges();

    //Loop over each feature and try to find the best split point
    for(UINT n=0; n<N; n++){
        minRange = ranges[n].minValue;
        maxRange = ranges[n].maxValue;
        step = (maxRange-minRange)/Float(numSplittingSteps);
        threshold = minRange;
        featureIndex = features[n];
        while( threshold <= maxRange ){

            //Reset the group statistics for this candidate threshold
            groupCounter[0] = groupCounter[1] = 0;
            groupMean[0] = groupMean[1] = 0;
            groupMSE[0] = groupMSE[1] = 0;

            //Iterate over each sample and work out which group it falls into
            for(UINT i=0; i<M; i++){
                groupID = trainingData[i].getInputVector()[featureIndex] >= threshold ? 1 : 0;
                groupIndex[i] = groupID;
                groupMean[ groupID ] += trainingData[i].getInputVector()[featureIndex];
                groupCounter[ groupID ]++;
            }
            groupMean[0] /= groupCounter[0] > 0 ? groupCounter[0] : 1;
            groupMean[1] /= groupCounter[1] > 0 ? groupCounter[1] : 1;

            //Compute the MSE for each group
            for(UINT i=0; i<M; i++){
                groupMSE[ groupIndex[i] ] += grt_sqr( groupMean[ groupIndex[i] ] - trainingData[ i ].getInputVector()[ featureIndex ] );
            }
            groupMSE[0] /= groupCounter[0] > 0 ? groupCounter[0] : 1;
            groupMSE[1] /= groupCounter[1] > 0 ? groupCounter[1] : 1;

            error = grt_sqrt( groupMSE[0] + groupMSE[1] );

            //Store the best threshold and feature index
            if( error < minError ){
                minError = error;
                bestThreshold = threshold;
                bestFeatureIndex = featureIndex;
            }

            //Update the threshold
            threshold += step;
        }
    }

    //Set the best feature index and threshold
    featureIndex = bestFeatureIndex;
    threshold = bestThreshold;

    return true;
}
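//A small standalone sketch (not GRT code) showing how a (featureIndex, threshold) pair found
//by the sweep above would be used to partition samples when growing the tree: samples with a
//feature value >= threshold go right, the rest go left. The container types are illustrative
//stand-ins for GRT's RegressionData.
#include <cstddef>
#include <vector>

void partition( const std::vector< std::vector<double> > &inputs,
                std::size_t featureIndex, double threshold,
                std::vector<std::size_t> &leftIndices,
                std::vector<std::size_t> &rightIndices ){
    leftIndices.clear();
    rightIndices.clear();
    for( std::size_t i=0; i<inputs.size(); i++ ){
        //Same >= convention as the split search above
        if( inputs[i][featureIndex] >= threshold ) rightIndices.push_back(i);
        else leftIndices.push_back(i);
    }
}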