bool PrincipalComponentAnalysis::setModel( const VectorFloat &mean, const MatrixFloat &eigenvectors ){

    if( (UINT)mean.size() != eigenvectors.getNumCols() ){
        return false;
    }

    trained = true;
    numInputDimensions = eigenvectors.getNumCols();
    numPrincipalComponents = eigenvectors.getNumRows();
    this->mean = mean;
    stdDev.clear();
    componentWeights.clear();
    eigenvalues.clear();
    sortedEigenvalues.clear();
    this->eigenvectors = eigenvectors;

    //The eigenvectors are already sorted, so the sorted eigenvalues simply hold the default indices
    for(UINT i=0; i<numPrincipalComponents; i++){
        sortedEigenvalues.push_back( IndexedDouble(i,0.0) );
    }

    return true;
}
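//A minimal usage sketch for setModel(...), assuming the mean and eigenvectors were
//pre-computed elsewhere (e.g., offline or loaded from a file). The variable names and
//dimensions below are hypothetical, for illustration only.
PrincipalComponentAnalysis pca;
VectorFloat precomputedMean( 3, 0.0 );   //Mean of the original training data (3 input dimensions)
MatrixFloat precomputedEigVecs( 2, 3 );  //2 principal components x 3 input dimensions
//...fill precomputedMean and precomputedEigVecs with real values...
if( !pca.setModel( precomputedMean, precomputedEigVecs ) ){
    //setModel fails if the mean size does not match the number of eigenvector columns
}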
bool PrincipalComponentAnalysis::computeFeatureVector_(const MatrixDouble &data,const UINT analysisMode){

    trained = false;
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    this->numInputDimensions = N;

    MatrixDouble msData( M, N );

    //Compute the mean and standard deviation of the input data
    mean = data.getMean();
    stdDev = data.getStdDev();

    if( normData ){
        //Normalize the data (note: this assumes every stdDev[j] is non-zero)
        for(UINT i=0; i<M; i++)
            for(UINT j=0; j<N; j++)
                msData[i][j] = (data[i][j]-mean[j]) / stdDev[j];
    }else{
        //Mean subtract the data
        for(UINT i=0; i<M; i++)
            for(UINT j=0; j<N; j++)
                msData[i][j] = data[i][j] - mean[j];
    }

    //Get the covariance matrix
    MatrixDouble cov = msData.getCovarianceMatrix();

    //Use eigenvalue decomposition to find the eigenvectors of the covariance matrix
    EigenvalueDecomposition eig;

    if( !eig.decompose( cov ) ){
        mean.clear();
        stdDev.clear();
        componentWeights.clear();
        sortedEigenvalues.clear();
        eigenvectors.clear();
        errorLog << "computeFeatureVector_(const MatrixDouble &data,const UINT analysisMode) - Failed to decompose input matrix!" << endl;
        return false;
    }

    //Get the eigenvectors and eigenvalues
    eigenvectors = eig.getEigenvectors();
    VectorDouble eigenvalues = eig.getRealEigenvalues();

    //Any eigenvalues less than 0 are not worth anything, so set them to 0
    for(UINT i=0; i<eigenvalues.size(); i++){
        if( eigenvalues[i] < 0 ){
            eigenvalues[i] = 0;
        }
    }

    //Sort the eigenvalues and compute the component weights
    double sum = 0;
    UINT componentIndex = 0;
    sortedEigenvalues.clear();
    componentWeights.resize(N,0);

    while( true ){
        //Find the largest remaining eigenvalue
        double maxValue = 0;
        UINT index = 0;
        for(UINT i=0; i<eigenvalues.size(); i++){
            if( eigenvalues[i] > maxValue ){
                maxValue = eigenvalues[i];
                index = i;
            }
        }

        if( maxValue == 0 || componentIndex >= eigenvalues.size() ){
            break;
        }

        sortedEigenvalues.push_back( IndexedDouble(index,maxValue) );
        componentWeights[ componentIndex++ ] = eigenvalues[ index ];
        sum += eigenvalues[ index ];
        eigenvalues[ index ] = 0; //Zero the current maximum so it won't be selected again
    }

    double cumulativeVariance = 0;
    switch( analysisMode ){
        case MAX_VARIANCE:
            //Normalize the component weights and work out how many components are needed to reach the maxVariance
            numPrincipalComponents = 0;
            for(UINT k=0; k<N; k++){
                componentWeights[k] /= sum;
                cumulativeVariance += componentWeights[k];
                if( cumulativeVariance >= maxVariance && numPrincipalComponents==0 ){
                    numPrincipalComponents = k+1;
                }
            }
        break;
        case MAX_NUM_PCS:
            //Normalize the component weights and compute the maxVariance
            maxVariance = 0;
            for(UINT k=0; k<N; k++){
                componentWeights[k] /= sum;
                if( k < numPrincipalComponents ){
                    maxVariance += componentWeights[k];
                }
            }
        break;
        default:
            errorLog << "computeFeatureVector_(const MatrixDouble &data,const UINT analysisMode) - Unknown analysis mode!" << endl;
        break;
    }

    //Flag that the features have been computed
    trained = true;

    return true;
}
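//A hedged end-to-end sketch of how this function is typically driven. It assumes the
//class exposes a public computeFeatureVector(...) wrapper that forwards to
//computeFeatureVector_(...) and a project(...) method that applies the learned
//projection; verify both names against your version of the library.
MatrixDouble data( 100, 3 );                   //100 samples, 3 dimensions (fill with real data)
PrincipalComponentAnalysis pca;
if( pca.computeFeatureVector( data, 0.95 ) ){  //MAX_VARIANCE mode: keep components covering 95% of the variance
    MatrixDouble prjData;
    pca.project( data, prjData );              //prjData will have numPrincipalComponents columns
}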
bool KNN::train(LabelledClassificationData &trainingData){

    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data, using 80% to train the algorithm and 20% to test it
        LabelledClassificationData trainingSet(trainingData);
        LabelledClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{
            //Compute the classification error
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){
                vector< double > sample = testSet[i].getSample();

                if( !predict( sample ) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );

            trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }
    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value, so this finds the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

        trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index, as this should give the best accuracy with the minimum K value
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
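//A usage sketch for the automatic K search above. The setter names
//(enableBestKValueSearch, setMinKSearchValue, setMaxKSearchValue) are inferred from the
//member variables used in train(...) and should be treated as assumptions.
KNN knn;
knn.enableBestKValueSearch( true );  //Search for the K that maximizes the hold-out accuracy
knn.setMinKSearchValue( 1 );
knn.setMaxKSearchValue( 10 );
LabelledClassificationData trainingData;
//...load or record labelled training data...
if( !knn.train( trainingData ) ){
    //Training failed; check the error log
}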
bool KNN::predict(VectorDouble inputVector,UINT K){

    if( !trained ){
        errorLog << "predict(VectorDouble inputVector,UINT K) - KNN model has not been trained" << endl;
        return false;
    }

    if( inputVector.size() != numFeatures ){
        errorLog << "predict(VectorDouble inputVector,UINT K) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numFeatures << endl;
        return false;
    }

    if( K > trainingData.getNumSamples() ){
        errorLog << "predict(VectorDouble inputVector,UINT K) - K is greater than the number of training samples" << endl;
        return false;
    }

    if( useScaling ){
        for(UINT i=0; i<inputVector.size(); i++){
            inputVector[i] = scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
        }
    }

    //TODO - need to build a kd-tree of the training data to allow better realtime prediction
    const UINT M = trainingData.getNumSamples();
    vector< IndexedDouble > neighbours;

    for(UINT i=0; i<M; i++){
        double dist = 0;
        UINT classLabel = trainingData[i].getClassLabel();
        VectorDouble trainingSample = trainingData[i].getSample();

        switch( distanceMethod ){
            case EUCLIDEAN_DISTANCE:
                dist = computeEuclideanDistance(inputVector,trainingSample);
                break;
            case COSINE_DISTANCE:
                dist = computeCosineDistance(inputVector,trainingSample);
                break;
            case MANHATTAN_DISTANCE:
                dist = computeManhattanDistance(inputVector,trainingSample);
                break;
            default:
                errorLog << "predict(VectorDouble inputVector,UINT K) - unknown distance measure!" << endl;
                return false;
        }

        if( neighbours.size() < K ){
            neighbours.push_back( IndexedDouble(classLabel,dist) );
        }else{
            //Find the maximum value in the neighbours buffer
            double maxValue = neighbours[0].value;
            UINT maxIndex = 0;
            for(UINT n=1; n<neighbours.size(); n++){
                if( neighbours[n].value > maxValue ){
                    maxValue = neighbours[n].value;
                    maxIndex = n;
                }
            }

            //If the dist is less than the maximum value in the buffer, replace that value with the new dist
            if( dist < maxValue ){
                neighbours[ maxIndex ] = IndexedDouble(classLabel,dist);
            }
        }
    }

    //Predict the class ID using the labels of the K nearest neighbours
    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
    else for(UINT i=0; i<classLikelihoods.size(); i++){ classLikelihoods[i] = 0; }

    if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);
    else for(UINT i=0; i<classDistances.size(); i++){ classDistances[i] = 0; }

    //Count the classes
    for(UINT k=0; k<neighbours.size(); k++){
        UINT classLabel = neighbours[k].index;
        if( classLabel == 0 ){
            errorLog << "predict(VectorDouble inputVector,UINT K) - Class label of training example cannot be zero!" << endl;
            return false;
        }

        //Find the index of the classLabel
        UINT classLabelIndex = 0;
        for(UINT j=0; j<numClasses; j++){
            if( classLabel == classLabels[j] ){
                classLabelIndex = j;
                break;
            }
        }
        classLikelihoods[ classLabelIndex ] += 1;
        classDistances[ classLabelIndex ] += neighbours[k].value;
    }

    //Get the max count
    double maxCount = classLikelihoods[0];
    UINT maxIndex = 0;
    for(UINT i=1; i<classLikelihoods.size(); i++){
        if( classLikelihoods[i] > maxCount ){
            maxCount = classLikelihoods[i];
            maxIndex = i;
        }
    }

    //Compute the average distance per class
    for(UINT i=0; i<classDistances.size(); i++){
        if( classLikelihoods[i] > 0 ) classDistances[i] /= classLikelihoods[i];
        else classDistances[i] = BIG_DISTANCE;
    }

    //Normalize the likelihoods
    for(UINT i=0; i<classLikelihoods.size(); i++){
        classLikelihoods[i] /= double( neighbours.size() );
    }

    //Set the maximum likelihood value
    maxLikelihood = classLikelihoods[ maxIndex ];

    if( useNullRejection ){
        if( classDistances[ maxIndex ] <= rejectionThresholds[ maxIndex ] ){
            predictedClassLabel = classLabels[maxIndex];
        }else{
            predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; //Set the gesture label to the null label
        }
    }else{
        predictedClassLabel = classLabels[maxIndex];
    }

    return true;
}
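//A minimal realtime prediction sketch. It assumes the model was trained as above and
//that the standard getters (getPredictedClassLabel, getMaximumLikelihood) are
//available; treat those names as assumptions for other versions of the library.
VectorDouble inputVector( 3 );     //Size must match the number of features used for training
//...fill inputVector with live sensor data...
if( knn.predict( inputVector ) ){  //Uses the K value selected during training
    UINT predictedLabel = knn.getPredictedClassLabel();
    double likelihood = knn.getMaximumLikelihood();
}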
bool KNN::train_(ClassificationData &trainingData){

    //Clear any previous models
    clear();

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //Get the ranges of the data
    ranges = trainingData.getRanges();

    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //Store the number of features, classes and the training data
    this->numInputDimensions = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();

    //TODO: In the future, build a kd-tree from the training data to allow better realtime prediction
    this->trainingData = trainingData;

    //Set the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //If we do not need to search for the best K value, then call the sub training function and return the result
    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    //If we have got this far then we are going to search for the best K value
    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data, using 80% to train the algorithm and 20% to test it
        ClassificationData trainingSet(trainingData);
        ClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{
            //Compute the classification error
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){
                VectorDouble sample = testSet[i].getSample();

                if( !predict( sample , k) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );

            trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }
    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value, so this finds the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

        trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index, as this should give the best accuracy with the minimum K value
        //We now need to train the model again to make sure all the training metrics are computed correctly
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
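//The same K search driven through the newer ClassificationData API, under the same
//assumptions about the K-search setters as the earlier example; loadDatasetFromFile(...)
//is also an assumption here.
KNN knn;
knn.enableBestKValueSearch( true );
knn.setMinKSearchValue( 1 );
knn.setMaxKSearchValue( 20 );
ClassificationData data;
if( data.loadDatasetFromFile( "TrainingData.grt" ) ){
    if( !knn.train( data ) ){ //train(...) is assumed to forward to train_(...)
        //Training failed; check the error log
    }
}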