bool PrincipalComponentAnalysis::setModel( const VectorFloat &mean, const MatrixFloat &eigenvectors ){

    if( (UINT)mean.size() != eigenvectors.getNumCols() ){
        return false;
    }

    trained = true;
    numInputDimensions = eigenvectors.getNumCols();
    numPrincipalComponents = eigenvectors.getNumRows();
    this->mean = mean;
    stdDev.clear();
    componentWeights.clear();
    eigenvalues.clear();
    sortedEigenvalues.clear();
    this->eigenvectors = eigenvectors;
    
    //The eigenvectors are already sorted, so sortedEigenvalues just holds the default indices (with placeholder values of 0.0)
    for(UINT i=0; i<numPrincipalComponents; i++){
        sortedEigenvalues.push_back( IndexedDouble(i,0.0) );
    }
    return true;
}
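//A minimal usage sketch (not part of the original source): restoring a PCA model from
//precomputed values, e.g. loaded from disk. The helper name and dimensions are illustrative
//assumptions; setModel(...) itself is the method defined above.
static bool exampleSetModel(){
    VectorFloat storedMean(3);           //Mean of the training data, one value per input dimension
    MatrixFloat storedEigenvectors(2,3); //2 principal components (rows) x 3 input dimensions (cols)
    //...fill storedMean and storedEigenvectors with the saved model values...
    PrincipalComponentAnalysis pca;
    //setModel(...) returns false if the mean size does not match the eigenvector column count
    return pca.setModel( storedMean, storedEigenvectors );
}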
bool PrincipalComponentAnalysis::computeFeatureVector_(const MatrixDouble &data,const UINT analysisMode) {

    trained = false;
    const UINT M = data.getNumRows();
    const UINT N = data.getNumCols();
    this->numInputDimensions = N;

    MatrixDouble msData( M, N );

    //Compute the mean and standard deviation of the input data
    mean = data.getMean();
    stdDev = data.getStdDev();

    if( normData ) {
        //Normalize the data to zero mean and unit variance (assumes no dimension has a zero standard deviation)
        for(UINT i=0; i<M; i++)
            for(UINT j=0; j<N; j++)
                msData[i][j] = (data[i][j]-mean[j]) / stdDev[j];

    } else {
        //Mean Subtract Data
        for(UINT i=0; i<M; i++)
            for(UINT j=0; j<N; j++)
                msData[i][j] = data[i][j] - mean[j];
    }

    //Get the covariance matrix
    MatrixDouble cov = msData.getCovarianceMatrix();

    //Use Eigen Value Decomposition to find eigenvectors of the covariance matrix
    EigenvalueDecomposition eig;

    if( !eig.decompose( cov ) ) {
        mean.clear();
        stdDev.clear();
        componentWeights.clear();
        sortedEigenvalues.clear();
        eigenvectors.clear();
        errorLog << "computeFeatureVector(const MatrixDouble &data,UINT analysisMode) - Failed to decompose input matrix!" << endl;
        return false;
    }

    //Get the eigenvectors and a local copy of the eigenvalues (the local copy is consumed
    //by the selection sort below, so the class member of the same name is left untouched)
    eigenvectors = eig.getEigenvectors();
    VectorDouble eigenvalues = eig.getRealEigenvalues();

    //Eigenvalues less than zero carry no useful variance, so clamp them to zero
    for(UINT i=0; i<eigenvalues.size(); i++) {
        if( eigenvalues[i] < 0 )
            eigenvalues[i] = 0;
    }

    //Sort the eigenvalues in descending order (by repeatedly extracting the current maximum) and compute the component weights
    double sum = 0;
    UINT componentIndex = 0;
    sortedEigenvalues.clear();
    componentWeights.resize(N,0);

    while( true ) {
        double maxValue = 0;
        UINT index = 0;
        for(UINT i=0; i<eigenvalues.size(); i++) {
            if( eigenvalues[i] > maxValue ) {
                maxValue = eigenvalues[i];
                index = i;
            }
        }
        if( maxValue == 0 || componentIndex >= eigenvalues.size() ) {
            break;
        }
        sortedEigenvalues.push_back( IndexedDouble(index,maxValue) );
        componentWeights[ componentIndex++ ] = eigenvalues[ index ];
        sum += eigenvalues[ index ];
        eigenvalues[ index ] = 0; //Zero this eigenvalue so it is not selected again
    }

    double cumulativeVariance = 0;
    switch( analysisMode ) {
    case MAX_VARIANCE:
        //Normalize the component weights and work out how many components are needed to reach maxVariance
        numPrincipalComponents = 0;
        for(UINT k=0; k<N; k++) {
            componentWeights[k] /= sum;
            cumulativeVariance += componentWeights[k];
            if( cumulativeVariance >= maxVariance && numPrincipalComponents==0 ) {
                numPrincipalComponents = k+1;
            }
        }
        break;
    case MAX_NUM_PCS:
        //Normalize the component weights and compute the maxVariance
        maxVariance = 0;
        for(UINT k=0; k<N; k++) {
            componentWeights[k] /= sum;
            if( k < numPrincipalComponents ) {
                maxVariance += componentWeights[k];
            }
        }
        break;
    default:
        errorLog << "computeFeatureVector_(const MatrixDouble &data,const UINT analysisMode) - Unknown analysis mode!" << endl;
        return false;
    }

    //Flag that the features have been computed
    trained = true;

    return true;
}
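//A brief usage sketch (illustrative, with assumptions noted): training the PCA feature vector on
//a data matrix and projecting the data onto the principal components. This assumes the public
//GRT computeFeatureVector(data,maxVariance,normData) wrapper, which forwards to the
//computeFeatureVector_ method above in MAX_VARIANCE mode, and the GRT project(...) method.
static bool examplePCATrain(){
    MatrixDouble data( 100, 4 ); //100 samples x 4 dimensions; fill with real values before use
    PrincipalComponentAnalysis pca;
    //Keep enough principal components to explain 95% of the variance
    if( !pca.computeFeatureVector( data, 0.95 ) ) return false;
    MatrixDouble projected;
    return pca.project( data, projected ); //projected has numPrincipalComponents columns
}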
Example #3
bool KNN::train(LabelledClassificationData &trainingData){

    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data, using 80% to train the algorithm and 20% to test it
        LabelledClassificationData trainingSet(trainingData);
        LabelledClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{

            //Compute the classification error
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){

                vector< double > sample = testSet[i].getSample();

                if( !predict( sample ) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );

            trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }

    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

		trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index; this should give the best accuracy with the smallest K value
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
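//A short usage sketch for the best-K search path above (hedged: enableBestKValueSearch,
//setMinKSearchValue and setMaxKSearchValue are the standard GRT KNN setters; the range 1..10
//is an arbitrary example).
static bool exampleTrainBestK( LabelledClassificationData &trainingData ){
    KNN knn;
    knn.enableBestKValueSearch( true ); //train(...) will test each K on an 80/20 random split
    knn.setMinKSearchValue( 1 );
    knn.setMaxKSearchValue( 10 );
    //Returns false if every K fails to train or no K reaches a non-zero test accuracy
    return knn.train( trainingData );
}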
Example #4
bool KNN::predict(VectorDouble inputVector,UINT K){

    if( !trained ){
        errorLog << "predict(VectorDouble inputVector,UINT K) - KNN model has not been trained" << endl;
        return false;
    }

    if( inputVector.size() != numFeatures ){
        errorLog << "predict(VectorDouble inputVector) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numFeatures <<  endl;
        return false;
    }

    if( K > trainingData.getNumSamples() ){
        errorLog << "predict(VectorDouble inputVector,UINT K) - K Is Greater Than The Number Of Training Samples" << endl;
        return false;
    }

    if( useScaling ){
        for(UINT i=0; i<inputVector.size(); i++){
            inputVector[i] = scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
        }
    }

    //TODO - need to build a kdtree of the training data to allow better realtime prediction
    const UINT M = trainingData.getNumSamples();
    vector< IndexedDouble > neighbours;

    for(UINT i=0; i<M; i++){
        double dist = 0;
        UINT classLabel = trainingData[i].getClassLabel();
        VectorDouble trainingSample = trainingData[i].getSample();

        switch( distanceMethod ){
            case EUCLIDEAN_DISTANCE:
                dist = computeEuclideanDistance(inputVector,trainingSample);
                break;
            case COSINE_DISTANCE:
                dist = computeCosineDistance(inputVector,trainingSample);
                break;
            case MANHATTAN_DISTANCE:
                dist = computeManhattanDistance(inputVector, trainingSample);
                break;
            default:
                errorLog << "predict(vector< double > inputVector) - unkown distance measure!" << endl;
                return false;
                break;
        }

        if( neighbours.size() < K ){
            neighbours.push_back( IndexedDouble(classLabel,dist) );
        }else{
            //Find the maximum value in the neighbours buffer
            double maxValue = neighbours[0].value;
            UINT maxIndex = 0;
            for(UINT n=1; n<neighbours.size(); n++){
                if( neighbours[n].value > maxValue ){
                    maxValue = neighbours[n].value;
                    maxIndex = n;
                }
            }

            //If the dist is less than the maximum value in the buffer, then replace that value with the new dist
            if( dist < maxValue ){
                neighbours[ maxIndex ] = IndexedDouble(classLabel,dist);
            }
        }
    }

    //Predict the class ID using the labels of the K nearest neighbours
    if( classLikelihoods.size() != numClasses ){
        classLikelihoods.resize(numClasses,0);
    }else{
        for(UINT i=0; i<classLikelihoods.size(); i++){ classLikelihoods[i] = 0; }
    }
    if( classDistances.size() != numClasses ){
        classDistances.resize(numClasses,0);
    }else{
        for(UINT i=0; i<classDistances.size(); i++){ classDistances[i] = 0; }
    }

    //Count the classes
    for(UINT k=0; k<neighbours.size(); k++){
        UINT classLabel = neighbours[k].index;
        if( classLabel == 0 ){
            errorLog << "predict(VectorDouble inputVector) - Class label of training example can not be zero!" << endl;
            return false;
        }

        //Find the index of the classLabel
        UINT classLabelIndex = 0;
        for(UINT j=0; j<numClasses; j++){
            if( classLabel == classLabels[j] ){
                classLabelIndex = j;
                break;
            }
        }
        classLikelihoods[ classLabelIndex ] += 1;
        classDistances[ classLabelIndex ] += neighbours[k].value;
    }

    //Get the max count
    double maxCount = classLikelihoods[0];
    UINT maxIndex = 0;
    for(UINT i=1; i<classLikelihoods.size(); i++){
        if( classLikelihoods[i] > maxCount ){
            maxCount = classLikelihoods[i];
            maxIndex = i;
        }
    }

    //Compute the average distances per class
    for(UINT i=0; i<classDistances.size(); i++){
        if( classLikelihoods[i] > 0 )   classDistances[i] /= classLikelihoods[i];
        else classDistances[i] = BIG_DISTANCE;
    }

    //Normalize the likelihoods
    for(UINT i=0; i<classLikelihoods.size(); i++){
        classLikelihoods[i] /= double( neighbours.size() );
    }

    //Set the maximum likelihood value
    maxLikelihood = classLikelihoods[ maxIndex ];

    if( useNullRejection ){
        if( classDistances[ maxIndex ] <= rejectionThresholds[ maxIndex ] ){
            predictedClassLabel = classLabels[maxIndex];
        }else{
            predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; //Set the gesture label as the null label
        }
    }else{
        predictedClassLabel = classLabels[maxIndex];
    }

    return true;
}
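//A minimal sketch (illustrative) of classifying a new sample with an explicit K and reading the
//results through the standard GRT Classifier accessors; the vector size of 3 is an assumed
//example dimension and must match the trained model.
static void examplePredict( KNN &knn ){
    VectorDouble sample(3); //Must have numFeatures elements
    //...fill sample with a new input...
    if( knn.predict( sample, 5 ) ){ //Vote over the 5 nearest neighbours
        //predictedClassLabel is GRT_DEFAULT_NULL_CLASS_LABEL if null rejection fired
        cout << "Label: " << knn.getPredictedClassLabel();
        cout << " Likelihood: " << knn.getMaximumLikelihood() << endl;
    }
}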
Example #5
bool KNN::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    //Get the ranges of the data
    ranges = trainingData.getRanges();
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Store the number of features, classes and the training data
    this->numInputDimensions = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();
    
    //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
    this->trainingData = trainingData;
    
    //Set the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //If we do not need to search for the best K value, then call the sub training function and return the result
    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    //Otherwise, search for the best K value
    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data, using 80% to train the algorithm and 20% to test it
        ClassificationData trainingSet(trainingData);
        ClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{

            //Compute the classification error
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){

                VectorDouble sample = testSet[i].getSample();

                if( !predict( sample , k) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );

            trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }

    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

		trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index; this should give the best accuracy with the smallest K value.
        //Train the model once more on the full dataset so all the training metrics are computed correctly
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
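//A minimal sketch (illustrative) of the fixed-K path: with the best-K search disabled, train_(...)
//above defers directly to train_(trainingData,K) and skips the 80/20 partition search. Assumes the
//standard GRT setK(...) setter and the MLBase train(...) wrapper that forwards to train_(...).
static bool exampleTrainFixedK( ClassificationData &trainingData ){
    KNN knn;
    knn.enableBestKValueSearch( false );
    knn.setK( 3 ); //Always classify with the 3 nearest neighbours
    return knn.train( trainingData );
}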