Example #1
bool KMeans::train_(UnlabelledData &trainingData){

    //Convert the training data into one matrix
	UINT M = trainingData.getNumSamples();
    UINT N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }
	
	return train_(data);
}
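
For context, callers reach the overload above through the public MLBase::train(UnlabelledData) wrapper shown in Example #20. A minimal usage sketch (made-up sample values; assumes the standard GRT API for UnlabelledData and KMeans):

#include <GRT/GRT.h>
using namespace GRT;

int main(){
    //Three 2-D samples (made-up values)
    UnlabelledData data;
    data.setNumDimensions( 2 );
    VectorFloat sample( 2 );
    sample[0] = 0.1; sample[1] = 0.2; data.addSample( sample );
    sample[0] = 0.9; sample[1] = 0.8; data.addSample( sample );
    sample[0] = 0.5; sample[1] = 0.4; data.addSample( sample );

    KMeans kmeans( 2 ); //ask for two clusters
    return kmeans.train( data ) ? 0 : 1; //forwards to train_(UnlabelledData&)
}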
Example #2

bool HierarchicalClustering::train_(ClassificationData &trainingData){
	
	if( trainingData.getNumSamples() == 0 ){
		return false;
	}

    //Convert the labelled training data to a training matrix
	M = trainingData.getNumSamples();
    N = trainingData.getNumDimensions();

    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

	return train_( data );
}
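
Note: the element-by-element copy in Examples #1 and #2 performs the same conversion as the getDataAsMatrixFloat()/getDataAsMatrixDouble() helpers used from Example #4 onwards; the explicit loop is equivalent, just written out by hand.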
Example #3
bool KMeans::train_(ClassificationData &trainingData){
	
	if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The training data is empty!" << std::endl;
		return false;
	}
	
	//Set numClusters to the number of classes in the training data
	numClusters = trainingData.getNumClasses();

    //Convert the labelled training data to a training matrix
	UINT M = trainingData.getNumSamples();
    UINT N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    //Run the K-Means algorithm
    return train_( data );
}
Example #4
bool GaussianMixtureModels::train_(ClassificationData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
Example #5
bool MLBase::train(RegressionData trainingData){ return train_( trainingData ); }
Example #6
bool MLBase::train(MatrixFloat data){ return train_( data ); }
Example #7
bool SelfOrganizingMap::train_(UnlabelledData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_(data);
}
Example #8
bool KMeansFeatures::train_(UnlabelledData &trainingData){
	MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
Example #9
bool KNN::train(LabelledClassificationData &trainingData){

	if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data and use 80% to train the algorithm and 20% to test it
        LabelledClassificationData trainingSet(trainingData);
        LabelledClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{

            //Compute the classification accuracy
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){

                vector< double > sample = testSet[i].getSample();

                if( !predict( sample ) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );
			
			trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }

    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

		trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index, this should give us the best accuracy with the minimum K value
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
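
The tail of this function is easy to misread: the accuracy log is sorted by value (descending), the run of entries tied at the top accuracy is copied into tempLog, and tempLog is re-sorted by index (ascending) so the smallest K among the best-scoring values wins. A standalone sketch of the same selection rule, using std::pair in place of GRT's IndexedDouble (the function name is illustrative):

#include <algorithm>
#include <utility>
#include <vector>

//Return the smallest k that achieves the maximum accuracy, mirroring
//the two-sort IndexedDouble logic above (illustrative sketch, not GRT code)
unsigned int pickBestK( std::vector< std::pair<unsigned int,double> > results ){
    if( results.empty() ) return 0;

    //Sort by accuracy, highest first
    std::sort( results.begin(), results.end(),
        []( const std::pair<unsigned int,double> &a,
            const std::pair<unsigned int,double> &b ){ return a.second > b.second; } );

    //Among the entries tied with the best accuracy, keep the smallest k
    unsigned int bestK = results[0].first;
    for( size_t i=1; i<results.size() && results[i].second == results[0].second; i++ ){
        bestK = std::min( bestK, results[i].first );
    }
    return bestK;
}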
Example #10
bool KNN::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    //Get the ranges of the data
    ranges = trainingData.getRanges();
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Store the number of features, classes and the training data
    this->numInputDimensions = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();
    
    //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
    this->trainingData = trainingData;
    
    //Set the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //If we do not need to search for the best K value, then call the sub training function and return the result
	if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    //If we have got this far then we are going to search for the best K value
    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data and use 80% to train the algorithm and 20% to test it
        ClassificationData trainingSet(trainingData);
        ClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{

            //Compute the classification accuracy
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){

                VectorDouble sample = testSet[i].getSample();

                if( !predict( sample , k) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );
			
			trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }

    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

		trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index, this should give us the best accuracy with the minimum K value
        //We now need to train the model again to make sure all the training metrics are computed correctly
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
Example #11

bool KMeansFeatures::train_(LabelledContinuousTimeSeriesClassificationData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
Example #12
bool RBMQuantizer::train_(TimeSeriesClassificationDataStream &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
Example #13
bool RBMQuantizer::train_(ClassificationData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
Example #14
File: em.cpp Project: 2december/opencv
 bool train(const Ptr<TrainData>& data, int)
 {
     Mat samples = data->getTrainSamples(), labels;
     return train_(samples, labels, noArray(), noArray());
 }
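
The overload above (from OpenCV's cv::ml::EM) discards the labels and the int flags argument, since EM is unsupervised. Callers normally reach it through the public StatModel::train interface; a minimal usage sketch (made-up sample values; assumes the standard OpenCV ml module):

#include <opencv2/ml.hpp>
using namespace cv;
using namespace cv::ml;

int main(){
    //Two clusters of 2-D points (made-up values)
    Mat samples = (Mat_<float>(4, 2) << 0.1f, 0.2f,
                                        0.2f, 0.1f,
                                        5.0f, 5.1f,
                                        5.2f, 4.9f);

    Ptr<EM> em = EM::create();
    em->setClustersNumber( 2 );

    //EM is unsupervised, so no responses are supplied
    Ptr<TrainData> data = TrainData::create( samples, ROW_SAMPLE, noArray() );
    return em->train( data ) ? 0 : 1;
}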
Example #15
bool KMeansFeatures::train_(ClassificationData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
Example #16
bool KMeansFeatures::train_(TimeSeriesClassificationDataStream &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
Example #17
bool MLBase::train(RegressionData trainingData,RegressionData validationData){ return train_( trainingData, validationData ); }
Example #18
bool SelfOrganizingMap::train_(ClassificationData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_(data);
}
Example #19
bool HiddenMarkovModel::train(const vector< vector<UINT> > &trainingData){

    //Clear any previous models
    modelTrained = false;
    observationSequence.clear();
    estimatedStates.clear();
    trainingIterationLog.clear();
    
	UINT n,currentIter, bestIndex = 0;
	double newLoglikelihood, bestLogValue = 0;
    
    if( numRandomTrainingIterations > 1 ){

        //Buffers to keep track of each A and B matrix
        vector< MatrixDouble > aTracker( numRandomTrainingIterations );
        vector< MatrixDouble > bTracker( numRandomTrainingIterations );
        vector< double > loglikelihoodTracker( numRandomTrainingIterations );
        
        UINT maxNumTestIter = maxNumIter > 10 ? 10 : maxNumIter;

        //Try and find the best starting point
        for(n=0; n<numRandomTrainingIterations; n++){
            //Reset the model to new random starting values
            randomizeMatrices(numStates,numSymbols);

            if( !train_(trainingData,maxNumTestIter,currentIter,newLoglikelihood) ){
                return false;
            }
            aTracker[n] = a;
            bTracker[n] = b;
            loglikelihoodTracker[n] = newLoglikelihood;
        }

        //Get the best result and set it as the a and b starting values
        bestIndex = 0;
        bestLogValue = loglikelihoodTracker[0];
        for(n=1; n<numRandomTrainingIterations; n++){
            if(bestLogValue < loglikelihoodTracker[n]){
                bestLogValue = loglikelihoodTracker[n];
                bestIndex = n;
            }
        }

        //Set a and b
        a = aTracker[bestIndex];
        b = bTracker[bestIndex];
        
    }else{
        randomizeMatrices(numStates,numSymbols);
    }

	//Perform the actual training
    if( !train_(trainingData,maxNumIter,currentIter,newLoglikelihood) ){
        return false;
    }

	//Calculate the observationSequence buffer length
	const UINT numObs = (unsigned int)trainingData.size();
	UINT k = 0;
    UINT averageObsLength = 0;
	for(k=0; k<numObs; k++){
		const UINT T = (unsigned int)trainingData[k].size();
		averageObsLength += T;
	}
    
    averageObsLength = (UINT)floor( averageObsLength/double(numObs) );
    observationSequence.resize( averageObsLength );
    estimatedStates.resize( averageObsLength );
    
    //Finally, flag that the model was trained
    modelTrained = true;

	return true;
}
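
A note on the structure above: when numRandomTrainingIterations is greater than one, the model runs a short burn-in of at most 10 iterations (maxNumTestIter) from each random initialisation, keeps the a and b matrices that achieved the highest log-likelihood, and only then performs the full training pass from that best starting point. This is the standard random-restart defence against the iterative training (Baum-Welch, in the usual discrete-HMM setting) converging to a poor local optimum.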
Example #20
bool MLBase::train(UnlabelledData trainingData){ return train_( trainingData ); }
Example #21
bool RBMQuantizer::train_(TimeSeriesClassificationData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
Example #22
bool MLBase::train(ClassificationData trainingData){ return train_( trainingData ); }
Example #23
bool RBMQuantizer::train_(ClassificationDataStream &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
Example #24
bool MLBase::train(TimeSeriesClassificationData trainingData){ return train_( trainingData ); }
Example #25
bool RBMQuantizer::train_(UnlabelledData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
Example #26
bool GaussianMixtureModels::train_(UnlabelledData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
Example #27
File: MLBase.cpp Project: daltonb/grt
bool MLBase::train(MatrixDouble data){ return train_( data ); }
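
Taken together, the MLBase::train overloads in Examples #5, #6, #17, #20, #22, #24, and #27 show the delegation pattern used throughout GRT: each public train method is a thin pass-by-value wrapper that forwards to a virtual train_ overload, so a concrete algorithm only overrides the train_ variants it supports. A minimal sketch of that pattern (the class name and training logic are illustrative; assumes MLBase's virtual train_(MatrixFloat&) and its protected trained flag):

#include <GRT/GRT.h>
using namespace GRT;

//Illustrative sketch only: a custom algorithm that overrides just the
//MatrixFloat variant of train_ and inherits the public MLBase::train
//wrappers shown in the examples above
class CentroidModel : public MLBase {
public:
    virtual bool train_( MatrixFloat &data ){
        if( data.getNumRows() == 0 ) return false;
        centroid.clear();
        centroid.resize( data.getNumCols(), 0 );
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                centroid[j] += data[i][j] / Float( data.getNumRows() );
            }
        }
        trained = true; //protected MLBase flag
        return true;
    }
protected:
    VectorFloat centroid;
};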