std::string TimeSeriesClassificationData::getStatsAsString() const{ std::string stats; stats += "DatasetName:\t" + datasetName + "\n"; stats += "DatasetInfo:\t" + infoText + "\n"; stats += "Number of Dimensions:\t" + Util::toString(numDimensions) + "\n"; stats += "Number of Samples:\t" + Util::toString(totalNumSamples) + "\n"; stats += "Number of Classes:\t" + Util::toString(getNumClasses()) + "\n"; stats += "ClassStats:\n"; for(UINT k=0; k<getNumClasses(); k++){ stats += "ClassLabel:\t" + Util::toString(classTracker[k].classLabel); stats += "\tNumber of Samples:\t" + Util::toString( classTracker[k].counter ); stats +="\tClassName:\t" + classTracker[k].className + "\n"; } Vector< MinMax > ranges = getRanges(); stats += "Dataset Ranges:\n"; for(UINT j=0; j<ranges.size(); j++){ stats += "[" + Util::toString( j+1 ) + "] Min:\t" + Util::toString( ranges[j].minValue ) + "\tMax: " + Util::toString( ranges[j].maxValue ) + "\n"; } stats += "Timeseries Lengths:\n"; UINT M = (UINT)data.size(); for(UINT j=0; j<M; j++){ stats += "ClassLabel: " + Util::toString( data[j].getClassLabel() ) + " Length:\t" + Util::toString( data[j].getLength() ) + "\n"; } return stats; }
bool TimeSeriesClassificationDataStream::printStats() const { cout << "DatasetName:\t" << datasetName << endl; cout << "DatasetInfo:\t" << infoText << endl; cout << "Number of Dimensions:\t" << numDimensions << endl; cout << "Number of Samples:\t" << totalNumSamples << endl; cout << "Number of Classes:\t" << getNumClasses() << endl; cout << "ClassStats:\n"; for(UINT k=0; k<getNumClasses(); k++){ cout << "ClassLabel:\t" << classTracker[k].classLabel; cout << "\tNumber of Samples:\t" << classTracker[k].counter; cout << "\tClassName:\t" << classTracker[k].className << endl; } cout << "TimeSeriesMarkerStats:\n"; for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){ cout << "ClassLabel: " << timeSeriesPositionTracker[i].getClassLabel(); cout << "\tStartIndex: " << timeSeriesPositionTracker[i].getStartIndex(); cout << "\tEndIndex: " << timeSeriesPositionTracker[i].getEndIndex(); cout << "\tLength: " << timeSeriesPositionTracker[i].getLength() << endl; } vector< MinMax > ranges = getRanges(); cout << "Dataset Ranges:\n"; for(UINT j=0; j<ranges.size(); j++){ cout << "[" << j+1 << "] Min:\t" << ranges[j].minValue << "\tMax: " << ranges[j].maxValue << endl; } return true; }
bool LabelledTimeSeriesClassificationData::printStats() const { cout << "DatasetName:\t" << datasetName << endl; cout << "DatasetInfo:\t" << infoText << endl; cout << "Number of Dimensions:\t" << numDimensions << endl; cout << "Number of Samples:\t" << totalNumSamples << endl; cout << "Number of Classes:\t" << getNumClasses() << endl; cout << "ClassStats:\n"; for(UINT k=0; k<getNumClasses(); k++){ cout << "ClassLabel:\t" << classTracker[k].classLabel; cout << "\tNumber of Samples:\t" << classTracker[k].counter; cout << "\tClassName:\t" << classTracker[k].className << endl; } vector< MinMax > ranges = getRanges(); cout << "Dataset Ranges:\n"; for(UINT j=0; j<ranges.size(); j++){ cout << "[" << j+1 << "] Min:\t" << ranges[j].minValue << "\tMax: " << ranges[j].maxValue << endl; } cout << "Timeseries Lengths:\n"; UINT M = (UINT)data.size(); for(UINT j=0; j<M; j++){ cout << "ClassLabel: " << data[j].getClassLabel() << " Length:\t" << data[j].getLength() << endl; } return true; }
/**
 Gets the class label of every class tracked by this dataset.

 @return a vector with one entry per class, in class tracker order. The vector is empty if there are no classes
*/
Vector< UINT > ClassificationData::getClassLabels() const{

    const UINT numClasses = getNumClasses();
    Vector< UINT > labels( numClasses, 0 );

    //With zero classes the loop body never runs and the empty vector is returned
    for(UINT k=0; k<numClasses; k++){
        labels[k] = classTracker[k].classLabel;
    }

    return labels;
}
/**
 Gets the number of samples recorded for every class tracked by this dataset.

 @return a vector with one counter per class, in class tracker order. All counters are zero if the dataset has no samples
*/
Vector< UINT > ClassificationData::getNumSamplesPerClass() const{

    const UINT numClasses = getNumClasses();
    Vector< UINT > counts( numClasses, 0 );

    //Only copy the tracker counters if the dataset actually holds samples
    if( getNumSamples() > 0 ){
        for(UINT k=0; k<numClasses; k++){
            counts[k] = classTracker[k].counter;
        }
    }

    return counts;
}
bool Project::renameClass(const std::string& currentName, const std::string& newName) { if (isLoaded()) { ofLogVerbose("Project::renameClass") << "Renaming class..."; ofFile file(_sketchDir.getAbsolutePath() + "/" + currentName + "." + SKETCH_FILE_EXTENSION); if (file.exists() && hasClasses()) { unsigned int numClasses = getNumClasses(); for (unsigned int i = 0; i < numClasses; ++i) { if (_data["classes"][i]["name"] == currentName) { _data["classes"][i]["name"] = newName; _data["classes"][i]["fileName"] = newName + "." + SKETCH_FILE_EXTENSION; file.renameTo(_sketchDir.getAbsolutePath() + "/" + newName + "." + SKETCH_FILE_EXTENSION); return true; } } } } return false; }
/**
 Turns the classification data into regression data so that regression algorithms (like the MLP)
 can be used as a classifier.

 The number of targets in the regression data is set to the number of classes in this dataset.
 The target vector of each sample is all 0's, except for the element at index (classLabel-1),
 which is 1.

 For this to work every class label must be in the range [1 numClasses]. If any sample has a
 class label of 0, or a class label larger than the number of classes (which would index outside
 of the target vector), the conversion is aborted and the returned data is cleared.

 @return the regression data, this is empty if this dataset has no samples or if the conversion failed
*/
RegressionData ClassificationData::reformatAsRegressionData() const{

    RegressionData regressionData;

    if( totalNumSamples == 0 ){
        return regressionData;
    }

    const UINT numInputDimensions = numDimensions;
    const UINT numTargetDimensions = getNumClasses();
    regressionData.setInputAndTargetDimensions(numInputDimensions, numTargetDimensions);

    for(UINT i=0; i<totalNumSamples; i++){
        const UINT classLabel = data[i].getClassLabel();

        //The label is used as a one-based index into the target vector, so it has to be validated first
        if( classLabel == 0 || classLabel > numTargetDimensions ){
            errorLog << "reformatAsRegressionData() - The class label " << classLabel << " can not be mapped to a target index, labels must be in the range [1 " << numTargetDimensions << "]!" << std::endl;
            regressionData.clear();
            return regressionData;
        }

        //Set the class index in the target Vector to 1 and all other values in the target Vector to 0
        VectorFloat targetVector(numTargetDimensions,0);
        targetVector[ classLabel-1 ] = 1;

        regressionData.addSample(data[i].getSample(),targetVector);
    }

    return regressionData;
}
double ConfusionMatrix::averageClassAccuracy(bool includeVoid) const { if (!normalized) { ConfusionMatrix normalizedConfusionMatrix(*this); normalizedConfusionMatrix.normalize(); assert(normalizedConfusionMatrix.isNormalized()); return normalizedConfusionMatrix.averageClassAccuracy(includeVoid); } utils::Average averageClassAccuracy; for (unsigned int label = 0; label < getNumClasses(); label++) { double classAccuracy = data(label, label); assertProbability(classAccuracy); bool ignore = false; if (!includeVoid && !ignoredLabels.empty()) for (LabelType ID: ignoredLabels) if (ID == label) { ignore = true; break; } if (ignore) continue; else averageClassAccuracy.addValue(classAccuracy); } return averageClassAccuracy.getAverage(); }
/**
 Builds the test dataset for one fold of a previously configured K-fold cross validation.

 The returned dataset has the same number of dimensions and null-gesture setting as this dataset,
 knows every class of this dataset and holds the samples whose indexs are stored for the fold.

 @param foldIndex the index of the fold, this must be less than the K value used to setup the cross validation
 @return the test data for the fold. The data holds no classes or samples if cross validation has not been setup or if foldIndex is out of range
*/
ClassificationData ClassificationData::getTestFoldData(const UINT foldIndex) const{

    ClassificationData testData;
    testData.setNumDimensions( numDimensions );
    testData.setAllowNullGestureClass( allowNullGestureClass );

    //Nothing can be added without a valid cross validation setup and fold index
    if( !crossValidationSetup || foldIndex >= kFoldValue ){
        return testData;
    }

    //Register every class up front, so the class exists even if the fold holds none of its samples
    const UINT numClasses = getNumClasses();
    for(UINT c=0; c<numClasses; c++){
        testData.addClass( classTracker[c].classLabel, classTracker[c].className );
    }

    const UINT numFoldSamples = crossValidationIndexs[ foldIndex ].getSize();
    testData.reserve( numFoldSamples );

    //Copy the samples that belong to the fold
    for(UINT n=0; n<numFoldSamples; n++){
        const UINT sampleIndex = crossValidationIndexs[ foldIndex ][ n ];
        testData.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getSample() );
    }

    testData.sortClassLabels();

    return testData;
}
/**
 Replaces the null rejection thresholds of this classifier.

 @param newRejectionThresholds the new thresholds, there must be exactly one threshold per class
 @return true if the thresholds were updated, false if the number of thresholds does not match the number of classes
*/
bool Classifier::setNullRejectionThresholds(VectorDouble newRejectionThresholds){

    //Reject the update unless there is one threshold for each class
    if( newRejectionThresholds.size() != getNumClasses() ){
        return false;
    }

    nullRejectionThresholds = newRejectionThresholds;
    return true;
}
/**
 Accumulates the leaf node weights of every tree in the forest into a [numClasses x numInputDimensions] matrix.

 @param normWeights if true, every column of the matrix is scaled so that it sums to one (columns that sum to zero are left untouched)
 @return the weights matrix, or an empty matrix if the model has not been trained
*/
MatrixDouble RandomForests::getLeafNodeFeatureWeights( const bool normWeights ) const{

    if( !trained ) return MatrixDouble();

    MatrixDouble weights( getNumClasses(), numInputDimensions );
    weights.setAllValues(0.0);

    //Let each tree add its contribution, a failing tree is reported but does not abort the computation
    for(UINT treeIndex=0; treeIndex<forestSize; treeIndex++){
        const bool treeOK = forest[treeIndex]->computeLeafNodeWeights( weights );
        if( !treeOK ){
            warningLog << "computeLeafNodeWeights( const bool normWeights ) - Failed to compute leaf node weights for tree: " << treeIndex << endl;
        }
    }

    if( !normWeights ) return weights;

    //Normalize the weights, one column at a time
    const UINT numRows = weights.getNumRows();
    const UINT numCols = weights.getNumCols();
    for(UINT col=0; col<numCols; col++){
        double columnSum = 0.0;
        for(UINT row=0; row<numRows; row++){
            columnSum += weights[row][col];
        }

        if( columnSum == 0.0 ) continue;

        const double scale = 1.0 / columnSum;
        for(UINT row=0; row<numRows; row++){
            weights[row][col] *= scale;
        }
    }

    return weights;
}
/**
 Builds a bootstrapped dataset by randomly drawing samples (with replacement) from this dataset.

 The new dataset has the same number of dimensions, null-gesture setting and external ranges as
 this dataset and knows every class (label and name) of this dataset, even if no sample of a class
 happens to be drawn.

 @param numSamples the number of samples to draw, if this is zero then the number of samples in this dataset is used
 @return the bootstrapped dataset. This holds no samples if this dataset is empty, as there is nothing to draw from
*/
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples) const{

    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );

    if( numSamples == 0 ) numSamples = totalNumSamples;

    newDataset.reserve( numSamples );

    //Add all the classes to the new dataset to ensure the dataset has a list of all the labels (and their names)
    const UINT K = getNumClasses();
    for(UINT k=0; k<K; k++){
        newDataset.addClass( classTracker[k].classLabel, classTracker[k].className );
    }

    //Randomly select the samples to add to the new dataset
    //This is skipped for an empty dataset, as indexing data with no samples would read out of bounds
    if( totalNumSamples > 0 ){
        for(UINT i=0; i<numSamples; i++){
            //NOTE(review): this relies on the upper bound of getRandomNumberInt being exclusive - confirm against Random
            const UINT randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
            newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
        }
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();

    return newDataset;
}
/**
 Builds the training dataset for one fold of a previously configured K-fold cross validation.

 The returned dataset has the same number of dimensions and null-gesture setting as this dataset,
 knows every class of this dataset and holds the samples of every fold except the fold at foldIndex.

 @param foldIndex the index of the fold that is held out, this must be less than the K value used to setup the cross validation
 @return the training data for the fold. The data holds no classes or samples if cross validation has not been setup (an error is logged) or if foldIndex is out of range
*/
ClassificationData ClassificationData::getTrainingFoldData(const UINT foldIndex) const{

    ClassificationData trainingData;
    trainingData.setNumDimensions( numDimensions );
    trainingData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << std::endl;
        return trainingData;
    }

    if( foldIndex >= kFoldValue ) return trainingData;

    //Register every class up front, so the class exists even if the training folds hold none of its samples
    const UINT numClasses = getNumClasses();
    for(UINT c=0; c<numClasses; c++){
        trainingData.addClass( classTracker[c].classLabel, classTracker[c].className );
    }

    //Copy the samples of every fold, except for the held out fold
    for(UINT fold=0; fold<kFoldValue; fold++){
        if( fold == foldIndex ) continue;

        for(UINT n=0; n<crossValidationIndexs[fold].getSize(); n++){
            const UINT sampleIndex = crossValidationIndexs[fold][n];
            trainingData.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getSample() );
        }
    }

    trainingData.sortClassLabels();

    return trainingData;
}
/**
 Builds a bootstrapped dataset by randomly drawing samples (with replacement) from this dataset.

 The new dataset has the same number of dimensions, null-gesture setting and external ranges as
 this dataset and knows every class (label and name) of this dataset.

 If balanceDataset is true, the samples are drawn class by class: each class that has at least one
 sample contributes floor(numSamples / numPopulatedClasses) samples and the last populated class
 additionally absorbs the remainder. Classes without samples can not be drawn from and are skipped.

 @param numSamples the number of samples to draw, if this is zero then the number of samples in this dataset is used
 @param balanceDataset if true the samples are drawn evenly from each class, otherwise they are drawn uniformly from the entire dataset
 @return the bootstrapped dataset. This holds no samples if this dataset is empty, as there is nothing to draw from
*/
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples,bool balanceDataset) const{

    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );

    if( numSamples == 0 ) numSamples = totalNumSamples;

    newDataset.reserve( numSamples );

    const UINT K = getNumClasses();

    //Add all the classes to the new dataset to ensure the dataset has a list of all the labels (and their names)
    for(UINT k=0; k<K; k++){
        newDataset.addClass( classTracker[k].classLabel, classTracker[k].className );
    }

    //There is nothing to draw from if the dataset is empty, indexing the data would read out of bounds
    if( totalNumSamples == 0 ){
        newDataset.sortClassLabels();
        return newDataset;
    }

    if( balanceDataset && K > 0 ){
        //Group the sample indexs by class
        Vector< Vector< UINT > > classIndexs( K );
        for(UINT i=0; i<totalNumSamples; i++){
            classIndexs[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }

        //Only classes that actually hold samples can be drawn from
        Vector< UINT > populatedClasses;
        for(UINT k=0; k<K; k++){
            if( classIndexs[k].size() > 0 ) populatedClasses.push_back( k );
        }
        const UINT numPopulatedClasses = (UINT)populatedClasses.size();

        //Integer division gives the floor of the per-class share
        const UINT numSamplesPerClass = numSamples / numPopulatedClasses;

        //Randomly select the samples from each class
        UINT classPosition = 0;
        UINT classCounter = 0;
        for(UINT i=0; i<numSamples; i++){
            const Vector< UINT > &indexs = classIndexs[ populatedClasses[ classPosition ] ];
            const UINT randomIndex = indexs[ rand.getRandomNumberInt(0, (UINT)indexs.size() ) ];
            newDataset.addSample( data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample() );

            //Move on once the class has received its share (pre-increment, so exactly numSamplesPerClass are drawn)
            //The last class is never left, so it absorbs any remaining samples
            if( ++classCounter >= numSamplesPerClass && classPosition+1 < numPopulatedClasses ){
                classCounter = 0;
                classPosition++;
            }
        }
    }else{
        //Randomly select the samples from the entire dataset
        for(UINT i=0; i<numSamples; i++){
            const UINT randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
            newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
        }
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();

    return newDataset;
}
/**
 Gets the histogram data of every class tracked by this dataset.

 @param numBins the number of bins, this is forwarded to getClassHistogramData for each class
 @return a vector with one histogram matrix per class, in class tracker order
*/
Vector< MatrixFloat > ClassificationData::getHistogramData(UINT numBins) const{

    const UINT numClasses = getNumClasses();
    Vector< MatrixFloat > histograms( numClasses );

    UINT classIndex = 0;
    while( classIndex < numClasses ){
        histograms[ classIndex ] = getClassHistogramData( classTracker[ classIndex ].classLabel, numBins );
        classIndex++;
    }

    return histograms;
}
void ConfusionMatrix::normalize() { if (normalized) { throw std::runtime_error("confusion matrix is already normalized"); } cuv::ndarray<double, cuv::host_memory_space> sums(getNumClasses()); const unsigned int numClasses = getNumClasses(); for (unsigned int label = 0; label < numClasses; label++) { sums(label) = 0.0; for (unsigned int prediction = 0; prediction < numClasses; prediction++) { double value = static_cast<double>(data(label, prediction)); sums(label) += value; } } for (unsigned int label = 0; label < numClasses; label++) { if (sums(label) == 0.0) continue; for (unsigned int prediction = 0; prediction < numClasses; prediction++) { data(label, prediction) /= sums(label); } } #ifndef NDEBUG for (unsigned int label = 0; label < numClasses; label++) { double sum = 0.0; for (unsigned int prediction = 0; prediction < numClasses; prediction++) { double v = static_cast<double>(data(label, prediction)); assert(v >= 0.0 && v <= 1.0); sum += v; } assert(sum == 0.0 || abs(1.0 - sum) < 1e-6); } #endif normalized = true; }
/**
 Computes the mean of every dimension, for every class in the dataset.

 @return a [numClasses x numDimensions] matrix, each row holds the mean of one class (in class tracker order). The row of a class without samples is all zeros
*/
MatrixFloat ClassificationData::getClassMean() const{

    const UINT numClasses = getNumClasses();

    MatrixFloat classMean( numClasses, numDimensions );
    VectorFloat sampleCount( numClasses, 0 );
    classMean.setAllValues( 0 );

    //Sum the samples of each class
    for(UINT n=0; n<totalNumSamples; n++){
        const UINT row = getClassLabelIndexValue( data[n].getClassLabel() );
        for(UINT d=0; d<numDimensions; d++){
            classMean[row][d] += data[n][d];
        }
        sampleCount[ row ]++;
    }

    //Turn the sums into means
    for(UINT row=0; row<numClasses; row++){
        for(UINT d=0; d<numDimensions; d++){
            if( sampleCount[row] > 0 ){
                classMean[row][d] = classMean[row][d] / sampleCount[row];
            }else{
                classMean[row][d] = 0;
            }
        }
    }

    return classMean;
}
/**
 * Adds the entries of another confusion matrix to this confusion matrix.
 *
 * @param other the confusion matrix to add, it must have the same number of classes
 * @throws std::runtime_error if this matrix is already normalized or if the number of classes differs
 */
void ConfusionMatrix::operator+=(const ConfusionMatrix& other) {

    if (normalized) {
        throw std::runtime_error("confusion matrix is already normalized");
    }

    if (other.getNumClasses() == getNumClasses()) {
        data += other.data;
        return;
    }

    std::ostringstream message;
    message << "different number of classes in confusion matrix: " << this->getNumClasses() << " and "
            << other.getNumClasses();
    throw std::runtime_error(message.str());
}
/**
 Builds a text report describing this dataset.

 The report lists the dataset name and info text, the number of dimensions, samples and classes,
 one line per tracked class and the min/max range of every dimension.

 @return the report as a single string, fields are tab separated and lines end with a newline
*/
string LabelledClassificationData::getStatsAsString() const{

    string report;

    //General dataset information
    report += "DatasetName:\t" + datasetName + "\n";
    report += "DatasetInfo:\t" + infoText + "\n";
    report += "Number of Dimensions:\t" + Util::toString( numDimensions ) + "\n";
    report += "Number of Samples:\t" + Util::toString( totalNumSamples ) + "\n";
    report += "Number of Classes:\t" + Util::toString( getNumClasses() ) + "\n";

    //One line per tracked class
    report += "ClassStats:\n";
    for(UINT classIndex=0; classIndex<getNumClasses(); classIndex++){
        report += "ClassLabel:\t" + Util::toString( classTracker[classIndex].classLabel )
                + "\tNumber of Samples:\t" + Util::toString( classTracker[classIndex].counter )
                + "\tClassName:\t" + classTracker[classIndex].className + "\n";
    }

    //One line per dimension, the printed dimension number is one-based
    vector< MinMax > ranges = getRanges();
    report += "Dataset Ranges:\n";
    for(UINT dim=0; dim<ranges.size(); dim++){
        report += "[" + Util::toString( dim+1 ) + "] Min:\t" + Util::toString( ranges[dim].minValue );
        report += "\tMax: " + Util::toString( ranges[dim].maxValue ) + "\n";
    }

    return report;
}
/**
 Computes the sample standard deviation (normalized by N-1) of every dimension, for every class in the dataset.

 @return a [numClasses x numDimensions] matrix, each row holds the standard deviation of one class (in class tracker order).
 The row of a class with fewer than two samples is all zeros, as the N-1 normalization is undefined for it
*/
MatrixFloat ClassificationData::getClassStdDev() const{

    const UINT K = getNumClasses();

    MatrixFloat mean = getClassMean();
    MatrixFloat stdDev(K,numDimensions);
    VectorFloat counter(K,0);

    stdDev.setAllValues( 0 );

    //Sum the squared distances to the class mean
    for(UINT i=0; i<totalNumSamples; i++){
        UINT classIndex = getClassLabelIndexValue( data[i].getClassLabel() );
        for(UINT j=0; j<numDimensions; j++){
            stdDev[classIndex][j] += SQR(data[i][j]-mean[classIndex][j]);
        }
        counter[ classIndex ]++;
    }

    for(UINT k=0; k<K; k++){
        for(UINT j=0; j<numDimensions; j++){
            //Dividing by (counter-1) would give 0/0 (NaN) for a class with a single sample
            stdDev[k][j] = counter[k] > 1 ? sqrt( stdDev[k][j] / Float(counter[k]-1) ) : 0;
        }
    }

    return stdDev;
}
bool Project::isClassName(const std::string& className) const { if (hasClasses()) { unsigned int numClasses = getNumClasses(); for (unsigned int i = 0; i < numClasses; ++i) { if (_data["classes"][i]["name"] == className) { return true; } } } return false; }
/// Creates a new class from the class file template, appends it to the
/// project data and saves it.
///
/// Every occurrence of "<classname>" in the template is replaced by className.
///
/// \param className The name of the new class.
/// \returns The JSON description of the new class, with the entries
///          "fileName", "name" and "fileContents".
Json::Value Project::createClass(const std::string& className)
{
    // Instantiate the template for the requested class name.
    std::string contents = _classFileTemplate;
    ofStringReplace(contents, "<classname>", className);

    ofLogVerbose("Project::createClass") << "fileContents: "<< contents;

    Json::Value descriptor;

    // TODO: Load extension from settings
    descriptor["fileName"] = className + "." + SKETCH_FILE_EXTENSION;
    descriptor["name"] = className;
    descriptor["fileContents"] = contents;

    // Append the new class after the classes that already exist.
    _data["classes"][getNumClasses()] = descriptor;

    // TODO: re-loading is a terribly slow way to delete. Come back and optimize.
    // Simply need to remove the Json::Value class in _data["classes"]
    _saveFile(descriptor);

    return descriptor;
}
// ----------------------------------------------------------------------------- // // ----------------------------------------------------------------------------- void MultiEmmpmFilter::readFilterParameters(AbstractFilterParametersReader* reader, int index) { reader->openFilterGroup(this, index); setInputDataArrayVector(reader->readDataArrayPathVector("InputDataArrayVector", getInputDataArrayVector())); setNumClasses(reader->readValue("NumClasses", getNumClasses())); setExchangeEnergy(reader->readValue("ExchangeEnergy", getExchangeEnergy())); setHistogramLoops(reader->readValue("HistogramLoops", getHistogramLoops())); setSegmentationLoops(reader->readValue("SegmentationLoops", getSegmentationLoops())); setUseSimulatedAnnealing(reader->readValue("UseSimulatedAnnealing", getUseSimulatedAnnealing())); setUseGradientPenalty(reader->readValue("UseGradientPenalty", getUseGradientPenalty())); setGradientPenalty(reader->readValue("GradientPenalty", getGradientPenalty())); setUseCurvaturePenalty(reader->readValue("UseCurvaturePenalty", getUseCurvaturePenalty())); setCurvaturePenalty(reader->readValue("CurvaturePenalty", getCurvaturePenalty())); setRMax(reader->readValue("RMax", getRMax())); setEMLoopDelay(reader->readValue("EMLoopDelay", getEMLoopDelay())); setOutputAttributeMatrixName(reader->readString("OutputAttributeMatrixName", getOutputAttributeMatrixName())); setUsePreviousMuSigma(reader->readValue("UsePreviousMuSigma", getUsePreviousMuSigma())); setOutputArrayPrefix(reader->readString("OutputArrayPrefix", getOutputArrayPrefix())); reader->closeFilterGroup(); }
double ConfusionMatrix::averageClassAccuracy(bool includeVoid) const { if (!normalized) { ConfusionMatrix normalizedConfusionMatrix(*this); normalizedConfusionMatrix.normalize(); assert(normalizedConfusionMatrix.isNormalized()); return normalizedConfusionMatrix.averageClassAccuracy(includeVoid); } utils::Average averageClassAccuracy; for (unsigned int label = 0; label < getNumClasses(); label++) { double classAccuracy = data(label, label); assertProbability(classAccuracy); if (includeVoid || label > 0) { averageClassAccuracy.addValue(classAccuracy); } } return averageClassAccuracy.getAverage(); }
/**
 Computes the relative frequency of each of the requested class labels.

 The frequency of a label is its sample counter divided by the sum of the sample counters of all
 the requested labels that exist in the dataset.

 @param classLabels the class labels to get the probabilities for
 @return a vector with one value per requested label, in the same order as classLabels. The value is zero for labels that do not exist in the dataset, and all values are zero if the counters sum to zero
*/
VectorFloat ClassificationData::getClassProbabilities( const Vector< UINT > &classLabels ) const {

    const UINT numLabels = (UINT)classLabels.size();
    const UINT numTrackedClasses = getNumClasses();

    VectorFloat probabilities( numLabels, 0 );
    Float total = 0;

    for(UINT i=0; i<numLabels; i++){
        //Search for the first tracked class that matches the requested label
        UINT match = 0;
        while( match < numTrackedClasses && classTracker[ match ].classLabel != classLabels[i] ){
            match++;
        }

        if( match < numTrackedClasses ){
            probabilities[i] = classTracker[ match ].counter;
            total += classTracker[ match ].counter;
        }
    }

    //Normalize the class probabilities
    if( total > 0 ){
        for(UINT i=0; i<numLabels; i++){
            probabilities[i] /= total;
        }
    }

    return probabilities;
}
/**
 Gets the indexs of all the samples in the dataset that have the given class label.

 The result is built from the samples themselves. The counter of the class tracker is only used as
 a hint for how much memory to reserve, so a counter that disagrees with the data can neither
 overrun the result nor leave unused entries in it.

 @param classLabel the class label to search for
 @return the indexs (in ascending order) of the samples with the class label, this is empty if no sample matches
*/
Vector< UINT > ClassificationData::getClassDataIndexes(UINT classLabel) const{

    const UINT M = getNumSamples();
    const UINT K = getNumClasses();

    //Get the expected number of samples in the class
    UINT expectedNumSamples = 0;
    for(UINT k=0; k<K; k++){
        if( classTracker[k].classLabel == classLabel ){
            expectedNumSamples = classTracker[k].counter;
            break;
        }
    }

    Vector< UINT > classIndexes;
    classIndexes.reserve( expectedNumSamples );

    for(UINT i=0; i<M; i++){
        if( data[i].getClassLabel() == classLabel ){
            classIndexes.push_back( i );
        }
    }

    return classIndexes;
}
bool TimeSeriesClassificationData::spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling){ crossValidationSetup = false; crossValidationIndexs.clear(); //K can not be zero if( K == 0 ){ errorLog << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl; return false; } //K can not be larger than the number of examples if( K > totalNumSamples ){ errorLog << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << std::endl; return false; } //K can not be larger than the number of examples in a specific class if the stratified sampling option is true if( useStratifiedSampling ){ for(UINT c=0; c<classTracker.size(); c++){ if( K > classTracker[c].counter ){ errorLog << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl; return false; } } } //Setup the dataset for k-fold cross validation kFoldValue = K; Vector< UINT > indexs( totalNumSamples ); //Work out how many samples are in each fold, the last fold might have more samples than the others UINT numSamplesPerFold = (UINT) floor( totalNumSamples/Float(K) ); //Resize the cross validation indexs buffer crossValidationIndexs.resize( K ); //Create the random partion indexs Random random; UINT randomIndex = 0; if( useStratifiedSampling ){ //Break the data into seperate classes Vector< Vector< UINT > > classData( getNumClasses() ); //Add the indexs to their respective classes for(UINT i=0; i<totalNumSamples; i++){ classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i ); } //Randomize the order of the indexs in each of the class index buffers for(UINT c=0; c<getNumClasses(); c++){ UINT numSamples = (UINT)classData[c].size(); for(UINT x=0; x<numSamples; x++){ //Pick a random index randomIndex = random.getRandomNumberInt(0,numSamples); //Swap the indexs SWAP( classData[c][ x ] , classData[c][ randomIndex ] ); } } //Loop over 
each of the classes and add the data equally to each of the k folds until there is no data left Vector< UINT >::iterator iter; for(UINT c=0; c<getNumClasses(); c++){ iter = classData[ c ].begin(); UINT k = 0; while( iter != classData[c].end() ){ crossValidationIndexs[ k ].push_back( *iter ); iter++; k++; k = k % K; } } }else{ //Randomize the order of the data for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i; for(UINT x=0; x<totalNumSamples; x++){ //Pick a random index randomIndex = random.getRandomNumberInt(0,totalNumSamples); //Swap the indexs SWAP( indexs[ x ] , indexs[ randomIndex ] ); } UINT counter = 0; UINT foldIndex = 0; for(UINT i=0; i<totalNumSamples; i++){ //Add the index to the current fold crossValidationIndexs[ foldIndex ].push_back( indexs[i] ); //Move to the next fold if ready if( ++counter == numSamplesPerFold && foldIndex < K-1 ){ foldIndex++; counter = 0; } } } crossValidationSetup = true; return true; }
/**
 Partitions the dataset into a training dataset (which is kept by this instance) and a
 testing/validation dataset (which is returned as a new instance).

 The trainingSizePercentage sets the amount of data which remains in this instance, the remaining
 data is added to the returned dataset. Any previous cross validation setup is discarded, as the
 dataset changes. After the split the per-class sample counters of this instance are recounted, so
 they describe the data that was kept (the class labels and class names are left as they were).

 @param trainingSizePercentage the percentage [0 100] of the data to keep for training, values above 100 are treated as 100
 @param useStratifiedSampling if true the percentage is applied to each class separately, otherwise it is applied to the shuffled dataset as a whole
 @return the testing/validation dataset
*/
TimeSeriesClassificationData TimeSeriesClassificationData::split(const UINT trainingSizePercentage,const bool useStratifiedSampling){

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    TimeSeriesClassificationData trainingSet(numDimensions);
    TimeSeriesClassificationData testSet(numDimensions);
    trainingSet.setAllowNullGestureClass(allowNullGestureClass);
    testSet.setAllowNullGestureClass(allowNullGestureClass);

    //A percentage above 100 would ask for more training samples than exist and index out of bounds below
    const UINT trainingPercentage = trainingSizePercentage > 100 ? 100 : trainingSizePercentage;

    //Create the random partion indexs
    Random random;
    UINT randomIndex = 0;

    if( useStratifiedSampling ){
        //Break the data into seperate classes
        Vector< Vector< UINT > > classData( getNumClasses() );

        //Add the indexs to their respective classes
        for(UINT i=0; i<totalNumSamples; i++){
            classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }

        //Randomize the order of the indexs in each of the class index buffers
        for(UINT k=0; k<getNumClasses(); k++){
            UINT numSamples = (UINT)classData[k].size();
            for(UINT x=0; x<numSamples; x++){
                //Pick a random index
                randomIndex = random.getRandomNumberInt(0,numSamples);

                //Swap the indexs
                SWAP( classData[k][ x ] ,classData[k][ randomIndex ] );
            }
        }

        //Loop over each class and add the data to the trainingSet and testSet
        for(UINT k=0; k<getNumClasses(); k++){
            UINT numTrainingExamples = (UINT) floor( Float(classData[k].size()) / 100.0 * Float(trainingPercentage) );

            //Add the data to the training and test sets
            for(UINT i=0; i<numTrainingExamples; i++){
                trainingSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getData() );
            }
            for(UINT i=numTrainingExamples; i<classData[k].size(); i++){
                testSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getData() );
            }
        }
    }else{
        const UINT numTrainingExamples = (UINT) floor( Float(totalNumSamples) / 100.0 * Float(trainingPercentage) );

        //Randomize the order of the data
        Vector< UINT > indexs( totalNumSamples );
        for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
        for(UINT x=0; x<totalNumSamples; x++){
            //Pick a random index
            randomIndex = random.getRandomNumberInt(0,totalNumSamples);

            //Swap the indexs
            SWAP( indexs[ x ] , indexs[ randomIndex ] );
        }

        //Add the data to the training and test sets
        for(UINT i=0; i<numTrainingExamples; i++){
            trainingSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getData() );
        }
        for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
            testSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getData() );
        }
    }

    //Overwrite the training data in this instance with the training data of the trainingSet
    data = trainingSet.getClassificationData();
    totalNumSamples = trainingSet.getNumSamples();

    //Recount the samples of each class, otherwise the counters would still describe the data before the split
    for(UINT k=0; k<classTracker.size(); k++){
        classTracker[k].counter = 0;
    }
    for(UINT i=0; i<totalNumSamples; i++){
        const UINT classLabel = data[i].getClassLabel();
        for(UINT k=0; k<classTracker.size(); k++){
            if( classTracker[k].classLabel == classLabel ){
                classTracker[k].counter++;
                break;
            }
        }
    }

    return testSet;
}
/**
 * Predicts a label for every pixel of the image.
 *
 * On the GPU path every tree in treeData is classified into a device buffer, the buffer is
 * normalized and the label with the maximal probability is selected per pixel. On the CPU path
 * the soft classification histograms of all trees in the ensemble are summed per pixel,
 * normalized by their sum and the label with the highest probability is selected.
 *
 * If the summed histograms of a pixel are all zero on the CPU path (e.g. with an empty ensemble),
 * the probabilities of the pixel are left at zero instead of being divided by zero, and the first
 * label is selected.
 *
 * @param image the image to classify
 * @param probabilities if not null, receives the per-class probabilities, indexed as (label, y, x)
 * @param onGPU whether the prediction is computed on the GPU or on the CPU
 * @param useDepthImages forwarded to classifyImage on the GPU path
 * @return the image of predicted labels
 * @throws std::runtime_error if the size of the tree data does not match the size of the ensemble
 */
LabelImage RandomForestImage::predict(const RGBDImage& image,
        cuv::ndarray<float, cuv::host_memory_space>* probabilities, const bool onGPU, bool useDepthImages) const {

    LabelImage prediction(image.getWidth(), image.getHeight());

    const LabelType numClasses = getNumClasses();

    if (treeData.size() != ensemble.size()) {
        throw std::runtime_error((boost::format("tree data size: %d, ensemble size: %d. histograms normalized?") % treeData.size() % ensemble.size()).str());
    }

    cuv::ndarray<float, cuv::host_memory_space> hostProbabilities(
            cuv::extents[numClasses][image.getHeight()][image.getWidth()],
            m_predictionAllocator);

    if (onGPU) {
        cuv::ndarray<float, cuv::dev_memory_space> deviceProbabilities(
                cuv::extents[numClasses][image.getHeight()][image.getWidth()],
                m_predictionAllocator);
        cudaSafeCall(cudaMemset(deviceProbabilities.ptr(), 0, static_cast<size_t>(deviceProbabilities.size() * sizeof(float))));

        {
            utils::Profile profile("classifyImagesGPU");
            for (const boost::shared_ptr<const TreeNodes>& data : treeData) {
                classifyImage(treeData.size(), deviceProbabilities, image, numClasses, data, useDepthImages);
            }
        }

        normalizeProbabilities(deviceProbabilities);

        cuv::ndarray<LabelType, cuv::dev_memory_space> output(image.getHeight(), image.getWidth(),
                m_predictionAllocator);
        determineMaxProbabilities(deviceProbabilities, output);

        // copy the results back to the host
        hostProbabilities = deviceProbabilities;
        cuv::ndarray<LabelType, cuv::host_memory_space> outputHost(image.getHeight(), image.getWidth(),
                m_predictionAllocator);
        outputHost = output;

        {
            utils::Profile profile("setLabels");
            for (int y = 0; y < image.getHeight(); ++y) {
                for (int x = 0; x < image.getWidth(); ++x) {
                    prediction.setLabel(x, y, static_cast<LabelType>(outputHost(y, x)));
                }
            }
        }
    } else {
        utils::Profile profile("classifyImagesCPU");

        tbb::parallel_for(tbb::blocked_range<size_t>(0, image.getHeight()),
                [&](const tbb::blocked_range<size_t>& range) {
                    for(size_t y = range.begin(); y != range.end(); y++) {
                        for(int x=0; x < image.getWidth(); x++) {

                            for (LabelType label = 0; label < numClasses; label++) {
                                hostProbabilities(label, y, x) = 0.0f;
                            }

                            // sum up the histograms of all trees
                            for (const auto& tree : ensemble) {
                                const auto& t = tree->getTree();
                                PixelInstance pixel(&image, 0, x, y);
                                const auto& hist = t->classifySoft(pixel);
                                assert(hist.size() == numClasses);
                                for(LabelType label = 0; label<hist.size(); label++) {
                                    hostProbabilities(label, y, x) += hist[label];
                                }
                            }

                            double sum = 0.0f;
                            for (LabelType label = 0; label < numClasses; label++) {
                                sum += hostProbabilities(label, y, x);
                            }

                            // dividing by a zero sum would turn every probability of the pixel into NaN
                            const bool canNormalize = (sum != 0.0);

                            float bestProb = -1.0f;
                            for (LabelType label = 0; label < numClasses; label++) {
                                if (canNormalize) {
                                    hostProbabilities(label, y, x) /= sum;
                                }
                                float prob = hostProbabilities(label, y, x);
                                if (prob > bestProb) {
                                    prediction.setLabel(x, y, label);
                                    bestProb = prob;
                                }
                            }
                        }
                    }
                });
    }

    if (probabilities) {
        *probabilities = hostProbabilities;
    }

    return prediction;
}
/**
 * Classifies the image with every tree in treeData on the GPU and, for the ensemble tree whose id
 * matches the tree data, calls setAllPixelsHistogram for every pixel whose ground truth label is
 * not ignored.
 *
 * NOTE(review): nothing is done if onGPU is false, there is no CPU code path yet.
 * NOTE(review): no label is set on the returned image in this function and hostProbabilities is
 * allocated but not used afterwards - confirm whether callers rely on the return value.
 *
 * @param image the image to classify
 * @param labelImage the ground truth labels of the image
 * @param onGPU the work is only done if this is true
 * @param useDepthImages forwarded to classifyImage
 * @return a label image with the dimensions of the input image
 * @throws std::runtime_error if the size of the tree data does not match the size of the ensemble
 */
LabelImage RandomForestImage::improveHistograms(const RGBDImage& image, const LabelImage& labelImage, const bool onGPU, bool useDepthImages) const {

    LabelImage prediction(image.getWidth(), image.getHeight());

    const LabelType numClasses = getNumClasses();

    if (treeData.size() != ensemble.size()) {
        throw std::runtime_error((boost::format("tree data size: %d, ensemble size: %d. histograms normalized?") % treeData.size() % ensemble.size()).str());
    }

    cuv::ndarray<float, cuv::host_memory_space> hostProbabilities(
            cuv::extents[numClasses][image.getHeight()][image.getWidth()],
            m_predictionAllocator);

    //These offsets should have been used instead of traversing to the leaf again
    /* cuv::ndarray<unsigned int, cuv::dev_memory_space> nodeOffsets(
            cuv::extents[image.getHeight()][image.getWidth()],
            m_predictionAllocator);
    */

    if (onGPU) {
        cuv::ndarray<float, cuv::dev_memory_space> deviceProbabilities(
                cuv::extents[numClasses][image.getHeight()][image.getWidth()],
                m_predictionAllocator);
        // start from zeroed device probabilities
        cudaSafeCall(cudaMemset(deviceProbabilities.ptr(), 0, static_cast<size_t>(deviceProbabilities.size() * sizeof(float))));

        {
            utils::Profile profile("classifyImagesGPU");
            for (const boost::shared_ptr<const TreeNodes>& data : treeData) {
                classifyImage(treeData.size(), deviceProbabilities, image, numClasses, data, useDepthImages);

                // look for the ensemble tree that has the same id as the tree data
                bool found_tree = false;
                //should be change to parallel for and add lock
                for (size_t treeNr = 0; treeNr < ensemble.size(); treeNr++) {
                    if (data->getTreeId() == ensemble[treeNr]->getId()) {
                        found_tree =true;
                        const boost::shared_ptr<RandomTree<PixelInstance, ImageFeatureFunction> >& tree = ensemble[treeNr]->getTree();
                        //this should have been used and done before trying to classify the images, since it doesn't change
                        //std::vector<size_t> leafSet;
                        //tree->collectLeafNodes(leafSet);
                        for (int y = 0; y < image.getHeight(); y++)
                            for (int x = 0; x < image.getWidth(); x++) {
                                LabelType label = labelImage.getLabel(x,y);
                                // pixels with an ignored ground truth label are skipped
                                if (!shouldIgnoreLabel(label)) {
                                    PixelInstance pixel(&image, label, x, y);
                                    //This should be changed. When classifying the image, the nodeoffsets should be returned and those used directly
                                    //instead of traversing again to the leaves. As a test, can check if the nodeoffset is the same as the one returned
                                    //by travertoleaf
                                    tree->setAllPixelsHistogram(pixel);
                                }
                            }
                    }
                    // stop searching after the first matching tree
                    if (found_tree)
                        break;
                }
            }
        }
    }
    //should also add the CPU code!
    return prediction;
}