// Predicts one regression value per output dimension for the given input vector.
//
// Each output dimension n is handled by its own module in regressionModules; the first
// element of that module's regression data is stored in regressionData[n].
//
// @param inputVector: the input sample; must contain numInputDimensions values.
//        NOTE: when useScaling is true the vector is rescaled IN PLACE using inputVectorRanges.
// @return true if every regression module produced a prediction. Returns false if the model
//         is not trained, the input size is wrong, or any module fails to predict.
bool MultidimensionalRegression::predict_(VectorFloat &inputVector){

    if( !trained ){
        errorLog << "predict_(VectorFloat &inputVector) - Model Not Trained!" << std::endl;
        return false;
    }

    if( inputVector.getSize() != numInputDimensions ){
        errorLog << "predict_(VectorFloat &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << ")" << std::endl;
        return false;
    }

    //Scale the input into the [0 1] range using the stored input ranges
    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], inputVectorRanges[n].minValue, inputVectorRanges[n].maxValue, 0.0, 1.0);
        }
    }

    //Run one regression module per output dimension
    for(UINT n=0; n<numOutputDimensions; n++){
        if( !regressionModules[ n ]->predict( inputVector ) ){
            errorLog << "predict_(VectorFloat &inputVector) - Failed to predict for regression module " << n << std::endl;
            //Do not read the output of a module that failed, its regression data cannot be trusted
            return false;
        }
        regressionData[ n ] = regressionModules[ n ]->getRegressionData()[0];
    }

    //Map the [0 1] regression output back into the original target ranges
    if( useScaling ){
        for(UINT n=0; n<numOutputDimensions; n++){
            regressionData[n] = grt_scale(regressionData[n], 0.0, 1.0, targetVectorRanges[n].minValue, targetVectorRanges[n].maxValue);
        }
    }

    return true;
}
// Propagates a visible-layer vector up through the RBM and writes the hidden-unit
// activations to outputData.
//
// For every hidden unit i the output is grt_sigmoid( sum_j( weightsMatrix[i][j] * inputData[j] ) + hiddenLayerBias[i] ).
//
// @param inputData: the visible-layer input; must contain numVisibleUnits values.
//        NOTE: when useScaling is true the vector is rescaled IN PLACE using ranges.
// @param outputData: receives the hidden activations; resized to numHiddenUnits if needed.
// @return true on success, false if the model is not trained or the input size is wrong.
bool BernoulliRBM::predict_(VectorFloat &inputData,VectorFloat &outputData){

    if( !trained ){
        errorLog << "predict_(VectorFloat &inputData,VectorFloat &outputData) - Failed to run prediction - the model has not been trained." << std::endl;
        return false;
    }

    if( inputData.size() != numVisibleUnits ){
        errorLog << "predict_(VectorFloat &inputData,VectorFloat &outputData) - Failed to run prediction - the input data size (" << inputData.size() << ")";
        errorLog << " does not match the number of visible units (" << numVisibleUnits << "). " << std::endl;
        return false;
    }

    if( outputData.size() != numHiddenUnits ){
        outputData.resize( numHiddenUnits );
    }

    //Scale the data if needed
    if( useScaling ){
        for(UINT i=0; i<numVisibleUnits; i++){
            inputData[i] = grt_scale(inputData[i],ranges[i].minValue,ranges[i].maxValue,0.0,1.0);
        }
    }

    //Propagate the data up through the RBM
    for(UINT i=0; i<numHiddenUnits; i++){
        //The weighted sum must start from zero for every hidden unit, otherwise the
        //activation of unit i would also contain the sums of units 0..i-1
        Float x = 0.0;
        for(UINT j=0; j<numVisibleUnits; j++) {
            x += weightsMatrix[i][j] * inputData[j];
        }
        outputData[i] = grt_sigmoid( x + hiddenLayerBias[i] );
    }

    return true;
}
// Finds the cluster closest to the input vector and estimates a likelihood for every cluster.
//
// Updates clusterDistances (squared Euclidean distance to each cluster), clusterLikelihoods,
// bestDistance, predictedClusterLabel and maxLikelihood.
//
// @param inputVector: the input sample; must contain numInputDimensions values.
//        NOTE: when useScaling is true the vector is rescaled IN PLACE using ranges.
// @return true on success, false if the model is not trained or the input size is wrong.
bool KMeans::predict_(VectorFloat &inputVector){

    if( !trained ){
        return false;
    }

    if( inputVector.getSize() != numInputDimensions ){
        return false;
    }

    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
        }
    }

    const Float sigma = 1.0;
    const Float gamma = 1.0 / (2.0*grt_sqr(sigma));
    Float sum = 0;
    Float dist = 0;
    UINT minIndex = 0;
    bestDistance = grt_numeric_limits< Float >::max();
    predictedClusterLabel = 0;
    maxLikelihood = 0;
    if( clusterLikelihoods.getSize() != numClusters ) clusterLikelihoods.resize( numClusters );
    if( clusterDistances.getSize() != numClusters ) clusterDistances.resize( numClusters );

    for(UINT i=0; i<numClusters; i++){

        //We don't need to compute the sqrt as it works without it and is faster
        dist = 0;
        for(UINT j=0; j<numInputDimensions; j++){
            dist += grt_sqr( inputVector[j]-clusters[i][j] );
        }

        clusterDistances[i] = dist;

        //This will give us a value close to 1 for a dist of 0, and a value closer to 0 when the dist is large
        clusterLikelihoods[i] = exp( - grt_sqr(gamma * dist) );

        sum += clusterLikelihoods[i];

        if( dist < bestDistance ){
            bestDistance = dist;
            minIndex = i;
        }
    }

    //Normalize the likelihood. If the input is far from every cluster each likelihood can
    //underflow to zero, in which case the sum is zero and dividing would produce NaNs, so the
    //likelihoods are left at zero instead
    if( sum > 0 ){
        for(UINT i=0; i<numClusters; i++){
            clusterLikelihoods[i] /= sum;
        }
    }

    predictedClusterLabel = clusterLabels[ minIndex ];
    maxLikelihood = clusterLikelihoods[ minIndex ];

    return true;
}
// Classifies the input vector by accumulating the per-class output of every tree in the forest.
//
// classDistances[j] holds the sum of the tree outputs for class j, classLikelihoods[j] holds
// that sum divided by forestSize. The class with the largest likelihood sets
// predictedClassLabel, maxLikelihood and bestDistance.
//
// @param inputVector: the input sample; must contain numInputDimensions values.
//        NOTE: when useScaling is true the vector is rescaled IN PLACE using ranges.
// @return true on success. Returns false if the model is not trained, the input size is wrong,
//         the forest is empty, or a tree fails to predict or returns too few class values.
bool RandomForests::predict_(VectorDouble &inputVector){

    predictedClassLabel = 0;
    maxLikelihood = 0;

    if( !trained ){
        errorLog << "predict_(VectorDouble &inputVector) - Model Not Trained!" << std::endl;
        return false;
    }

    if( inputVector.getSize() != numInputDimensions ){
        errorLog << "predict_(VectorDouble &inputVector) - The size of the input Vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << ")" << std::endl;
        return false;
    }

    //Without any trees the average below would divide by zero
    if( forestSize == 0 ){
        errorLog << "predict_(VectorDouble &inputVector) - The forest is empty!" << std::endl;
        return false;
    }

    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
        }
    }

    if( classLikelihoods.getSize() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.getSize() != numClasses ) classDistances.resize(numClasses,0);

    std::fill(classDistances.begin(),classDistances.end(),0);

    //Run the prediction for each tree in the forest
    VectorDouble y;
    for(UINT i=0; i<forestSize; i++){
        if( !forest[i]->predict(inputVector, y) ){
            errorLog << "predict_(VectorDouble &inputVector) - Tree " << i << " failed prediction!" << std::endl;
            return false;
        }

        //Guard the accumulation below against reading past the end of the tree output
        if( y.getSize() < numClasses ){
            errorLog << "predict_(VectorDouble &inputVector) - Tree " << i << " returned " << y.getSize() << " values, expected " << numClasses << std::endl;
            return false;
        }

        for(UINT j=0; j<numClasses; j++){
            classDistances[j] += y[j];
        }
    }

    //Use the class distances to estimate the class likelihoods
    bestDistance = 0;
    UINT bestIndex = 0;
    Float classNorm = 1.0 / Float(forestSize);
    for(UINT k=0; k<numClasses; k++){
        classLikelihoods[k] = classDistances[k] * classNorm;

        if( classLikelihoods[k] > maxLikelihood ){
            maxLikelihood = classLikelihoods[k];
            bestDistance = classDistances[k];
            bestIndex = k;
        }
    }

    predictedClassLabel = classLabels[ bestIndex ];

    return true;
}
// Classifies the input vector as the class whose model returns the smallest distance.
//
// classDistances[k] holds the value returned by models[k].predict(), classLikelihoods[k] is
// 1.0 / (distance + 0.0001) normalised over all classes. When useNullRejection is enabled and
// the smallest distance exceeds the best model's rejection threshold, the predicted label is
// GRT_DEFAULT_NULL_CLASS_LABEL.
//
// @param inputVector: the input sample; must contain numInputDimensions values.
//        NOTE: when useScaling is true the vector is rescaled IN PLACE using ranges.
// @return true on success, false if the model is not trained or the input size is wrong.
bool MinDist::predict_(VectorFloat &inputVector){

    predictedClassLabel = 0;
    maxLikelihood = 0;

    if( !trained ){
        errorLog << "predict_(VectorFloat &inputVector) - MinDist Model Not Trained!" << std::endl;
        return false;
    }

    if( inputVector.size() != numInputDimensions ){
        errorLog << "predict_(VectorFloat &inputVector) - The size of the input vector (" << inputVector.size() << ") does not match the num features in the model (" << numInputDimensions << ")" << std::endl;
        return false;
    }

    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            inputVector[n] = grt_scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
        }
    }

    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
    if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);

    Float sum = 0;
    Float minDist = grt_numeric_limits< Float >::max();
    //Index of the closest model, kept in a local so predictedClassLabel only ever holds a real label
    UINT bestIndex = 0;
    for(UINT k=0; k<numClasses; k++){
        //Compute the distance for class k
        classDistances[k] = models[k].predict( inputVector );

        //Keep track of the best value
        if( classDistances[k] < minDist ){
            minDist = classDistances[k];
            bestIndex = k;
        }

        //Set the class likelihoods as 1.0 / dist[k], the small number is to stop divide by zero
        classLikelihoods[k] = 1.0 / (classDistances[k] + 0.0001);
        sum += classLikelihoods[k];
    }

    //Normalize the classlikelihoods, skipping the division if there is nothing to normalise by
    if( sum != 0 ){
        for(UINT k=0; k<numClasses; k++){
            classLikelihoods[k] /= sum;
        }
    }
    maxLikelihood = classLikelihoods[bestIndex];

    if( useNullRejection ){
        //Accept the best class only if its distance is within the model's rejection threshold
        if( minDist <= models[bestIndex].getRejectionThreshold() ) predictedClassLabel = models[bestIndex].getClassLabel();
        else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
    }else predictedClassLabel = models[bestIndex].getClassLabel();

    return true;
}
bool ClassificationData::scale(const Vector<MinMax> &ranges,const Float minTarget,const Float maxTarget){ if( ranges.getSize() != numDimensions ) return false; //Scale the training data for(UINT i=0; i<totalNumSamples; i++){ for(UINT j=0; j<numDimensions; j++){ data[i][j] = grt_scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget); } } return true; }
// Evaluates gauss() for every cluster and selects the cluster with the largest response.
//
// Updates clusterDistances (raw gauss() value per cluster), clusterLikelihoods (the same values
// normalised to sum to one), bestDistance, predictedClusterLabel and maxLikelihood.
//
// @param x: the input sample; must contain numInputDimensions values.
//        NOTE: when useScaling is true the vector is rescaled IN PLACE using ranges.
// @return true on success, false if the model is not trained or the input size is wrong.
bool GaussianMixtureModels::predict_(VectorFloat &x){

    if( !trained ){
        return false;
    }

    if( x.getSize() != numInputDimensions ){
        return false;
    }

    if( useScaling ){
        for(UINT n=0; n<numInputDimensions; n++){
            x[n] = grt_scale(x[n], ranges[n].minValue, ranges[n].maxValue, 0.0, 1.0);
        }
    }

    Float sum = 0;
    Float dist = 0;
    //Index of the cluster with the LARGEST gauss() response seen so far
    UINT bestIndex = 0;
    bestDistance = 0;
    predictedClusterLabel = 0;
    maxLikelihood = 0;
    if( clusterLikelihoods.size() != numClusters ) clusterLikelihoods.resize( numClusters );
    if( clusterDistances.size() != numClusters ) clusterDistances.resize( numClusters );

    for(UINT i=0; i<numClusters; i++){

        dist = gauss(x,i,det,mu,invSigma);

        clusterDistances[i] = dist;
        clusterLikelihoods[i] = dist;

        sum += clusterLikelihoods[i];

        if( dist > bestDistance ){
            bestDistance = dist;
            bestIndex = i;
        }
    }

    //Normalize the likelihood. If every cluster response is zero the sum is zero and dividing
    //would produce NaNs, so the likelihoods are left at zero instead
    if( sum > 0 ){
        for(UINT i=0; i<numClusters; i++){
            clusterLikelihoods[i] /= sum;
        }
    }

    predictedClusterLabel = clusterLabels[ bestIndex ];
    maxLikelihood = clusterLikelihoods[ bestIndex ];

    return true;
}
bool MatrixFloat::scale(const Vector< MinMax > &ranges,const Float minTarget,const Float maxTarget){ if( dataPtr == NULL ) return false; if( ranges.size() != cols ){ return false; } unsigned int i,j = 0; for(i=0; i<rows; i++){ for(j=0; j<cols; j++){ dataPtr[i*cols+j] = grt_scale(dataPtr[i*cols+j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget); } } return true; }
bool VectorFloat::scale( const Float minSource, const Float maxSource, const Float minTarget, const Float maxTarget, const bool constrain ){ const size_type N = this->size(); if( N == 0 ){ return false; } size_type i = 0; Float *data = getData(); for( i=0; i<N; i++ ){ data[i] = grt_scale(data[i],minSource,maxSource,minTarget,maxTarget,constrain); } return true; }
// Trains the RBM on the rows of data using mini-batch updates with momentum.
//
// For every batch the function samples the hidden states from the batch data (h1), samples a
// reconstruction of the visible units (v2), computes the hidden probabilities of that
// reconstruction (h2), and moves the weights and biases along the difference between the
// data-driven and reconstruction-driven statistics. Training stops after maxNumEpochs, or
// earlier once the change in average reconstruction error has stayed below minChange for
// minNumEpochs consecutive epochs.
//
// @param data: the training data, one sample per row.
//        NOTE: when useScaling is true the matrix is rescaled IN PLACE to the [0 1] range.
// @return true if training completed. Returns false if the data is empty, if batchSize or
//         batchStepSize is zero, or (when randomizeWeightsForTraining is false) if the existing
//         weights or biases do not match the layer sizes.
bool BernoulliRBM::train_(MatrixFloat &data){

    //Reject inputs that would otherwise cause a division by zero (empty data, zero batch size)
    //or a batch loop that never advances (zero batch step size)
    if( data.getNumRows() == 0 || data.getNumCols() == 0 ){
        errorLog << "train_(MatrixFloat &data) - The training data is empty!" << std::endl;
        return false;
    }
    if( batchSize == 0 ){
        errorLog << "train_(MatrixFloat &data) - The batch size must be greater than zero!" << std::endl;
        return false;
    }
    if( batchStepSize == 0 ){
        errorLog << "train_(MatrixFloat &data) - The batch step size must be greater than zero!" << std::endl;
        return false;
    }

    const UINT numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    numOutputDimensions = numHiddenUnits;
    numVisibleUnits = numInputDimensions;

    trainingLog << "NumInputDimensions: " << numInputDimensions << std::endl;
    trainingLog << "NumOutputDimensions: " << numOutputDimensions << std::endl;

    if( randomizeWeightsForTraining ){

        //Init the weights matrix with small uniform random values in [-1/numVisibleUnits 1/numVisibleUnits]
        weightsMatrix.resize(numHiddenUnits, numVisibleUnits);

        Float a = 1.0 / numVisibleUnits;
        for(UINT i=0; i<numHiddenUnits; i++) {
            for(UINT j=0; j<numVisibleUnits; j++) {
                weightsMatrix[i][j] = rand.getRandomNumberUniform(-a, a);
            }
        }

        //Init the bias units
        visibleLayerBias.resize( numVisibleUnits );
        hiddenLayerBias.resize( numHiddenUnits );
        std::fill(visibleLayerBias.begin(),visibleLayerBias.end(),0);
        std::fill(hiddenLayerBias.begin(),hiddenLayerBias.end(),0);

    }else{
        //The existing weights and biases are reused, so they must match the layer sizes
        if( weightsMatrix.getNumRows() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix row size does not match the number of hidden units!" << std::endl;
            return false;
        }
        if( weightsMatrix.getNumCols() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix column size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( visibleLayerBias.size() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Visible layer bias size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( hiddenLayerBias.size() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Hidden layer bias size does not match the number of hidden units!" << std::endl;
            return false;
        }
    }

    //Flag the model has been trained in case the user wants to save the model during a training iteration using an observer
    trained = true;

    //Make sure the data is scaled between [0 1]
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = grt_scale(data[i][j], ranges[j].minValue, ranges[j].maxValue, 0.0, 1.0);
            }
        }
    }

    const UINT numBatches = static_cast<UINT>( ceil( Float(numTrainingSamples)/batchSize ) );

    //Setup the batch indexs
    Vector< BatchIndexs > batchIndexs( numBatches );
    UINT startIndex = 0;
    for(UINT i=0; i<numBatches; i++){
        batchIndexs[i].startIndex = startIndex;
        batchIndexs[i].endIndex = startIndex + batchSize;

        //Make sure the last batch end index is not larger than the number of training examples
        if( batchIndexs[i].endIndex >= numTrainingSamples ){
            batchIndexs[i].endIndex = numTrainingSamples;
        }

        //Get the batch size
        batchIndexs[i].batchSize = batchIndexs[i].endIndex - batchIndexs[i].startIndex;

        //Set the start index for the next batch
        startIndex = batchIndexs[i].endIndex;
    }

    Timer timer;
    UINT i = 0;
    UINT j = 0;
    UINT n = 0;
    UINT epoch = 0;
    UINT noChangeCounter = 0;
    Float startTime = 0;
    Float alpha = learningRate;
    Float error = 0;
    Float err = 0;
    Float delta = 0;
    Float lastError = 0;
    Vector< UINT > indexList(numTrainingSamples);
    TrainingResult trainingResult;
    MatrixFloat wT( numVisibleUnits, numHiddenUnits );       //Stores a transposed copy of the weights vector
    MatrixFloat vW( numHiddenUnits, numVisibleUnits );       //Stores the weight velocity updates
    MatrixFloat tmpW( numHiddenUnits, numVisibleUnits );     //Stores the weight values that will be used to update the main weights matrix at each batch update
    MatrixFloat v1( batchSize, numVisibleUnits );            //Stores the real batch data during a batch update
    MatrixFloat v2( batchSize, numVisibleUnits );            //Stores the sampled batch data during a batch update
    MatrixFloat h1( batchSize, numHiddenUnits );             //Stores the hidden states given v1 and the current weightsMatrix
    MatrixFloat h2( batchSize, numHiddenUnits );             //Stores the sampled hidden states given v2 and the current weightsMatrix
    MatrixFloat c1( numHiddenUnits, numVisibleUnits );       //Stores h1' * v1
    MatrixFloat c2( numHiddenUnits, numVisibleUnits );       //Stores h2' * v2
    MatrixFloat vDiff( batchSize, numVisibleUnits );         //Stores the difference between v1-v2
    MatrixFloat hDiff( batchSize, numHiddenUnits );          //Stores the difference between h1-h2, so it has one column per hidden unit
    MatrixFloat cDiff( numHiddenUnits, numVisibleUnits );    //Stores the difference between c1-c2
    VectorFloat vDiffSum( numVisibleUnits );                 //Stores the column sum of vDiff
    VectorFloat hDiffSum( numHiddenUnits );                  //Stores the column sum of hDiff
    VectorFloat visibleLayerBiasVelocity( numVisibleUnits ); //Stores the velocity update of the visibleLayerBias
    VectorFloat hiddenLayerBiasVelocity( numHiddenUnits );   //Stores the velocity update of the hiddenLayerBias

    //Set all the velocity weights to zero
    vW.setAllValues( 0 );
    std::fill(visibleLayerBiasVelocity.begin(),visibleLayerBiasVelocity.end(),0);
    std::fill(hiddenLayerBiasVelocity.begin(),hiddenLayerBiasVelocity.end(),0);

    //Randomize the order that the training samples will be used in
    //NOTE(review): std::random_shuffle is deprecated in C++14 and removed in C++17, it is kept here
    //because replacing it with std::shuffle needs a random engine and an extra include
    for(i=0; i<numTrainingSamples; i++) indexList[i] = i;
    if( randomiseTrainingOrder ){
        std::random_shuffle(indexList.begin(), indexList.end());
    }

    //Start the main training loop
    timer.start();
    for(epoch=0; epoch<maxNumEpochs; epoch++) {
        startTime = timer.getMilliSeconds();
        error = 0;

        //Randomize the batch order
        std::random_shuffle(batchIndexs.begin(),batchIndexs.end());

        //Run each of the batch updates
        for(UINT k=0; k<numBatches; k+=batchStepSize){

            //Resize the data matrices, the matrices will only be resized if the rows cols are different
            v1.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h1.resize( batchIndexs[k].batchSize, numHiddenUnits );
            v2.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h2.resize( batchIndexs[k].batchSize, numHiddenUnits );

            //Setup the data pointers, using data pointers saves a few ms on large matrix updates
            //The pointers are fetched after the resize calls above
            Float **w_p = weightsMatrix.getDataPointer();
            Float **wT_p = wT.getDataPointer();
            Float **vW_p = vW.getDataPointer();
            Float **data_p = data.getDataPointer();
            Float **v1_p = v1.getDataPointer();
            Float **v2_p = v2.getDataPointer();
            Float **h1_p = h1.getDataPointer();
            Float **h2_p = h2.getDataPointer();
            Float *vlb_p = &visibleLayerBias[0];
            Float *hlb_p = &hiddenLayerBias[0];

            //Get the batch data
            UINT index = 0;
            for(i=batchIndexs[k].startIndex; i<batchIndexs[k].endIndex; i++){
                for(j=0; j<numVisibleUnits; j++){
                    v1_p[index][j] = data_p[ indexList[i] ][j];
                }
                index++;
            }

            //Copy a transposed version of the weights matrix, this is used to compute h1 and h2
            for(i=0; i<numHiddenUnits; i++)
                for(j=0; j<numVisibleUnits; j++)
                    wT_p[j][i] = w_p[i][j];

            //Compute h1
            h1.multiple(v1, wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h1_p[n][i] = sigmoidRandom( h1_p[n][i] + hlb_p[i] );
                }
            }

            //Compute v2
            v2.multiple(h1, weightsMatrix);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numVisibleUnits; i++){
                    v2_p[n][i] = sigmoidRandom( v2_p[n][i] + vlb_p[i] );
                }
            }

            //Compute h2
            h2.multiple(v2,wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h2_p[n][i] = grt_sigmoid( h2_p[n][i] + hlb_p[i] );
                }
            }

            //Compute c1, c2 and the difference between v1-v2
            c1.multiple(h1,v1,true);
            c2.multiple(h2,v2,true);
            vDiff.subtract(v1, v2);

            //Compute the sum of vdiff
            for(j=0; j<numVisibleUnits; j++){
                vDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    vDiffSum[j] += vDiff[i][j];
                }
            }

            //Compute the difference between h1 and h2
            hDiff.subtract(h1, h2);
            for(j=0; j<numHiddenUnits; j++){
                hDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    hDiffSum[j] += hDiff[i][j];
                }
            }

            //Compute the difference between c1 and c2
            cDiff.subtract(c1,c2);

            //Update the weight velocities
            for(i=0; i<numHiddenUnits; i++){
                for(j=0; j<numVisibleUnits; j++){
                    vW_p[i][j] = ((momentum * vW_p[i][j]) + (alpha * cDiff[i][j])) / batchIndexs[k].batchSize;
                }
            }
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBiasVelocity[i] = ((momentum * visibleLayerBiasVelocity[i]) + (alpha * vDiffSum[i])) / batchIndexs[k].batchSize;
            }
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBiasVelocity[i] = ((momentum * hiddenLayerBiasVelocity[i]) + (alpha * hDiffSum[i])) / batchIndexs[k].batchSize;
            }

            //Update the weights
            weightsMatrix.add( vW );

            //Update the bias for the visible layer
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBias[i] += visibleLayerBiasVelocity[i];
            }

            //Update the bias for the hidden layer
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBias[i] += hiddenLayerBiasVelocity[i];
            }

            //Compute the reconstruction error
            err = 0;
            for(i=0; i<batchIndexs[k].batchSize; i++){
                for(j=0; j<numVisibleUnits; j++){
                    err += SQR( v1[i][j] - v2[i][j] );
                }
            }

            error += err / batchIndexs[k].batchSize;
        }
        error /= numBatches;
        delta = lastError - error;
        lastError = error;

        trainingLog << "Epoch: " << epoch+1 << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Learning rate: " << alpha;
        trainingLog << " Momentum: " << momentum;
        trainingLog << " Average reconstruction error: " << error;
        trainingLog << " Delta: " << delta << std::endl;

        //Update the learning rate
        alpha *= learningRateUpdate;

        trainingResult.setClassificationResult(epoch, error, this);
        trainingResults.push_back(trainingResult);
        trainingResultsObserverManager.notifyObservers( trainingResult );

        //Check for convergence, the counter only grows while consecutive epochs stay below minChange
        if( fabs(delta) < minChange ){
            if( ++noChangeCounter >= minNumEpochs ){
                trainingLog << "Stopping training. MinChange limit reached!" << std::endl;
                break;
            }
        }else noChangeCounter = 0;

    }
    trainingLog << "Training complete after " << epoch << " epochs. Total training time: " << timer.getMilliSeconds()/1000.0 << " seconds" << std::endl;

    trained = true;

    return true;
}
// Classifies the input vector as the class whose mixture model gives the largest likelihood.
//
// classDistances[k] holds the raw value of computeMixtureLikelihood(x,k), classLikelihoods[k]
// holds the same values normalised to sum to one. Also updates bestDistance, maxLikelihood and
// predictedClassLabel.
//
// @param x: the input sample; must contain numInputDimensions values.
//        NOTE: when useScaling is true the vector is rescaled IN PLACE using ranges.
// @return true on success, false if the model is not trained or the input size is wrong.
bool GMM::predict_(VectorFloat &x){

    predictedClassLabel = 0;

    if( classDistances.getSize() != numClasses || classLikelihoods.getSize() != numClasses ){
        classDistances.resize(numClasses);
        classLikelihoods.resize(numClasses);
    }

    if( !trained ){
        errorLog << "predict_(VectorFloat &x) - Mixture Models have not been trained!" << std::endl;
        return false;
    }

    if( x.getSize() != numInputDimensions ){
        errorLog << "predict_(VectorFloat &x) - The size of the input vector (" << x.getSize() << ") does not match that of the number of features the model was trained with (" << numInputDimensions << ")." << std::endl;
        return false;
    }

    if( useScaling ){
        for(UINT i=0; i<numInputDimensions; i++){
            x[i] = grt_scale(x[i], ranges[i].minValue, ranges[i].maxValue, GMM_MIN_SCALE_VALUE, GMM_MAX_SCALE_VALUE);
        }
    }

    UINT bestIndex = 0;
    maxLikelihood = 0;
    bestDistance = 0;
    Float sum = 0;
    for(UINT k=0; k<numClasses; k++){
        classDistances[k] = computeMixtureLikelihood(x,k);

        classLikelihoods[k] = classDistances[k];
        sum += classLikelihoods[k];
        if( classLikelihoods[k] > bestDistance ){
            bestDistance = classLikelihoods[k];
            bestIndex = k;
        }
    }

    //Normalize the likelihoods. If every mixture likelihood is zero the sum is zero and dividing
    //would produce NaNs, so the likelihoods are left at zero instead
    if( sum > 0 ){
        for(UINT k=0; k<numClasses; k++){
            classLikelihoods[k] /= sum;
        }
    }
    maxLikelihood = classLikelihoods[bestIndex];

    if( useNullRejection ){
        //If the best (unnormalised) likelihood is at or above the model's null rejection threshold
        //then use the best class label, otherwise use the default null rejection class label
        if( classDistances[bestIndex] >= models[bestIndex].getNullRejectionThreshold() ){
            predictedClassLabel = models[bestIndex].getClassLabel();
        }else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
    }else{
        //Get the predicted class label
        predictedClassLabel = models[bestIndex].getClassLabel();
    }

    return true;
}