//Compute the regression data that will be stored at this node bool RegressionTree::computeNodeRegressionData( const RegressionData &trainingData, VectorDouble ®ressionData ){ const UINT M = trainingData.getNumSamples(); const UINT N = trainingData.getNumInputDimensions(); const UINT T = trainingData.getNumTargetDimensions(); if( M == 0 ){ Regressifier::errorLog << "computeNodeRegressionData(...) - Failed to compute regression data, there are zero training samples!" << endl; return false; } //Make sure the regression data is the correct size regressionData.clear(); regressionData.resize( T, 0 ); //The regression data at this node is simply an average over all the training data at this node for(unsigned int j=0; j<N; j++){ for(unsigned int i=0; i<M; i++){ regressionData[j] += trainingData[i].getTargetVector()[j]; } regressionData[j] /= M; } return true; }
void ClassificationBiasMatrix::PerformAdjustmnts (const VectorDouble& classifiedCounts, VectorDouble& adjCounts, VectorDouble& stdErrors ) { // For description of calc's see the paper: // "Estimating the Taxonomic composition of a sample when individuals are classified with error" // by Andrew Solow, Cabll Davis, Qiao Hu // Woods Hole Oceanographic Institution, Woods Hole Massachusetts // Marine Ecology Progress Series // published 2006-july-06; vol 216:309-311 if (classifiedCounts.size () != (kkuint32)numClasses) { KKStr errMsg = "ClassificationBiasMatrix::PerformAdjustmnts ***ERROR*** Disagreement in length of classifiedCounts[" + StrFormatInt ((kkint32)classifiedCounts.size (), "ZZZ0") + "] and Prev Defined ClassList[" + StrFormatInt (numClasses, "ZZZ0") + "]."; runLog.Level (-1) << errMsg << endl; valid = false; throw KKException (errMsg); } kkint32 x = 0; kkint32 i, j, k; // We need to deal with the special case when one entry in the probability diagonal is zero. { for (x = 0; x < numClasses; x++) { if ((*probabilities)[x][x] == 0.0) { // This will cause the inversion of the diagonal matrix to fail. To deal // with this situation; I will steal some probability from other buckets on // same row. double totalAmtStolen = 0.0; double percentToSteal = 0.01; for (i = 0; i < numClasses; i++) { if ((*probabilities)[x][i] != 0.0) { double amtToSteal = (*probabilities)[x][i] * percentToSteal; (*probabilities)[x][i] = (*probabilities)[x][i] - amtToSteal; totalAmtStolen += amtToSteal; } } (*probabilities)[x][x] = totalAmtStolen; } } } Matrix m (numClasses, 1); for (x = 0; x < numClasses; x++) m[x][0] = classifiedCounts[x]; Matrix transposed = probabilities->Transpose (); Matrix Q = transposed.Inverse (); Matrix n = Q * m; Matrix varM (numClasses, numClasses); for (j = 0; j < numClasses; j++) { double varM_j = 0.0; for (i = 0; i < numClasses; i++) { double p = (*probabilities)[i][j]; varM_j += n[i][0] * p * (1.0 - p); } varM[j][j] = varM_j; } for (j = 0; j < numClasses; j++) { for (k = 0; k < numClasses; k++) { if (j != k) { double covM_jk = 0.0; for (i = 0; i < numClasses; i++) covM_jk -= n[i][0] * (*probabilities)[i][j] * (*probabilities)[j][k]; varM[j][k] = covM_jk; } } } Matrix varN = Q * varM * Q.Transpose (); adjCounts.clear (); stdErrors.clear (); for (x = 0; x < numClasses; x++) { adjCounts.push_back (n[x][0]); stdErrors.push_back (sqrt (varN[x][x])); } return; } /* PerformAdjustmnts */