//Compute the regression data that will be stored at this node
bool RegressionTree::computeNodeRegressionData( const RegressionData &trainingData, VectorDouble &regressionData ){
    
    const UINT M = trainingData.getNumSamples();
    const UINT N = trainingData.getNumInputDimensions();
    const UINT T = trainingData.getNumTargetDimensions();
    
    if( M == 0 ){
        Regressifier::errorLog << "computeNodeRegressionData(...) - Failed to compute regression data, there are zero training samples!" << endl;
        return false;
    }
    
    //Make sure the regression data is the correct size
    regressionData.clear();
    regressionData.resize( T, 0 );
    
    //The regression data at this node is the per-dimension average of the
    //target vectors of all the training samples that reached this node
    for(unsigned int j=0; j<T; j++){
        for(unsigned int i=0; i<M; i++){
            regressionData[j] += trainingData[i].getTargetVector()[j];
        }
        regressionData[j] /= M;
    }
    
    return true;
}
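
The averaging step is easy to check in isolation. Below is a minimal, self-contained sketch of the same per-target-dimension mean, using plain std::vector in place of the GRT RegressionData and VectorDouble types; the sample values are made up for illustration.

#include <cstdio>
#include <vector>

//Compute the per-dimension mean of a set of target vectors, mirroring the
//loop in computeNodeRegressionData above.  Returns false when there are no
//samples, matching the guard in the member function.
static bool computeNodeMean( const std::vector< std::vector<double> > &targets,
                             std::vector<double> &mean ){
    const size_t M = targets.size();
    if( M == 0 ) return false;
    const size_t T = targets[0].size();  //number of target dimensions

    mean.assign( T, 0.0 );
    for( size_t j=0; j<T; j++ ){
        for( size_t i=0; i<M; i++ ){
            mean[j] += targets[i][j];
        }
        mean[j] /= static_cast<double>( M );
    }
    return true;
}

int main(){
    //Three 2-dimensional target vectors; the expected node mean is (2, 20)
    std::vector< std::vector<double> > targets = { {1.0,10.0}, {2.0,20.0}, {3.0,30.0} };
    std::vector<double> mean;
    if( computeNodeMean( targets, mean ) )
        std::printf( "node regression data: (%g, %g)\n", mean[0], mean[1] );
    return 0;
}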
Example #2
void   ClassificationBiasMatrix::PerformAdjustmnts (const VectorDouble&  classifiedCounts,
                                                    VectorDouble&        adjCounts,
                                                    VectorDouble&        stdErrors
                                                   )
{
  // For a description of the calculations see the paper:
  //    "Estimating the taxonomic composition of a sample when individuals are classified with error"
  //     by Andrew Solow, Cabell Davis, and Qiao Hu
  //     Woods Hole Oceanographic Institution, Woods Hole, Massachusetts
  //     Marine Ecology Progress Series, vol 216:309-311, 2001

  if  (classifiedCounts.size () != (kkuint32)numClasses)
  {
    KKStr errMsg = "ClassificationBiasMatrix::PerformAdjustmnts  ***ERROR***   Disagreement in length of classifiedCounts[" + 
                   StrFormatInt ((kkint32)classifiedCounts.size (), "ZZZ0") + 
                   "]  and Prev Defined ClassList[" + StrFormatInt (numClasses, "ZZZ0") + "].";
    runLog.Level (-1) << errMsg << endl;
    valid = false;
    throw KKException (errMsg);
  }

  kkint32 x = 0;
  kkint32  i, j, k;


  // We need to deal with the special case when one entry in the probability diagonal is zero.
  {
    for (x = 0;  x < numClasses;  x++)
    {
      if  ((*probabilities)[x][x] == 0.0)
      {
        // A zero on the diagonal would make the inversion of the probability
        // matrix fail.  To deal with this situation, steal a small fraction
        // of probability from the other buckets on the same row.

        double  totalAmtStolen = 0.0;
        double  percentToSteal = 0.01;
        for (i = 0;  i < numClasses;  i++)
        {
          if  ((*probabilities)[x][i] != 0.0)
          {
            double amtToSteal = (*probabilities)[x][i] * percentToSteal;
            (*probabilities)[x][i] = (*probabilities)[x][i] - amtToSteal;
            totalAmtStolen += amtToSteal;
          }
        }

        (*probabilities)[x][x] = totalAmtStolen;
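        // Illustration (made-up values): for a row (0.0, 0.6, 0.4) with
        // percentToSteal = 0.01, the loop removes 0.006 and 0.004 from the
        // non-zero buckets, leaving (0.01, 0.594, 0.396); the row still sums
        // to 1 and the diagonal entry is now non-zero.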
      }
    }
  }

  // Load the observed (classified) counts into the column vector m.
  Matrix  m (numClasses, 1);
  for  (x = 0;  x < numClasses;  x++)
    m[x][0] = classifiedCounts[x];

  // Adjusted counts:  n = inverse (transpose (P)) * m,  where P is the
  // classification probability matrix.
  Matrix  transposed = probabilities->Transpose ();
  Matrix  Q = transposed.Inverse ();
  Matrix  n = Q * m;

  // Diagonal of Var(m):  Var(m[j]) = sum over i of n[i] * p[i][j] * (1 - p[i][j]).
  Matrix  varM (numClasses, numClasses);
  for  (j = 0;  j < numClasses;  j++)
  {
    double  varM_j = 0.0;
    for  (i = 0;  i < numClasses;  i++)
    {
      double  p = (*probabilities)[i][j];
      varM_j += n[i][0] * p * (1.0 - p);
    }
    varM[j][j] = varM_j;
  }

  // Off-diagonal of Var(m):  Cov(m[j], m[k]) = -(sum over i of n[i] * p[i][j] * p[i][k]),
  // the multinomial covariance of the classification counts.
  for (j = 0;  j < numClasses;  j++)
  {
    for  (k = 0;  k < numClasses;  k++)
    {
      if  (j != k)
      {
        double  covM_jk = 0.0;
        for  (i = 0;  i < numClasses;  i++)
          covM_jk -= n[i][0] * (*probabilities)[i][j] * (*probabilities)[i][k];
        varM[j][k] = covM_jk;
      }
    }
  }

  // Propagate the variance through the linear transform n = Q * m:
  //   Var(n) = Q * Var(m) * transpose (Q).
  Matrix  varN = Q * varM * Q.Transpose ();

  // Return the adjusted counts and their standard errors (the square roots
  // of the diagonal of Var(n)).
  adjCounts.clear ();
  stdErrors.clear ();
    
  for  (x = 0;  x < numClasses;  x++)
  {
    adjCounts.push_back (n[x][0]);
    stdErrors.push_back (sqrt (varN[x][x]));
  }

  return;
}  /* PerformAdjustmnts */
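
For reference, the following is a minimal, self-contained sketch of the same adjustment for the two-class case, with the 2x2 inverse written out by hand. The probability matrix and observed counts are made-up illustration values, and the Matrix class used above is not needed.

#include <cmath>
#include <cstdio>

int main(){
  // p[i][j] = probability that a true member of class i is classified as j.
  double p[2][2] = { {0.9, 0.1},
                     {0.2, 0.8} };
  double m[2]    = { 95.0, 105.0 };   // observed (classified) counts

  // Q = inverse of the transpose of p, using the 2x2 closed form.
  double t[2][2] = { {p[0][0], p[1][0]},
                     {p[0][1], p[1][1]} };
  double det = t[0][0]*t[1][1] - t[0][1]*t[1][0];
  double q[2][2] = { { t[1][1]/det, -t[0][1]/det},
                     {-t[1][0]/det,  t[0][0]/det} };

  // Adjusted counts:  n = Q * m.
  double n[2] = { q[0][0]*m[0] + q[0][1]*m[1],
                  q[1][0]*m[0] + q[1][1]*m[1] };

  // Var(m): diagonal sum_i n[i]*p[i][j]*(1-p[i][j]),
  //         off-diagonal -sum_i n[i]*p[i][j]*p[i][k].
  double varM[2][2];
  for (int j = 0;  j < 2;  j++){
    for (int k = 0;  k < 2;  k++){
      varM[j][k] = 0.0;
      for (int i = 0;  i < 2;  i++){
        if (j == k)  varM[j][k] += n[i] * p[i][j] * (1.0 - p[i][j]);
        else         varM[j][k] -= n[i] * p[i][j] * p[i][k];
      }
    }
  }

  // Var(n) = Q * Var(m) * transpose (Q); std errors are sqrt of its diagonal.
  for (int x = 0;  x < 2;  x++){
    double varN_xx = 0.0;
    for (int j = 0;  j < 2;  j++)
      for (int k = 0;  k < 2;  k++)
        varN_xx += q[x][j] * varM[j][k] * q[x][k];
    std::printf ("class %d:  adjusted count = %.2f  std error = %.2f\n",
                 x, n[x], std::sqrt (varN_xx));
  }
  return 0;
}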