Example #1
0
// Python-facing wrapper around InfoEntropyGain().
//
// resArr must wrap a two-dimensional NumPy array whose element type is
// double, float, int or long. The array is converted to a contiguous buffer
// of the same element type and handed to InfoEntropyGain() together with its
// row and column counts.
//
// Returns the value computed by InfoEntropyGain().
//
// Problems (argument is not an array, is not two-dimensional / cannot be
// converted, or has an unsupported element type) are reported through
// throw_value_error().
double infoGain(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  const int typeNum = ((PyArrayObject *)matObj)->descr->type_num;

  // min_depth == max_depth == 2: the conversion fails and yields NULL unless
  // the input is two-dimensional, so the result has to be checked before any
  // field of it is touched.
  PyArrayObject *copy =
      (PyArrayObject *)PyArray_ContiguousFromObject(matObj, typeNum, 2, 2);
  if (!copy) {
    throw_value_error("Expecting a two-dimensional Numeric array object");
  }

  // Safe only now: the successful conversion guarantees two dimensions.
  long int rows = (long int)copy->dimensions[0];
  long int cols = (long int)copy->dimensions[1];

  double res = 0.0;
  bool supported = true;
  if (typeNum == PyArray_DOUBLE) {
    double *data = (double *)copy->data;
    res = InfoEntropyGain(data, rows, cols);
  } else if (typeNum == PyArray_FLOAT) {
    float *data = (float *)copy->data;
    res = InfoEntropyGain(data, rows, cols);
  } else if (typeNum == PyArray_INT) {
    int *data = (int *)copy->data;
    res = InfoEntropyGain(data, rows, cols);
  } else if (typeNum == PyArray_LONG) {
    long int *data = (long int *)copy->data;
    res = InfoEntropyGain(data, rows, cols);
  } else {
    supported = false;
  }

  // Release our reference exactly once, on every path, before reporting an
  // unsupported element type (previously that path leaked the copy).
  Py_DECREF(copy);
  if (!supported) {
    throw_value_error(
        "Numeric array object of type int or long or float or double");
  }
  return res;
}
Example #2
0
 // Entropy-gain score for a single bit, gated by BiasCheckBit().
 //
 // resMat is passed to InfoEntropyGain() as a matrix with 2 rows and
 // d_classes columns. If BiasCheckBit() rejects the bit, the score is 0.0
 // and InfoEntropyGain() is not called.
 double InfoBitRanker::BiasInfoEntropyGain(RDKit::USHORT *resMat) const {
   PRECONDITION(resMat,"bad result pointer");
   if (!this->BiasCheckBit(resMat)) {
     // bit fails the bias check: no gain is reported for it
     return 0.0;
   }
   return InfoEntropyGain(resMat, 2, d_classes);
 }
Example #3
0
  // Scores every (unmasked) bit with the metric selected by d_type and stores
  // the num best-scoring bits in dp_topBits.
  //
  // num: how many bits to report. Must not exceed d_dims, and when a mask is
  //      set must not exceed the number of on bits in the mask.
  //
  // Returns dp_topBits, a flat array of num*(2+d_classes) doubles that stays
  // owned by this object (it is deleted and reallocated on every call). The
  // caller reads it as a matrix with num rows and 2+d_classes columns:
  //   bitId, score, then one column per class holding d_counts[class][bitId]
  //
  // Throws ValueErrorException when num > d_dims.
  double *InfoBitRanker::getTopN(unsigned int num) {
    if(num>d_dims) throw ValueErrorException("attempt to rank more bits than present in the bit vectors");
    if(dp_maskBits)
      CHECK_INVARIANT(num <= dp_maskBits->getNumOnBits(), "Can't rank more bits than the ensemble size"); 

    // this is a place holder to pass along to infogain function
    // the size of this container should nVals*d_classes, where nVals
    // is the number of values a variable can take.
    // since we are dealing with a binary bit vector nVals = 2
    // in addition the infogain function pretends that this is a 2D matrix
    // with the number of rows equal to nVals and num of columns equal to 
    // d_classes
    RDKit::USHORT *resMat = new RDKit::USHORT[2*d_classes];
    
    // running collection of the best candidates seen so far; top() is the
    // entry that gets evicted when a better one shows up
    PR_QUEUE topN;

    for (unsigned int i = 0; i < d_dims; i++) {
      // we may want to ignore bits that are not turned on in any item of class 
      // "ignoreNoClass"
      /*
      if ((0 <= ignoreNoClass) && (d_classes > ignoreNoClass)) {
        if (d_counts[ignoreNoClass][i] == 0) {
          continue;
        }
        }*/
      
      // skip bits that are switched off in the mask
      if (dp_maskBits && !dp_maskBits->getBit(i)) {
           continue;
      }

      // fill up the 2 x d_classes matrix for this bit:
      //   row 0: per-class count taken from d_counts
      //   row 1: the class total minus that count
      for (unsigned int j = 0; j < d_classes; j++) {
        resMat[j] = d_counts[j][i];
        resMat[d_classes + j] = (d_clsCount[j] - d_counts[j][i]);
      }

      double info = 0.0;
      switch (d_type) {
      case ENTROPY:
        info = InfoEntropyGain(resMat, 2, d_classes);
        break;
      case BIASENTROPY:
        info = this->BiasInfoEntropyGain(resMat);
        break;
      case CHISQUARE:
        info = ChiSquare(resMat, 2, d_classes);
        break;
      case BIASCHISQUARE:
        info = BiasChiSquareGain(resMat);
        break;
      default:
        break;
      }

      PAIR_D_I entry(info, i);
      
      // bits with a negative score are never kept
      if (info >= 0.0) {
        if (topN.size() < num) {
          topN.push(entry);
        }
        // the emptiness check matters when num == 0: the queue then never
        // receives an entry and calling top() on it would be undefined
        else if (!topN.empty() && info > topN.top().first) {
          topN.pop();
          topN.push(entry);
        }
      }
    }
    
    delete [] resMat;
    
    // now fill up the result matrix for the topN bits
    // the result is a double * of size num*(2+d_classes); see the layout
    // description in the comment at the top of this function
    d_top = num;
    int ncols = 2+d_classes;
    
    delete [] dp_topBits;
    dp_topBits = new double[num*ncols];
    
    // the on bits of the mask are only needed to pad the result when fewer
    // than num bits made it into the queue
    RDKit::INT_VECT maskBits;
    if (dp_maskBits && topN.size() < num) {
      dp_maskBits->getOnBits(maskBits);
    }

    // rows are written from the last one backwards, taking entries off the
    // queue one at a time; once the queue is exhausted the remaining
    // (lower-index) rows are padded with a score of 0.0
    for (int i = num - 1; i >= 0; i--) {
      int offset = i*ncols;
      int bid;
      if (topN.size() == 0 ) {
        // padding row: use the i-th on bit of the mask, or bit i without one
        if (dp_maskBits) {
              bid = maskBits[i];
        } else {
              bid = i;
        }
        dp_topBits[offset + 1] = 0.0;
      } else {
        bid = topN.top().second; // bit id
        dp_topBits[offset + 1] = topN.top().first; // value of the infogain
        topN.pop();
      }
      dp_topBits[offset] = (double)bid;
      
      // remaining columns: count for this bit in each class
      for (unsigned int j = 0; j < d_classes; j++) {
        dp_topBits[offset + 2 + j] = (double)d_counts[j][bid];
      }
    }
    return dp_topBits;
  }