Ejemplo n.º 1
0
// Computes the pairwise Tanimoto similarities between the bit vectors of a
// Python sequence.
//
// Arguments:
//   bitVectList - a Python sequence (anything supporting __len__ and integer
//                 indexing) of ExplicitBitVects or of SparseBitVects. Only the
//                 first element is inspected to decide which of the two types
//                 the sequence holds.
//
// Returns:
//   a new 1D numpy array of doubles with nrows*(nrows-1)/2 entries, filled in
//   by MetricMatrixCalc::calcMetricMatrix() using TanimotoSimilarityMetric.
//
// Errors:
//   - CHECK_INVARIANT fires when the sequence has fewer than two elements.
//   - throw_value_error() is called when the first element is neither an
//     ExplicitBitVect nor a SparseBitVect.
//   - the pending Python error is propagated when the result array cannot be
//     allocated.
PyObject *getTanimotoSimMat(python::object bitVectList) {
    int nrows = python::extract<int>(bitVectList.attr("__len__")());
    CHECK_INVARIANT(nrows > 1, "");

    // Use the first element to find out which kind of bit vector we were given.
    python::object v1 = bitVectList[0];
    python::extract<ExplicitBitVect> ebvWorks(v1);
    python::extract<SparseBitVect> sbvWorks(v1);
    const bool haveExplicit = ebvWorks.check();
    if (!haveExplicit && !sbvWorks.check()) {
        throw_value_error("GetTanimotoSimMat can only take a sequence of ExplicitBitVects or SparseBitvects");
    }

    // Do the pair-count arithmetic in npy_intp: evaluated in int, the product
    // nrows*(nrows-1) overflows once nrows exceeds roughly 46000.
    const npy_intp nItems = nrows;
    npy_intp dMatLen = nItems * (nItems - 1) / 2;
    PyArrayObject *simRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
    if (!simRes) {
        // allocation failed; numpy has already set the Python exception
        python::throw_error_already_set();
    }
    // write results straight into the array's buffer to avoid a copy
    double *sMat = (double *)simRes->data;

    if (haveExplicit) {
        PySequenceHolder<ExplicitBitVect> dData(bitVectList);
        MetricMatrixCalc<PySequenceHolder<ExplicitBitVect>, ExplicitBitVect> mmCalc;
        mmCalc.setMetricFunc(&TanimotoSimilarityMetric<ExplicitBitVect, ExplicitBitVect>);
        mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
    }
    else {
        // the type check above guarantees that the SparseBitVect extraction works
        PySequenceHolder<SparseBitVect> dData(bitVectList);
        MetricMatrixCalc<PySequenceHolder<SparseBitVect>, SparseBitVect> mmCalc;
        mmCalc.setMetricFunc(&TanimotoSimilarityMetric<SparseBitVect, SparseBitVect>);
        mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
    }
    return PyArray_Return(simRes);
}
Ejemplo n.º 2
0
int main() {
  int n = 10;
  int m = 3;
  int dlen = n * (n - 1) / 2;
  int i, j;
  double *desc = new double[n * m];
  double **desc2D = new double *[n];

  for (i = 0; i < n; i++) {
    desc2D[i] = desc;
    desc += m;
  }
  desc = desc2D[0];

  for (i = 0; i < n; i++) {
    for (j = 0; j < m; j++) {
      desc[i * m + j] = ((double)rand()) / 10;
    }
  }

  // double x = EuclideanDistanceMetric(desc2D[0], desc2D[1], m);
  double *dmat = new double[dlen];
  MetricMatrixCalc<double **, double *> mmCalc;
  mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>);
  mmCalc.calcMetricMatrix(desc2D, n, m, dmat);

  for (i = 0; i < dlen; i++) {
    std::cout << dmat[i] << "\n";
  }

  delete[] desc2D;
  delete[] desc;
  delete[] dmat;

  exit(0);
}
Ejemplo n.º 3
0
PyObject *getEuclideanDistMat(python::object descripMat) {
    // Bit of a pain involved here, we accept three types of PyObjects here
    // 1. A Numeric Array
    //     - first find what 'type' of entry we have (float, double and int is all we recognize for now)
    //     - then point to contiguous piece of memory from the array that contains the data with a type*
    //     - then make a new type** pointer so that double index into this contiguous memory will work
    //       and then pass it along to the distance calculator
    // 2. A list of Numeric Vector (or 1D arrays)
    //     - in this case wrap descripMat with a PySequenceHolder<type*> where type is the
    //       type of entry in vector (accepted types are int, double and float
    //     - Then pass the PySequenceHolder to the metrci calculator
    // 3. A list (or tuple) of lists (or tuple)
    //     - In this case other than wrapping descripMat with a PySequenceHolder
    //       each of the indivual list in there are also wrapped by a PySequenceHolder
    //     - so the distance calculator is passed in a "PySequenceHolder<PySequenceHolder<double>>"
    //     - FIX: not that we always convert entry values to double here, even if we passed
    //       in a list of list of ints (or floats). Given that lists can be heterogeneous, I do not
    //       know how to ask a list what type of entries if contains.
    //
    //  OK my brain is going to explode now

    // first deal with situation where we have an Numeric Array
    PyObject *descMatObj = descripMat.ptr();
    PyArrayObject *distRes;
    if (PyArray_Check(descMatObj)) {
        // get the dimensions of the array
        int nrows = ((PyArrayObject *)descMatObj)->dimensions[0];
        int ncols = ((PyArrayObject *)descMatObj)->dimensions[1];
        int i;
        CHECK_INVARIANT((nrows > 0) && (ncols > 0), "");

        npy_intp dMatLen = nrows*(nrows-1)/2;

        // now that we have the dimensions declare the distance matrix which is always a
        // 1D double array
        distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);

        // grab a pointer to the data in the array so that we can directly put values in there
        // and avoid copying :
        double *dMat = (double *)distRes->data;

        PyArrayObject *copy;
        copy = (PyArrayObject *)PyArray_ContiguousFromObject(descMatObj,
                ((PyArrayObject *)descMatObj)->descr->type_num,
                2,2);
        // if we have double array
        if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_DOUBLE) {
            double *desc = (double *)copy->data;

            // REVIEW: create an adaptor object to hold a double * and support
            //  operator[]() so that we don't have to do this stuff:

            // here is the 2D array trick this so that when the distance calaculator
            // asks for desc2D[i] we basically get the ith row as double*
            double **desc2D = new double*[nrows];
            for (i = 0; i < nrows; i++) {
                desc2D[i] = desc;
                desc += ncols;
            }
            MetricMatrixCalc<double**, double*> mmCalc;
            mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>);
            mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);

            delete [] desc2D;
            // we got the distance matrix we are happy so return
            return PyArray_Return(distRes);
        }

        // if we have a float array
        else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_FLOAT) {
            float* desc = (float *)copy->data;
            float **desc2D = new float*[nrows];
            for (i = 0; i < nrows; i++) {
                desc2D[i] = desc;
                desc += ncols;
            }
            MetricMatrixCalc<float**, float*> mmCalc;
            mmCalc.setMetricFunc(&EuclideanDistanceMetric<float *, float*>);
            mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
            delete [] desc2D;
            return PyArray_Return(distRes);
        }

        // if we have an interger array
        else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_INT) {
            int *desc = (int *)copy->data;
            int **desc2D = new int*[nrows];
            for (i = 0; i < nrows; i++) {
                desc2D[i] = desc;
                desc += ncols;
            }
            MetricMatrixCalc<int**, int*> mmCalc;
            mmCalc.setMetricFunc(&EuclideanDistanceMetric<int *, int*>);
            mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
            delete [] desc2D;
            return PyArray_Return(distRes);
        }
        else {
            // unreconiged type for the matrix, throw up
            throw_value_error("The array has to be of type int, float, or double for GetEuclideanDistMat");
        }
    } // done with an array input
    else {
        // REVIEW: removed a ton of code here

        // we have probably have a list or a tuple

        unsigned int ncols = 0;
        unsigned int nrows = python::extract<unsigned int>(descripMat.attr("__len__")());
        CHECK_INVARIANT(nrows > 0, "Empty list passed in");

        npy_intp dMatLen = nrows*(nrows-1)/2;
        distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
        double *dMat = (double *)distRes->data;

        // assume that we a have a list of list of values (that can be extracted to double)
        std::vector<PySequenceHolder<double> > dData;
        dData.reserve(nrows);
        for (unsigned int i = 0; i < nrows; i++) {
            //PySequenceHolder<double> row(seq[i]);
            PySequenceHolder<double> row(descripMat[i]);
            if(i==0) {
                ncols = row.size();
            } else if( row.size() != ncols ) {
                throw_value_error("All subsequences must be the same length");
            }
            dData.push_back(row);
        }

        MetricMatrixCalc< std::vector<PySequenceHolder<double> >, PySequenceHolder<double> > mmCalc;
        mmCalc.setMetricFunc(&EuclideanDistanceMetric< PySequenceHolder<double>, PySequenceHolder<double> >);
        mmCalc.calcMetricMatrix(dData, nrows, ncols, dMat);
    }
    return PyArray_Return(distRes);
}