Beispiel #1
0
 SepMatrixAccessor( BigMatrix &bm)
 {
   _ppMat = reinterpret_cast<T**>(bm.matrix());
   _rowOffset = bm.row_offset();
   _colOffset = bm.col_offset();
   _totalRows = bm.nrow();
 }
Beispiel #2
0
/* Pointer utility, returns a double pointer for either a BigMatrix or a
 * standard R matrix.
 */
double *
make_double_ptr (SEXP matrix, SEXP isBigMatrix)
{
  double *matrix_ptr;

  if (LOGICAL_VALUE (isBigMatrix) == (Rboolean) TRUE)   // Big Matrix
    {
      SEXP address = GET_SLOT (matrix, install ("address"));
      BigMatrix *pbm =
        reinterpret_cast < BigMatrix * >(R_ExternalPtrAddr (address));
      if (!pbm)
        return (NULL);

      // Check that have acceptable big.matrix
      if (pbm->row_offset () > 0 && pbm->ncol () > 1)
        {
          std::string errMsg =
            string ("sub.big.matrix objects cannoth have row ") +
            string
            ("offset greater than zero and number of columns greater than 1");
          Rf_error (errMsg.c_str ());
          return (NULL);
        }

      index_type offset = pbm->nrow () * pbm->col_offset ();
      matrix_ptr = reinterpret_cast < double *>(pbm->matrix ()) + offset;
    }
  else                          // Regular R Matrix
    {
      matrix_ptr = NUMERIC_DATA (matrix);
    }

  return (matrix_ptr);
};
Beispiel #3
0
/* Prepares the a matrix based on random sample of examples for modelling. For
   each continuous variable, copies only the in-sample indices from asave to a.
   Data for categorical variables are not copied, as they are stored in x.
   This function should only be called if there are any continuous variables. */
SEXP moda(SEXP asaveP, SEXP aP, SEXP insampP) {
    // Initialize function arguments.
    BigMatrix *asave = (BigMatrix*)R_ExternalPtrAddr(asaveP);
    BigMatrix *a = (BigMatrix*)R_ExternalPtrAddr(aP);
    MatrixAccessor<int> asaveAcc(*asave);
    MatrixAccessor<int> aAcc(*a);
    int *asaveCol, *aCol;
    int *insamp = INTEGER(insampP);
    
    // Set up working variables.
    index_type nCols = asave->ncol();
    index_type nRows = asave->nrow();
    index_type i, ja, jb;
    
    // For each numerical variable, move all the in-sample data to the top rows
    // of a.
    for (i = 0; i < nCols; i++) {
        asaveCol = asaveAcc[i];
        aCol = aAcc[i];
        for (ja = 0, jb = 0; ja < nRows; ja++) {
            if (insamp[asaveCol[ja] - 1] >= 1) {
                aCol[jb++] = asaveCol[ja];
            }
        }
    }
    return R_NilValue;
}
Beispiel #4
0
 MatrixAccessor( BigMatrix &bm )
 {
   _pMat = reinterpret_cast<T*>(bm.matrix());
   _totalRows = bm.total_rows();
   _totalCols = bm.total_columns();
   _rowOffset = bm.row_offset();
   _colOffset = bm.col_offset();
   _nrow = bm.nrow();
   _ncol = bm.ncol();
 }
Beispiel #5
0
SEXP ComputePvalsMain(SEXP Rinmat, SEXP Routmat, SEXP Routcol) {
    BigMatrix *inMat = reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(Rinmat));
    BigMatrix *outMat = reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(Routmat));
    double outCol = NUMERIC_DATA(Routcol)[0];
    
    if (inMat->separated_columns() != outMat->separated_columns())
        Rf_error("all big matrices are not the same column separated type");
    if (inMat->matrix_type() != outMat->matrix_type())
        Rf_error("all big matrices are not the same matrix type");
    if (inMat->ncol() != outMat->nrow())
        Rf_error("inMat # of cols must be the same as outMat # of rows");
    
    CALL_BIGFUNCTION_ARGS_THREE(ComputePvals, inMat, outMat, outCol)
    return(ret);
}
Beispiel #6
0
SEXP binit1BigMatrix(SEXP x, SEXP col, SEXP breaks)
{
  BigMatrix *pMat =  reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(x));
  if (pMat->separated_columns())
  {
    switch (pMat->matrix_type())
    {
      case 1:
        return CBinIt1<char>(SepMatrixAccessor<char>(*pMat),
          pMat->nrow(), col, breaks);
      case 2:
        return CBinIt1<short>(SepMatrixAccessor<short>(*pMat),
          pMat->nrow(), col, breaks);
      case 4:
        return CBinIt1<int>(SepMatrixAccessor<int>(*pMat),
          pMat->nrow(), col, breaks);
      case 8:
        return CBinIt1<double>(SepMatrixAccessor<double>(*pMat),
          pMat->nrow(), col, breaks);
    }
  }
  else
  {
    switch (pMat->matrix_type())
    {
      case 1:
        return CBinIt1<char>(MatrixAccessor<char>(*pMat),
          pMat->nrow(), col, breaks);
      case 2:
        return CBinIt1<short>(MatrixAccessor<short>(*pMat),
          pMat->nrow(), col, breaks);
      case 4:
        return CBinIt1<int>(MatrixAccessor<int>(*pMat),
          pMat->nrow(), col, breaks);
      case 8:
        return CBinIt1<double>(MatrixAccessor<double>(*pMat),
          pMat->nrow(), col, breaks);
    }
  }
  return R_NilValue;
}
Beispiel #7
0
SEXP kmeansMatrixEuclid(MatrixType x, index_type n, index_type m,
                  SEXP pcen, SEXP pclust, SEXP pclustsizes,
                  SEXP pwss, SEXP itermax)
{

  index_type j, col, nchange;

  int maxiters = Rf_asInteger(itermax);
  SEXP Riter;
  Rf_protect(Riter = Rf_allocVector(INTSXP, 1));
  int *iter = INTEGER(Riter);
  iter[0] = 0;

  BigMatrix *pcent = reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(pcen));
  MatrixAccessor<double> cent(*pcent);
  BigMatrix *Pclust = reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(pclust));
  MatrixAccessor<int> clust(*Pclust);
  BigMatrix *Pclustsizes = reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(pclustsizes));
  MatrixAccessor<double> clustsizes(*Pclustsizes);
  BigMatrix *Pwss = reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(pwss));
  MatrixAccessor<double> ss(*Pwss);

  int k = (int) pcent->nrow();                // number of clusters
  int cl, bestcl, oldcluster, newcluster;
  int done = 0;

  double temp;
  vector<double> d(k);                        // Vector of distances, internal only.
  vector<double> temp1(k);
  vector<vector<double> > tempcent(m, temp1); // For copy of global centroids k x m

  // At this point I can use [][] to access things, with ss[0][cl]
  // being used for the vectors, for example.
  // Before starting the loop, we only have cent (centers) as passed into the function.
  // Calculate clust and clustsizes, then update cent as centroids.
  
  for (cl=0; cl<k; cl++) clustsizes[0][cl] = 0.0;
  for (j=0; j<n; j++) {
    bestcl = 0;
    for (cl=0; cl<k; cl++) {
      d[cl] = 0.0;
      for (col=0; col<m; col++) {
        temp = (double)x[col][j] - cent[col][cl];
        d[cl] += temp * temp;
      }
      if (d[cl]<d[bestcl]) bestcl = cl;
    }
    clust[0][j] = bestcl + 1;          // Saving the R cluster number, not the C index.
    clustsizes[0][bestcl]++;
    for (col=0; col<m; col++)
      tempcent[col][bestcl] += (double)x[col][j];
  }
  for (cl=0; cl<k; cl++)
    for (col=0; col<m; col++)
      cent[col][cl] = tempcent[col][cl] / clustsizes[0][cl];

  do {

    nchange = 0;
    for (j=0; j<n; j++) { // For each of my points, this is offset from hash position

      oldcluster = clust[0][j] - 1;
      bestcl = 0;
      for (cl=0; cl<k; cl++) {         // Consider each of the clusters
        d[cl] = 0.0;                   // We'll get the distance to this cluster.
        for (col=0; col<m; col++) {    // Loop over the dimension of the data
          temp = (double)x[col][j] - cent[col][cl];
          d[cl] += temp * temp;
        }
        if (d[cl]<d[bestcl]) bestcl = cl;
      } // End of looking over the clusters for this j

      if (d[bestcl] < d[oldcluster]) {           // MADE A CHANGE!
        newcluster = bestcl;
        clust[0][j] = newcluster + 1;
        nchange++;
        clustsizes[0][newcluster]++;
        clustsizes[0][oldcluster]--;
        for (col=0; col<m; col++) {
          cent[col][oldcluster] += ( cent[col][oldcluster] - (double)x[col][j] ) / clustsizes[0][oldcluster];
          cent[col][newcluster] += ( (double)x[col][j] - cent[col][newcluster] ) / clustsizes[0][newcluster];
        }
      }

    } // End of this pass over my points.

    iter[0]++;
    if ( (nchange==0) || (iter[0]>=maxiters) ) done = 1;

  } while (done==0);

  // Collect the sums of squares now that we're done.
  for (cl=0; cl<k; cl++) ss[0][cl] = 0.0;
  for (j=0; j<n; j++) {
    for (col=0; col<m; col++) {
      cl = clust[0][j]-1;
      temp = (double)x[col][j] - cent[col][cl];
      ss[0][cl] += temp * temp;
    }
  }

  Rf_unprotect(1);
  return(Riter);

}
Beispiel #8
0
SEXP kmeansBigMatrix(SEXP x, SEXP cen, SEXP clust, SEXP clustsizes,
                     SEXP wss, SEXP itermax, SEXP dist)
{
  BigMatrix *pMat =  reinterpret_cast<BigMatrix*>(R_ExternalPtrAddr(x));
  int dist_calc = INTEGER(dist)[0];
  if (dist_calc == 0)
  {
    if (pMat->separated_columns())
    {
      switch (pMat->matrix_type())
      {
        case 1:
          return kmeansMatrixEuclid<char>(SepMatrixAccessor<char>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 2:
          return kmeansMatrixEuclid<short>(SepMatrixAccessor<short>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 4:
          return kmeansMatrixEuclid<int>(SepMatrixAccessor<int>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 8:
          return kmeansMatrixEuclid<double>(SepMatrixAccessor<double>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
      }
    }
    else
    {
      switch (pMat->matrix_type())
      {
        case 1:
          return kmeansMatrixEuclid<char>(MatrixAccessor<char>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 2:
          return kmeansMatrixEuclid<short>(MatrixAccessor<short>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 4:
          return kmeansMatrixEuclid<int>(MatrixAccessor<int>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 8:
          return kmeansMatrixEuclid<double>(MatrixAccessor<double>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
      }
    }
  }
  else
  {
    if (pMat->separated_columns())
    {
      switch (pMat->matrix_type())
      {
        case 1:
          return kmeansMatrixCosine<char>(SepMatrixAccessor<char>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 2:
          return kmeansMatrixCosine<short>(SepMatrixAccessor<short>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 4:
          return kmeansMatrixCosine<int>(SepMatrixAccessor<int>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 8:
           return kmeansMatrixCosine<double>(SepMatrixAccessor<double>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
      }
    }
    else
    {
      switch (pMat->matrix_type())
      {
        case 1:
          return kmeansMatrixCosine<char>(MatrixAccessor<char>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 2:
          return kmeansMatrixCosine<short>(MatrixAccessor<short>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 4:
          return kmeansMatrixCosine<int>(MatrixAccessor<int>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
        case 8:
           return kmeansMatrixCosine<double>(MatrixAccessor<double>(*pMat),
            pMat->nrow(), pMat->ncol(), cen, clust, clustsizes, wss, itermax);
      }
    }
  }
  return R_NilValue;
}