Exemple #1
0
SEXP dsyMatrix_trf(SEXP x)
{
    SEXP val = get_factors(x, "BunchKaufman"),
	dimP = GET_SLOT(x, Matrix_DimSym),
	uploP = GET_SLOT(x, Matrix_uploSym);
    int *dims = INTEGER(dimP), *perm, info;
    int lwork = -1, n = dims[0];
    const char *uplo = CHAR(STRING_ELT(uploP, 0));
    double tmp, *vx, *work;

    if (val != R_NilValue) return val;
    dims = INTEGER(dimP);
    val = PROTECT(NEW_OBJECT_OF_CLASS("BunchKaufman"));
    SET_SLOT(val, Matrix_uploSym, duplicate(uploP));
    SET_SLOT(val, Matrix_diagSym, mkString("N"));
    SET_SLOT(val, Matrix_DimSym, duplicate(dimP));
    vx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, n * n));
    AZERO(vx, n * n);
    F77_CALL(dlacpy)(uplo, &n, &n, REAL(GET_SLOT(x, Matrix_xSym)), &n, vx, &n);
    perm = INTEGER(ALLOC_SLOT(val, Matrix_permSym, INTSXP, n));
    F77_CALL(dsytrf)(uplo, &n, vx, &n, perm, &tmp, &lwork, &info);
    lwork = (int) tmp;
    C_or_Alloca_TO(work, lwork, double);

    F77_CALL(dsytrf)(uplo, &n, vx, &n, perm, work, &lwork, &info);

    if(lwork >= SMALL_4_Alloca) Free(work);
    if (info) error(_("Lapack routine dsytrf returned error code %d"), info);
    UNPROTECT(1);
    return set_factors(x, val, "BunchKaufman");
}
Exemple #2
0
/**
 * tune the proposal variances 
 *
 * @param da an input list object
 *
 */
static void tune_mcmc(SEXP da){
  int *dm = DIMS_SLOT(da);
  int nmh = dm[nmh_POS],
    etn = ceil(dm[tnit_POS] * 1.0 / dm[ntn_POS]) ;  // # iters per tuning loop;
  double *mh_sd = MHSD_SLOT(da), *acc = ACC_SLOT(da), 
    *sims = Calloc(etn * dm[nA_POS], double);
  int nmark = 0, *mark = Calloc(nmh, int);
  AZERO(mark, nmh);

  /* run MCMC and tune parameters */
  if (dm[rpt_POS]) Rprintf(_("Tuning phase...\n"));
  for (int i = 0; i < dm[ntn_POS]; i++) {
    do_mcmc(da, etn, 0, 1, etn, 0, sims);      /* run mcmc */    
    tune_var(nmh, acc, mh_sd, mark);           /* adjust proposal sd's */
    /* determine whether the parameters are fully tuned */
    nmark = 0;
    for (int j = 0; j < nmh; j++)
      if (mark[j] >= 3) nmark++;
    if (nmark == nmh) break;
  }
  if (dm[rpt_POS]){
    print_acc(1, nmh, acc, 1);
    print_line();
  }
  Free(sims);
  Free(mark);
}
Exemple #3
0
SEXP dpoMatrix_chol(SEXP x)
{
    SEXP val = get_factors(x, "Cholesky"),
         dimP = GET_SLOT(x, Matrix_DimSym),
         uploP = GET_SLOT(x, Matrix_uploSym);
    char *uplo = CHAR(STRING_ELT(uploP, 0));
    int *dims = INTEGER(dimP), info;
    int n = dims[0];
    double *vx;

    if (val != R_NilValue) return val;
    dims = INTEGER(dimP);
    val = PROTECT(NEW_OBJECT(MAKE_CLASS("Cholesky")));
    SET_SLOT(val, Matrix_uploSym, duplicate(uploP));
    SET_SLOT(val, Matrix_diagSym, mkString("N"));
    SET_SLOT(val, Matrix_DimSym, duplicate(dimP));
    vx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, n * n));
    AZERO(vx, n * n);
    F77_CALL(dlacpy)(uplo, &n, &n, REAL(GET_SLOT(x, Matrix_xSym)), &n, vx, &n);
    if (n > 0) {
        F77_CALL(dpotrf)(uplo, &n, vx, &n, &info);
        if (info) {
            if(info > 0)
                error(_("the leading minor of order %d is not positive definite"),
                      info);
            else /* should never happen! */
                error(_("Lapack routine %s returned error code %d"), "dpotrf", info);
        }
    }
    UNPROTECT(1);
    return set_factors(x, val, "Cholesky");
}
Exemple #4
0
SEXP dgeMatrix_crossprod(SEXP x, SEXP trans)
{
    int tr = asLogical(trans);/* trans=TRUE: tcrossprod(x) */
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dpoMatrix"))),
	nms = VECTOR_ELT(GET_SLOT(x, Matrix_DimNamesSym), tr ? 0 : 1),
	vDnms = ALLOC_SLOT(val, Matrix_DimNamesSym, VECSXP, 2);
    int *Dims = INTEGER(GET_SLOT(x, Matrix_DimSym)),
	*vDims = INTEGER(ALLOC_SLOT(val, Matrix_DimSym, INTSXP, 2));
    int k = tr ? Dims[1] : Dims[0], n = tr ? Dims[0] : Dims[1];
    double *vx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, n * n)),
	one = 1.0, zero = 0.0;

    AZERO(vx, n * n);
    SET_SLOT(val, Matrix_uploSym, mkString("U"));
    ALLOC_SLOT(val, Matrix_factorSym, VECSXP, 0);
    vDims[0] = vDims[1] = n;
    SET_VECTOR_ELT(vDnms, 0, duplicate(nms));
    SET_VECTOR_ELT(vDnms, 1, duplicate(nms));
    F77_CALL(dsyrk)("U", tr ? "N" : "T", &n, &k,
		    &one, REAL(GET_SLOT(x, Matrix_xSym)), Dims,
		    &zero, vx, &n);
    SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0));
    UNPROTECT(1);
    return val;
}
Exemple #5
0
/**
 * Establish a fill-reducing permutation for the sparse symmetric
 * matrix of order n represented by the column pointers Tp and row
 * indices Ti.
 *
 * @param n  order of the sparse symmetric matrix
 * @param Tp  column pointers (total length n + 1)
 * @param Ti  row indices (total length Tp[n])
 * @param Perm array of length n to hold the permutation
 * @param iPerm array of length n to hold the inverse permutation
 *
 */
void ssc_metis_order(int n, const int Tp [], const int Ti [],
                     int Perm[], int iPerm[])
{
    int  j, num_flag = 0, options_flag = 0;
    idxtype
    *perm = Calloc(n, idxtype), /* in case idxtype != int */
     *iperm = Calloc(n, idxtype),
      *xadj = Calloc(n+1, idxtype),
       *adj = Calloc(2 * (Tp[n] - n), idxtype);

    /* check row indices for correct range */
    for (j = 0; j < Tp[n]; j++)
        if (Ti[j] < 0 || Ti[j] >= n)
            error(_("row index Ti[%d] = %d is out of range [0,%d]"),
                  j, Ti[j], n - 1);
    /* temporarily use perm to store lengths */
    AZERO(perm, n);
    for (j = 0; j < n; j++) {
        int ip, p2 = Tp[j+1];
        for (ip = Tp[j]; ip < p2; ip++) {
            int i = Ti[ip];
            if (i != j) {
                perm[i]++;
                perm[j]++;
            }
        }
    }
    xadj[0] = 0;
    for (j = 0; j < n; j++) xadj[j+1] = xadj[j] + perm[j];
    /* temporarily use perm to store pointers */
    Memcpy(perm, xadj, n);
    for (j = 0; j < n; j++) {
        int ip, p2 = Tp[j+1];
        for (ip = Tp[j]; ip < p2; ip++) {
            int i = Ti[ip];
            if (i != j) {
                adj[perm[i]] = j;
                adj[perm[j]] = i;
                perm[i]++;
                perm[j]++;
            }
        }
    }
    METIS_NodeND(&n, xadj, adj, &num_flag, &options_flag, perm, iperm);
    for (j = 0; j < n; j++) {
        Perm[j] = (int) perm[j];
        iPerm[j] = (int) iperm[j];
    }
    Free(iperm);
    Free(perm);
    Free(xadj);
    Free(adj);
}
Exemple #6
0
static int *
install_diagonal_int(int *dest, SEXP A)
{
    int nc = INTEGER(GET_SLOT(A, Matrix_DimSym))[0];
    int i, ncp1 = nc + 1, unit = *diag_P(A) == 'U';
    int *ax = INTEGER(GET_SLOT(A, Matrix_xSym));

    AZERO(dest, nc * nc);
    for (i = 0; i < nc; i++)
	dest[i * ncp1] = (unit) ? 1 : ax[i];
    return dest;
}
Exemple #7
0
SEXP csc_matrix_mm(SEXP a, SEXP b, SEXP classed, SEXP right)
{
    int cl = asLogical(classed), rt = asLogical(right);
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgeMatrix")));
    int *adims = INTEGER(GET_SLOT(a, Matrix_DimSym)),
	*ai = INTEGER(GET_SLOT(a, Matrix_iSym)),
	*ap = INTEGER(GET_SLOT(a, Matrix_pSym)),
	*bdims = INTEGER(cl ? GET_SLOT(b, Matrix_DimSym) :
			 getAttrib(b, R_DimSymbol)),
	*cdims = INTEGER(ALLOC_SLOT(val, Matrix_DimSym, INTSXP, 2)),
	chk, ione = 1, j, jj, k, m, n;
    double *ax = REAL(GET_SLOT(a, Matrix_xSym)),
	*bx = REAL(cl ? GET_SLOT(b, Matrix_xSym) : b), *cx;

    if (rt) {
	m = bdims[0]; n = adims[1]; k = bdims[1]; chk = adims[0];
    } else {
	m = adims[0]; n = bdims[1]; k = adims[1]; chk = bdims[0];
    }
    if (chk != k)
	error(_("Matrices are not conformable for multiplication"));
    if (m < 1 || n < 1 || k < 1)
	error(_("Matrices with zero extents cannot be multiplied"));
    cx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, m * n));
    AZERO(cx, m * n); /* zero the accumulators */
    for (j = 0; j < n; j++) { /* across columns of c */
	if (rt) {
	    int kk, k2 = ap[j + 1];
	    for (kk = ap[j]; kk < k2; kk++) {
		F77_CALL(daxpy)(&m, &ax[kk], &bx[ai[kk]*m],
				&ione, &cx[j*m], &ione);
	    }
	} else {
	    double *ccol = cx + j * m,
		*bcol = bx + j * k;

	    for (jj = 0; jj < k; jj++) { /* across columns of a */
		int kk, k2 = ap[jj + 1];
		for (kk = ap[jj]; kk < k2; kk++) {
		    ccol[ai[kk]] += ax[kk] * bcol[jj];
		}
	    }
	}
    }
    cdims[0] = m; cdims[1] = n;
    UNPROTECT(1);
    return val;
}
Exemple #8
0
SEXP dtTMatrix_as_dtrMatrix(SEXP x)
{
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dtrMatrix"))),
	dimP = GET_SLOT(x, Matrix_DimSym),
	xiP = GET_SLOT(x, Matrix_iSym);
    int k, m = INTEGER(dimP)[0], n = INTEGER(dimP)[1], nnz = length(xiP);
    int *xi = INTEGER(xiP), *xj = INTEGER(GET_SLOT(x, Matrix_jSym)),
	sz = m * n;
    double *tx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, sz)),
	*xx = REAL(GET_SLOT(x, Matrix_xSym));

    SET_SLOT(val, Matrix_DimSym, duplicate(dimP));
    SET_SLOT(val, Matrix_uploSym, duplicate(GET_SLOT(x, Matrix_uploSym)));
    SET_SLOT(val, Matrix_diagSym, duplicate(GET_SLOT(x, Matrix_diagSym)));
    AZERO(tx, sz);
    for (k = 0; k < nnz; k++) tx[xi[k] + xj[k] * m] = xx[k];
    UNPROTECT(1);
    return val;
}
Exemple #9
0
static void do_mcmc(SEXP da, int nit, int nbn, int nth, int nS, 
		    int nR, double *sims){

  int *dm = DIMS_SLOT(da);
  int nU = dm[nU_POS], nmh = dm[nmh_POS],
    ns = 0, do_print = 0;
  /* initialize acc */
  double *acc = ACC_SLOT(da);
  AZERO(acc, nmh);

  /* run MCMC simulatons */
  GetRNGstate();
  for (int iter = 0; iter < nit; iter++){
    do_print = (nR > 0 && (iter + 1) % nR == 0);
    if (do_print) Rprintf(_("Iteration: %d \n "), iter + 1);

    /* update parameters */
    sim_beta(da);
    sim_phi_p(da);
    if (nU){
      sim_u(da);
      sim_Sigma(da);
    }
        
    /* store results  */
    if (iter >= nbn &&  (iter + 1 - nbn) % nth == 0 ){
      ns = (iter + 1 - nbn) / nth - 1;
      set_sims(da, ns, nS, sims);
    } 

    /* print out acceptance rate if necessary */
    if (do_print) print_acc(iter + 1, nmh, acc, 0);
    R_CheckUserInterrupt();
  }
  PutRNGstate();
  /* compute acceptance percentage */
  for (int i = 0; i < nmh; i++) acc[i] /= nit ;
}
Exemple #10
0
// returns a protected object
SEXP createTestRegression()
{
  SEXP regression = PROTECT(regression = NEW_OBJECT(MAKE_CLASS("bmer")));
  
  int protectCount = 0;
  
  // create and setup the dims slot
  int *dims = INTEGER(ALLOC_SLOT(regression, lme4_dimsSym, INTSXP, (int) (cvg_POS - nt_POS)));
  
  dims[n_POS]  = TEST_NUM_OBSERVATIONS;
  dims[p_POS]  = TEST_NUM_UNMODELED_COEFS;
  dims[nt_POS] = TEST_NUM_FACTORS;
  dims[isREML_POS] = FALSE;
  
  dims[q_POS] = 0;
  for (int i = 0; i < TEST_NUM_FACTORS; ++i) {
    dims[q_POS] += testNumGroupsPerFactor[i] * testNumModeledCoefPerFactor[i];
  }
  dims[np_POS] = dims[q_POS];
  
  int numObservations  = dims[n_POS];
  int numUnmodeledCoef = dims[p_POS];
  int numModeledCoef   = dims[q_POS];
  int numFactors       = dims[nt_POS];
  
  // create the deviance slot
  ALLOC_SLOT(regression, lme4_devianceSym, REALSXP, (int) (NULLdev_POS - ML_POS));
  
  // create and setup the Gp slot
  int *sparseRowsForFactor = INTEGER(ALLOC_SLOT(regression, lme4_GpSym, INTSXP, numFactors + 1));
  
  sparseRowsForFactor[0] = 0;
  for (int i = 0; i < numFactors; ++i) {
    sparseRowsForFactor[i + 1] = testNumGroupsPerFactor[i] * testNumModeledCoefPerFactor[i] + sparseRowsForFactor[i];
  }
  
  // create and setup the X slot
  SEXP denseDesignMatrixExp = ALLOC_SLOT(regression, lme4_XSym, REALSXP, numObservations * numUnmodeledCoef);
  SET_DIMS(denseDesignMatrixExp, numObservations, numUnmodeledCoef);
  double *denseDesignMatrix = REAL(denseDesignMatrixExp);
  for (int i = 0; i < numObservations; ++i) {
    denseDesignMatrix[i]                       = 1.0;
    denseDesignMatrix[i +     numObservations] = testDenseDesignMatrixColumn2[i];
    denseDesignMatrix[i + 2 * numObservations] = testDenseDesignMatrixColumn3[i];
  }
  
  double *response = REAL(ALLOC_SLOT(regression, lme4_ySym, REALSXP, numObservations));
  Memcpy(response, testResponse, numObservations);
  
  // sXwt slot
  double *sqrtObservationWeights = REAL(ALLOC_SLOT(regression, lme4_sqrtXWtSym, REALSXP, numObservations));
  for (int i = 0; i < numObservations; ++i) sqrtObservationWeights[i] = sqrt(testObservationWeights[i]);
  
  // create and setup the Zt slot
  SEXP sparseDesignMatrixExp = PROTECT(sparseDesignMatrixExp = NEW_OBJECT(MAKE_CLASS("dgCMatrix")));
  ++protectCount;
  SET_SLOT(regression, lme4_ZtSym, sparseDesignMatrixExp);
  
  
  int *sdm_dims = INTEGER(ALLOC_SLOT(sparseDesignMatrixExp, install("Dim"), INTSXP, 2));
  sdm_dims[0] = numModeledCoef;
  sdm_dims[1] = numObservations;
  
  int numSparseNonZeroes = 0;
  for (int i = 0; i < numFactors; ++i) numSparseNonZeroes += testNumModeledCoefPerFactor[i];
  numSparseNonZeroes *= numObservations;
  
  int *sdm_nonZeroRowIndices = INTEGER(ALLOC_SLOT(sparseDesignMatrixExp, install("i"), INTSXP, numSparseNonZeroes));
  Memcpy(sdm_nonZeroRowIndices, testSparseDesignMatrixNonZeroRowIndices, numSparseNonZeroes);

  int *sdm_indicesForColumn = INTEGER(ALLOC_SLOT(sparseDesignMatrixExp, install("p"), INTSXP, numObservations + 1));
  Memcpy(sdm_indicesForColumn, testSparseDesignMatrixIndicesForColumn, numObservations + 1);

  double *sdm_values = REAL(ALLOC_SLOT(sparseDesignMatrixExp, install("x"), REALSXP, numSparseNonZeroes));
  Memcpy(sdm_values, testSparseDesignMatrixValues, numSparseNonZeroes);
  
  
  // create and setup the A slot
  SEXP rotatedSparseDesignMatrixExp = PROTECT(rotatedSparseDesignMatrixExp = NEW_OBJECT(MAKE_CLASS("dgCMatrix")));
  ++protectCount;
  SET_SLOT(regression, lme4_ASym, rotatedSparseDesignMatrixExp);
  
  int *rsdm_dims = INTEGER(ALLOC_SLOT(rotatedSparseDesignMatrixExp, install("Dim"), INTSXP, 2));
  rsdm_dims[0] = numModeledCoef;
  rsdm_dims[1] = numObservations;
  
  int *rsdm_nonZeroRowIndices = INTEGER(ALLOC_SLOT(rotatedSparseDesignMatrixExp, install("i"), INTSXP, numSparseNonZeroes));
  Memcpy(rsdm_nonZeroRowIndices, testSparseDesignMatrixNonZeroRowIndices, numSparseNonZeroes);
  
  int *rsdm_indicesForColumn  = INTEGER(ALLOC_SLOT(rotatedSparseDesignMatrixExp, install("p"), INTSXP, numObservations + 1));
  Memcpy(rsdm_indicesForColumn, testSparseDesignMatrixIndicesForColumn, numObservations + 1);
  ALLOC_SLOT(rotatedSparseDesignMatrixExp, install("x"), REALSXP, numSparseNonZeroes);
  
  
  // ST slot
  SEXP stExp = ALLOC_SLOT(regression, lme4_STSym, VECSXP, numFactors);
  for (int i = 0; i < TEST_NUM_FACTORS; ++i) {
    SEXP stExp_i = PROTECT(allocVector(REALSXP, testNumModeledCoefPerFactor[i] * testNumModeledCoefPerFactor[i]));
    ++protectCount;
    SET_VECTOR_ELT(stExp, i, stExp_i);
    SET_DIMS(stExp_i, testNumModeledCoefPerFactor[i], testNumModeledCoefPerFactor[i]);
  
    double *stValues = REAL(stExp_i);
    Memcpy(stValues, testSTDecompositions[i], testNumModeledCoefPerFactor[i] * testNumModeledCoefPerFactor[i]);
  }
  
  // L slot
  SEXP upperLeftBlockLeftFactorizationExp = PROTECT(NEW_OBJECT(MAKE_CLASS("dCHMsimpl")));
  ++protectCount;
  SET_SLOT(regression, lme4_LSym, upperLeftBlockLeftFactorizationExp);
  
  int *ulfblf_permutation = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("perm"), INTSXP, numModeledCoef));
  Memcpy(ulfblf_permutation, testFactorizationPermutation, numModeledCoef);

  int *ulfblf_columnCounts = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("colcount"), INTSXP, numModeledCoef));
  Memcpy(ulfblf_columnCounts, testFactorizationColumnCounts, numModeledCoef);
  
  int numFactorizationNonZeroes = 0;
  for (int i = 0; i < numModeledCoef; ++i) numFactorizationNonZeroes += ulfblf_columnCounts[i];
  
  ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("x"), REALSXP, numFactorizationNonZeroes);
  
  int *ulfblf_indicesForColumn = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("p"), INTSXP, numModeledCoef + 1));
  Memcpy(ulfblf_indicesForColumn, testFactorizationIndicesForColumn, numModeledCoef + 1);
  
  int *ulfblf_nonZeroRowIndices = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("i"), INTSXP, numFactorizationNonZeroes));
  Memcpy(ulfblf_nonZeroRowIndices, testFactorizationNonZeroRowIndices, numFactorizationNonZeroes);
  
  int *ulfblf_numNonZeroes = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("nz"), INTSXP, numModeledCoef));
  Memcpy(ulfblf_numNonZeroes, testFactorizationNumNonZeroes, numModeledCoef);
  
  int *ulfblf_nextColumns = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("nxt"), INTSXP, numModeledCoef + 2));
  Memcpy(ulfblf_nextColumns, testFactorizationNextColumns, numModeledCoef + 2);
  
  int *ulfblf_prevColumns = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("prv"), INTSXP, numModeledCoef + 2));
  Memcpy(ulfblf_prevColumns, testFactorizationPrevColumns, numModeledCoef + 2);
  
  int *ulfblf_type = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("type"), INTSXP, 4));
  Memcpy(ulfblf_type, testFactorizationType, 4);
  
  int *ulfblf_dims = INTEGER(ALLOC_SLOT(upperLeftBlockLeftFactorizationExp, install("Dim"), INTSXP, 2));
  ulfblf_dims[0] = ulfblf_dims[1] = numModeledCoef;
  
  // misc slots
  ALLOC_SLOT(regression, lme4_offsetSym, REALSXP, 0);
  ALLOC_SLOT(regression, lme4_varSym,    REALSXP, 0);
  ALLOC_SLOT(regression, lme4_fixefSym,  REALSXP, numUnmodeledCoef);
  ALLOC_SLOT(regression, lme4_uSym,      REALSXP, numModeledCoef);
  ALLOC_SLOT(regression, lme4_CxSym,     REALSXP, numSparseNonZeroes);
  
  SEXP offDiagonalBlockRightFactorizationExp =
    ALLOC_SLOT(regression, lme4_RXSym, REALSXP, numUnmodeledCoef * numUnmodeledCoef);
  AZERO(REAL(offDiagonalBlockRightFactorizationExp), numUnmodeledCoef * numUnmodeledCoef);
  SET_DIMS(offDiagonalBlockRightFactorizationExp, numUnmodeledCoef, numUnmodeledCoef);

  SEXP lowerRightBlockRightFactorizationExp = 
    ALLOC_SLOT(regression, lme4_RZXSym, REALSXP, numModeledCoef * numUnmodeledCoef);
  SET_DIMS(lowerRightBlockRightFactorizationExp, numModeledCoef, numUnmodeledCoef);
  
  guaranteeValidPrior(regression);
  
  // at this point, everything should be jammed into the regression
  // or its objects
  UNPROTECT(protectCount);
  
  return (regression);
}
Exemple #11
0
/**
 * Update the theta_S parameters from the ST arrays in place.
 *
 * @param x an mer object
 * @param sigma current standard deviation of the per-observation
 *        noise terms.
 */
static void MCMC_S(SEXP x, double sigma)
{
    CHM_SP A = A_SLOT(x), Zt = Zt_SLOT(x);
    int *Gp = Gp_SLOT(x), *ai = (int*)(A->i),
         *ap = (int*)(A->p), *dims = DIMS_SLOT(x), *perm = PERM_VEC(x);
    int annz = ap[A->ncol], info, i1 = 1, n = dims[n_POS],
        nt = dims[nt_POS], ns, p = dims[p_POS], pos,
        q = dims[q_POS], znnz = ((int*)(Zt->p))[Zt->ncol];
    double *R, *ax = (double*)(A->x), *b = RANEF_SLOT(x),
                *eta = ETA_SLOT(x), *offset = OFFSET_SLOT(x),
                 *rr, *ss, one = 1, *u = U_SLOT(x), *y = Y_SLOT(x);
    int *nc = Alloca(nt, int), *nlev = Alloca(nt, int),
         *spt = Alloca(nt + 1, int);
    double **st = Alloca(nt, double*);
    R_CheckStack();

    ST_nc_nlev(GET_SLOT(x, lme4_STSym), Gp, st, nc, nlev);
    ns = 0;			/* ns is length(theta_S) */
    spt[0] = 0;			/* pointers into ss for terms */
    for (int i = 0; i < nt; i++) {
        ns += nc[i];
        spt[i + 1] = spt[i] + nc[i];
    }

    if (annz == znnz) { /* Copy Z' to A unless A has new nonzeros */
        Memcpy(ax, (double*)(Zt->x), znnz);
    } else error("Code not yet written for MCMC_S with NLMMs");
    /* Create T'Zt in A */
    Tt_Zt(A, Gp, nc, nlev, st, nt);
    /* Create P'u in ranef slot */
    for (int i = 0; i < q; i++) b[perm[i]] = u[i];
    /* Create X\beta + offset in eta slot */
    for (int i = 0; i < n; i++) eta[i] = offset ? offset[i] : 0;
    F77_CALL(dgemv)("N", &n, &p, &one, X_SLOT(x), &n,
                    FIXEF_SLOT(x), &i1, &one, eta, &i1);
    /* Allocate R, rr and ss */
    R = Alloca(ns * ns, double); /* crossproduct matrix then factor */
    rr = Alloca(ns, double);	 /* row of model matrix for theta_S */
    ss = Alloca(ns, double);	 /* right hand side, then theta_S */
    R_CheckStack();
    AZERO(R, ns * ns);
    AZERO(ss, ns);
    /* Accumulate crossproduct from pseudo-data part of model matrix */
    for (int i = 0; i < q; i++) {
        int sj = theta_S_ind(i, nt, Gp, nlev, spt);
        AZERO(rr, ns);
        rr[sj] = b[i];
        F77_CALL(dsyr)("U", &ns, &one, rr, &i1, R, &ns);
    }
    /* Accumulate crossproduct and residual product of the model matrix. */
    /* This is done one row at a time.  Rows of the model matrix
     * correspond to columns of T'Zt */
    for (int j = 0; j < n; j++) { /* jth column of T'Zt */
        AZERO(rr, ns);
        for (int p = ap[j]; p < ap[j + 1]; p++) {
            int i = ai[p];	/* row in T'Zt */
            int sj = theta_S_ind(i, nt, Gp, nlev, spt);

            rr[sj] += ax[p] * b[i];
            ss[sj] += rr[sj] * (y[j] - eta[j]);
        }
        F77_CALL(dsyr)("U", &ns, &one, rr, &i1, R, &ns);
    }
    F77_CALL(dposv)("U", &ns, &i1, R, &ns, ss, &ns, &info);
    if (info)
        error(_("Model matrix for theta_S is not positive definite, %d."), info);
    for (int j = 0; j < ns; j++) rr[j] = sigma * norm_rand();
    /* Sample from the conditional Gaussian distribution */
    F77_CALL(dtrsv)("U", "N", "N", &ns, R, &ns, rr, &i1);
    for (int j = 0; j < ns; j++) ss[j] += rr[j];
    /* Copy positive part of solution onto diagonals of ST */
    pos = 0;
    for (int i = 0; i < nt; i++) {
        for (int j = 0; j < nc[i]; j++) {
            st[i][j * (nc[i] + 1)] = (ss[pos] > 0) ? ss[pos] : 0;
            pos++;
        }
    }
    update_A(x);
}
Exemple #12
0
/**
 * Determine the conditional modes and the conditional variance of the
 * fixed effects given the data and the current random effects.
 * Create a Metropolis-Hasting proposal step from the multivariate
 * normal density, determine the acceptance probability and, if the
 * step is to be accepted, overwrite the contents of fixed with the
 * new contents.
 *
 * @param GS a GlmerStruct
 * @param b list of random effects
 * @param fixed current value of the fixed effects
 *
 * @return updated value of the fixed effects
 */
static double *
internal_glmer_fixef_update(GlmerStruct GS, SEXP b,
			    double fixed[])
{
    SEXP dmu_deta, var;
    int i, ione = 1, it, j, lwork = -1;
    double *ans = Calloc(GS->p, double), /* proposal point */
	*md = Calloc(GS->p, double), /* conditional modes */
	*w = Calloc(GS->n, double), *work,
	*wtd = Calloc(GS->n * GS->p, double),
	*z = Calloc(GS->n, double),
	crit, devr, one = 1, tmp, zero = 0;

    if (!isNewList(b) || LENGTH(b) != GS->nf)
	error(_("%s must be a %s of length %d"), "b", "list", GS->nf);
    for (i = 0; i < GS->nf; i++) {
	SEXP bi = VECTOR_ELT(b, i);
	if (!isReal(bi) || !isMatrix(bi))
	    error(_("b[[%d]] must be a numeric matrix"), i);
    }
    AZERO(z, GS->n);		/* -Wall */
    Memcpy(md, fixed, GS->p);
				/* calculate optimal size of work array */
    F77_CALL(dgels)("N", &(GS->n), &(GS->p), &ione, wtd, &(GS->n),
		    z,  &(GS->n), &tmp, &lwork, &j);
    if (j)			/* shouldn't happen */
	error(_("%s returned error code %d"), "dgels", j);
    lwork = (int) tmp;
    work = Calloc(lwork, double);

    AZERO(GS->off, GS->n); /* fitted values from random effects */
/*     fitted_ranef(GET_SLOT(GS->mer, lme4_flistSym), GS->unwtd, b, */
/* 		 INTEGER(GET_SLOT(GS->mer, lme4_ncSym)), GS->off); */
    for (i = 0; i < GS->n; i++)
	(GS->etaold)[i] = ((GS->off)[i] += (GS->offset)[i]);

    for (it = 0, crit = GS->tol + 1;
	 it < GS->maxiter && crit > GS->tol; it++) {
				/* fitted values from current beta */
	F77_CALL(dgemv)("N", &(GS->n), &(GS->p), &one,
			GS->X, &(GS->n), md,
			&ione, &zero, REAL(GS->eta), &ione);
				/* add in random effects and offset */
	vecIncrement(REAL(GS->eta), (GS->off), GS->n);
				/* check for convergence */
	crit = conv_crit(GS->etaold, REAL(GS->eta), GS->n);
				/* obtain mu, dmu_deta, var */
	eval_check_store(GS->linkinv, GS->rho, GS->mu);
	dmu_deta = PROTECT(eval_check(GS->mu_eta, GS->rho,
				      REALSXP, GS->n));
	var = PROTECT(eval_check(GS->var, GS->rho, REALSXP, GS->n));
				/* calculate weights and working residual */
	for (i = 0; i < GS->n; i++) {
	    w[i] = GS->wts[i] * REAL(dmu_deta)[i]/sqrt(REAL(var)[i]);
	    z[i] = w[i] * (REAL(GS->eta)[i] - (GS->off)[i] +
			   ((GS->y)[i] - REAL(GS->mu)[i]) /
			   REAL(dmu_deta)[i]);
	}
	UNPROTECT(2);
				/* weighted copy of the model matrix */
	for (j = 0; j < GS->p; j++)
	    for (i = 0; i < GS->n; i++)
		wtd[i + j * GS->n] = GS->X[i + j * GS->n] * w[i];
				/* weighted least squares solution */
	F77_CALL(dgels)("N", &(GS->n), &(GS->p), &ione, wtd, &(GS->n),
			z, &(GS->n), work, &lwork, &j);
	if (j) error(_("%s returned error code %d"), "dgels", j);
	Memcpy(md, z, GS->p);
    }
				/* wtd contains the Cholesky factor of
				 * the precision matrix */
    devr = normal_kernel(GS->p, md, wtd, GS->n, fixed);
    devr -= fixed_effects_deviance(GS, fixed);
    for (i = 0; i < GS->p; i++) {
	double var = norm_rand();
	ans[i] = var;
	devr -= var * var;
    }
    F77_CALL(dtrsv)("U", "N", "N", &(GS->p), wtd, &(GS->n), ans, &ione);
    for (i = 0; i < GS->p; i++) ans[i] += md[i];
    devr += fixed_effects_deviance(GS, ans);
    crit = exp(-0.5 * devr);	/* acceptance probability */
    tmp = unif_rand();
    if (asLogical(internal_getElement(GS->cv, "msVerbose"))) {
	Rprintf("%5.3f: ", crit);
	for (j = 0; j < GS->p; j++) Rprintf("%#10g ", ans[j]);
	Rprintf("\n");
    }
    if (tmp < crit) Memcpy(fixed, ans, GS->p);
    Free(ans); Free(md); Free(w);
    Free(work); Free(wtd); Free(z);
    return fixed;
}
Exemple #13
0
/**
 * Determine the deviance components associated with each of the
 * levels of a grouping factor at the conditional modes or a value
 * offset from the conditional modes by delb.
 *
 * @param GS pointer to a GlmerStruct
 * @param b conditional modes of the random effects
 * @param Gp group pointers
 * @param nc number of columns in the model matrix for the kth
 * grouping factor
 * @param k index (0-based) of the grouping factor
 * @param delb vector of length nc giving the changes in the
 * orthonormalized random effects
 * @param OmgFac Cholesky factor of the inverse of the penalty matrix
 * for this grouping factor
 * @param bVfac 3-dimensional array holding the factors of the
 * conditional variance-covariance matrix of the random effects
FIXME: This is wrong.  It is bVar[[i]] not bVfac that is being passed.
This only affects the AGQ method.
 * @param devcmp array to hold the deviance components
 *
 * @return devcmp
 */
static double*
rel_dev_1(GlmerStruct GS, const double b[], int nlev, int nc, int k,
	  const double delb[], const double OmgFac[],
	  const double bVfac[], double devcmp[])
{
    SEXP devs;
    int *fv = INTEGER(VECTOR_ELT(GET_SLOT(GS->mer, lme4_flistSym), k)),
	i, j;
    double *bcp = (double *) NULL;

    AZERO(devcmp, nlev);
    if (delb) {
	int ione = 1, ntot = nlev * nc;
	double sumsq = 0;
				/* copy the contents of b */
	bcp = Memcpy(Calloc(ntot, double), b, ntot);
	if (nc == 1) {
	    sumsq = delb[0] * delb[0];
	    for (i = 0; i < nlev; i++) b[i] += delb[0] * bVfac[i];
	} else {
	    int ncsq = nc * nc;
	    double *tmp = Calloc(nc, double);
	    for (i = 0; i < nlev; i++) {
		Memcpy(tmp, delb, nc);
		F77_CALL(dtrmv)("U", "N", "N", &nc, &(bVfac[i * ncsq]),
				&nc, tmp, &ione);
		for (j = 0; j < nc; j++) b[i + j * nc] = tmp[j];
	    }
				/* sum of squares of delb */
	    for (j = 0; j < nc; j++) sumsq += delb[j] * delb[j];
	}
	for (i = 0; i < nlev; i++) devcmp[i] = -sumsq;
    }
    internal_mer_fitted(GS->mer, GS->offset, REAL(GS->eta));
    eval_check_store(GS->linkinv, GS->rho, GS->mu);
    devs = PROTECT(eval_check(GS->dev_resids, GS->rho, REALSXP, GS->n));
    for (i = 0; i < GS->n; i++)
	devcmp[fv[i] - 1] += REAL(devs)[i];
    UNPROTECT(1);
    if (nc == 1) {
	for (i = 0; i < nlev; i++) {
	    double tmp = *OmgFac * b[i];
	    devcmp[i] += tmp * tmp;
	}
    } else {
	double *tmp = Calloc(nc, double);
	int ione = 1;

	for (i = 0; i < nlev; i++) {
	    for (j = 0; j < nc; j++) tmp[j] = b[i + j * nlev];
	    F77_CALL(dtrmv)("U", "N", "N", &nc, OmgFac, &nc,
			    tmp, &ione);
	    for (j = 0; j < nc; j++)
		devcmp[i] += tmp[j] * tmp[j];
	}
    }
    if (delb) {
	Memcpy(b, bcp, ntot);
	Free(bcp);
    }
    return devcmp;
}
Exemple #14
0
/**
 * Compute the approximation to the deviance using adaptive
 * Gauss-Hermite quadrature (AGQ).  When nAGQ == 1 this is the Laplace
 * approximation.
 *
 * @param pars pointer to a numeric vector of parameters
 * @param GSp pointer to a GlmerStruct object
 * @param nAGQp pointer to a scalar integer representing the number of
 * points in AGQ to use
 *
 * @return the approximation to the deviance as computed using AGQ
 */
SEXP glmer_devAGQ(SEXP pars, SEXP GSp, SEXP nAGQp)
{
    GlmerStruct GS = (GlmerStruct) R_ExternalPtrAddr(GSp);
    SEXP Omega = GET_SLOT(GS->mer, lme4_OmegaSym),
	bVar = GET_SLOT(GS->mer, lme4_bVarSym);
    int i, j, k, nAGQ = asInteger(nAGQp);
    int n2 = (nAGQ + 1)/2,
	*Gp = INTEGER(GET_SLOT(GS->mer, lme4_GpSym)),
	*nc = INTEGER(GET_SLOT(GS->mer, lme4_ncSym));
    double *f0, LaplaceDev = 0, AGQadjst = 0,
	*bhat = REAL(GET_SLOT(GS->mer, lme4_ranefSym));

    if (!isReal(pars) || LENGTH(pars) != GS->npar)
	error(_("`%s' must be a numeric vector of length %d"),
	      "pars", GS->npar);
    if (GS->nf > 1 && nAGQ > 1) {
	warning(_("AGQ not available for multiple grouping factors - using Laplace"));
	nAGQ = 1;
    }
    if (!internal_bhat(GS, REAL(pars), REAL(pars) + (GS->p)))
	return ScalarReal(DBL_MAX);

    for (i = 0; i < GS->nf; i++) {
	int nci = nc[i];
	int ncip1 = nci + 1, ncisqr = nci * nci,
	    nlev = (Gp[i + 1] - Gp[i])/nci;
	double *omgf = REAL(GET_SLOT(M_dpoMatrix_chol(VECTOR_ELT(Omega, i)), lme4_xSym)),
	    *bVi = Memcpy(Calloc(ncisqr * nlev, double),
			   REAL(VECTOR_ELT(bVar, i)), ncisqr * nlev);

        for (j = 0; j < nci; j++) { /* nlev * logDet(Omega_i) */
            LaplaceDev += 2 * nlev * log(omgf[j * ncip1]);
        }
        for (k = 0; k < nlev; k++) {
	    double *bVik = bVi + k * ncisqr;
            F77_CALL(dpotrf)("U", &nci, bVik, &nci, &j);
            if (j)
                error(_("Leading %d minor of bVar[[%d]][,,%d] not positive definite"),
                      j, i + 1, k + 1);
            for (j = 0; j < nci; j++) LaplaceDev -= 2 * log(bVik[j * ncip1]);
        }

	f0 = Calloc(nlev, double);
	rel_dev_1(GS, bhat, nlev, nci, i, (double *) NULL,
		  omgf, bVi, f0);
	for (k = 0; k < nlev; k++) LaplaceDev += f0[k];
	if (nAGQ > 1) {
	    double *fx = Calloc(nlev, double),
		*rellik = Calloc(nlev, double),
		*delb = Calloc(nci, double);

	    if (nci > 1) error(_("code not yet written"));
	    AZERO(rellik, nlev);	/* zero accumulator */
	    for (k = 0; k < n2; k++) {
		delb[0] = GHQ_x[nAGQ][k];
		if (delb[0]) {
		    rel_dev_1(GS, bhat, nlev, nci, i, delb,
			      omgf, bVi, fx);
		    for (j = 0; j < nlev; j++) {
			rellik[j] += GHQ_w[nAGQ][k] *
			    exp(-(fx[j] - f0[j])/2);
		    }
		    delb[0] *= -1;
		    rel_dev_1(GS, bhat, nlev, nci, i, delb,
			      omgf, bVi, fx);
		    for (j = 0; j < nlev; j++) {
			rellik[j] += GHQ_w[nAGQ][k] *
			    exp(-(fx[j] - f0[j])/2);
		    }
		} else {
		    for (j = 0; j < nlev; j++)
			rellik[j] += GHQ_w[nAGQ][k];
		}
	    }
	    for (j = 0; j < nlev; j++)
		AGQadjst -= 2 * log(rellik[j]);
	    Free(fx); Free(rellik);
	}
	Free(f0); Free(bVi);
    }
Exemple #15
0
SEXP magma_dgeMatrix_crossprod(SEXP x, SEXP trans)
{
#ifdef HIPLAR_WITH_MAGMA
	int tr = asLogical(trans);/* trans=TRUE: tcrossprod(x) */
	SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dpoMatrix"))),
	     nms = VECTOR_ELT(GET_SLOT(x, Matrix_DimNamesSym), tr ? 0 : 1),
	     vDnms = ALLOC_SLOT(val, Matrix_DimNamesSym, VECSXP, 2);
	int *Dims = INTEGER(GET_SLOT(x, Matrix_DimSym)),
	    *vDims = INTEGER(ALLOC_SLOT(val, Matrix_DimSym, INTSXP, 2));
	int k = tr ? Dims[1] : Dims[0], n = tr ? Dims[0] : Dims[1];
	double *vx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, n * n)),
		 one = 1.0, zero = 0.0;
	double *A =  REAL(GET_SLOT(x, Matrix_xSym));
	AZERO(vx, n * n);
	SET_SLOT(val, Matrix_uploSym, mkString("U"));
	ALLOC_SLOT(val, Matrix_factorSym, VECSXP, 0);
	vDims[0] = vDims[1] = n;
	SET_VECTOR_ELT(vDnms, 0, duplicate(nms));
	SET_VECTOR_ELT(vDnms, 1, duplicate(nms));
	if(n && GPUFlag == 1) {

#ifdef HIPLAR_DBG
		R_ShowMessage("DBG: Performing crossproduct using cublasDsyrk");
#endif
		cublasStatus retStatus;
		double *d_A, *d_C;

		/*retStatus = cublasCreate(&handle);
		  if ( retStatus != CUBLAS_STATUS_SUCCESS )		
		  error(_("CUBLAS initialisation failed"));
		  */

		cublasAlloc(n * k, sizeof(double), (void**)&d_A);
		/* Error Checking */
		retStatus = cublasGetError ();
		if (retStatus != CUBLAS_STATUS_SUCCESS) 
			error(_("CUBLAS: Error in Memory Allocation"));
		/********************************************/

		cublasAlloc(n * n, sizeof(double), (void**)&d_C);

		/* Error Checking */
		retStatus = cublasGetError ();
		if (retStatus != CUBLAS_STATUS_SUCCESS) 
			error(_("CUBLAS: Error in Memory Allocation"));
		/********************************************/

		cublasSetVector( n  * k , sizeof(double), A, 1, d_A, 1);

		/* Error Checking */
		retStatus = cublasGetError ();
		if (retStatus != CUBLAS_STATUS_SUCCESS) 
			error(_("CUBLAS: Error in Data Transfer to Device"));
		/********************************************/
		
		//cublasSetVector( n  * n , sizeof(double), vx, 1, d_C, 1);
		
		/* Error Checking */
		//retStatus = cublasGetError ();
		//if (retStatus != CUBLAS_STATUS_SUCCESS) 
		//	error(_("CUBLAS: Error in Data Transfer to Device"));
		/********************************************/


		cublasDsyrk('U' , tr ? 'N' : 'T', n, k, one, d_A, Dims[0], zero, d_C, n);

		cublasGetVector( n * n , sizeof(double), d_C, 1, vx, 1);

		/* Error Checking */
		retStatus = cublasGetError ();
		if (retStatus != CUBLAS_STATUS_SUCCESS) 
			error(_("CUBLAS: Error in Data Transfer from Device"));
		/********************************************/

		cublasFree(d_A);
		cublasFree(d_C);

	} else if(n){
	
#ifdef HIPLAR_DBG
		R_ShowMessage("DBG: Performing cross prod with dsyrk");
#endif
		F77_CALL(dsyrk)("U", tr ? "N" : "T", &n, &k, &one, A, Dims,
				&zero, vx, &n);
	}

	SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0));
	UNPROTECT(1);
	return val;
#endif
	return R_NilValue;
}
Exemple #16
0
SEXP magma_dpoMatrix_chol(SEXP x)
{
#ifdef HIPLAR_WITH_MAGMA
	SEXP val = get_factors(x, "Cholesky"),
			 dimP = GET_SLOT(x, Matrix_DimSym),
			 uploP = GET_SLOT(x, Matrix_uploSym);

	const char *uplo = CHAR(STRING_ELT(uploP, 0));
	int *dims = INTEGER(dimP), info;
	int n = dims[0];
	double *vx;
	cublasStatus retStatus;
	if (val != R_NilValue) return val;
	dims = INTEGER(dimP);
	val = PROTECT(NEW_OBJECT(MAKE_CLASS("Cholesky")));
	SET_SLOT(val, Matrix_uploSym, duplicate(uploP));
	SET_SLOT(val, Matrix_diagSym, mkString("N"));
	SET_SLOT(val, Matrix_DimSym, duplicate(dimP));
	vx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, n * n));
	AZERO(vx, n * n);
	
	//we could put in magmablas_dlacpy but it only
	//copies all of the matrix 
	F77_CALL(dlacpy)(uplo, &n, &n, REAL(GET_SLOT(x, Matrix_xSym)), &n, vx, &n);
	if (n > 0) {

		if(GPUFlag == 0){
#ifdef HIPLAR_DBG	
		R_ShowMessage("DBG: Cholesky decomposition using dpotrf;");
#endif
			F77_CALL(dpotrf)(uplo, &n, vx, &n, &info);
		}
		else if(GPUFlag == 1 && Interface == 0){
		
#ifdef HIPLAR_DBG	
			R_ShowMessage("DBG: Cholesky decomposition using magma_dpotrf;");
#endif			
			int nrows, ncols;
			nrows = ncols = n;

			magma_int_t lda;
			lda = nrows;

			magma_dpotrf(uplo[0], ncols, vx, lda, &info);

			/* Error Checking */
			retStatus = cudaGetLastError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in magma_dpotrf"));
			/********************************************/
			

		}
		else if(GPUFlag == 1 && Interface == 1) {
	
#ifdef HIPLAR_DBG	
			R_ShowMessage("DBG: Cholesky decomposition using magma_dpotrf_gpu;");	
#endif
			double *d_c;
			int nrows, ncols;
			nrows = ncols = n;
			int N2 = nrows * ncols;


			magma_int_t lda;
			lda = nrows;

			cublasAlloc(lda * ncols, sizeof(double), (void**)&d_c);
			
			/* Error Checking */
			retStatus = cublasGetError ();
			if (retStatus != CUBLAS_STATUS_SUCCESS) 
				error(_("CUBLAS: Error in Memory Allocation"));
			/********************************************/

			cublasSetVector(N2, sizeof(double), vx, 1, d_c, 1);
			
			/* Error Checking */
			retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Date Transfer to Device"));
			/********************************************/


			magma_dpotrf_gpu(uplo[0], ncols, d_c, lda, &info);
			
			/* Error Checking */
			retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in magma_dpotrf_gpu"));
			/********************************************/
			

			cublasGetVector(nrows * ncols, sizeof(double), d_c, 1, vx, 1);		
			
			/* Error Checking */
			retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Date Transfer from Device"));
			/********************************************/
			
			cublasFree(d_c);
		}
		else
			error(_("MAGMA/LAPACK/Interface Flag not defined correctly"));
		}
		
	if (info) {
			if(info > 0)
				error(_("the leading minor of order %d is not positive definite"),
						info);
			else /* should never happen! */
				error(_("Lapack routine %s returned error code %d"), "dpotrf", info);
		}

	UNPROTECT(1);
	return set_factors(x, val, "Cholesky");
#endif
	return R_NilValue;
}
/**
 * Matrix exponential - based on the _corrected_ code for Octave's expm function.
 *
 * @param x real square matrix to exponentiate
 *
 * @return matrix exponential of x
 */
SEXP dgeMatrix_exp(SEXP x)
{
    const double one = 1.0, zero = 0.0;
    const int i1 = 1;
    int *Dims = INTEGER(GET_SLOT(x, Matrix_DimSym));
    const int n = Dims[1], nsqr = n * n, np1 = n + 1;

    SEXP val = PROTECT(duplicate(x));
    int i, ilo, ilos, ihi, ihis, j, sqpow;
    int *pivot = Calloc(n, int);
    double *dpp = Calloc(nsqr, double), /* denominator power Pade' */
	*npp = Calloc(nsqr, double), /* numerator power Pade' */
	*perm = Calloc(n, double),
	*scale = Calloc(n, double),
	*v = REAL(GET_SLOT(val, Matrix_xSym)),
	*work = Calloc(nsqr, double), inf_norm, m1_j/*= (-1)^j */, trshift;
    R_CheckStack();

    if (n < 1 || Dims[0] != n)
	error(_("Matrix exponential requires square, non-null matrix"));
    if(n == 1) {
	v[0] = exp(v[0]);
	UNPROTECT(1);
	return val;
    }

    /* Preconditioning 1.  Shift diagonal by average diagonal if positive. */
    trshift = 0;		/* determine average diagonal element */
    for (i = 0; i < n; i++) trshift += v[i * np1];
    trshift /= n;
    if (trshift > 0.) {		/* shift diagonal by -trshift */
	for (i = 0; i < n; i++) v[i * np1] -= trshift;
    }

    /* Preconditioning 2. Balancing with dgebal. */
    F77_CALL(dgebal)("P", &n, v, &n, &ilo, &ihi, perm, &j);
    if (j) error(_("dgeMatrix_exp: LAPACK routine dgebal returned %d"), j);
    F77_CALL(dgebal)("S", &n, v, &n, &ilos, &ihis, scale, &j);
    if (j) error(_("dgeMatrix_exp: LAPACK routine dgebal returned %d"), j);

    /* Preconditioning 3. Scaling according to infinity norm */
    inf_norm = F77_CALL(dlange)("I", &n, &n, v, &n, work);
    sqpow = (inf_norm > 0) ? (int) (1 + log(inf_norm)/log(2.)) : 0;
    if (sqpow < 0) sqpow = 0;
    if (sqpow > 0) {
	double scale_factor = 1.0;
	for (i = 0; i < sqpow; i++) scale_factor *= 2.;
	for (i = 0; i < nsqr; i++) v[i] /= scale_factor;
    }

    /* Pade' approximation. Powers v^8, v^7, ..., v^1 */
    AZERO(npp, nsqr);
    AZERO(dpp, nsqr);
    m1_j = -1;
    for (j = 7; j >=0; j--) {
	double mult = padec[j];
	/* npp = m * npp + padec[j] *m */
	F77_CALL(dgemm)("N", "N", &n, &n, &n, &one, v, &n, npp, &n,
			&zero, work, &n);
	for (i = 0; i < nsqr; i++) npp[i] = work[i] + mult * v[i];
	/* dpp = m * dpp + (m1_j * padec[j]) * m */
	mult *= m1_j;
	F77_CALL(dgemm)("N", "N", &n, &n, &n, &one, v, &n, dpp, &n,
			&zero, work, &n);
	for (i = 0; i < nsqr; i++) dpp[i] = work[i] + mult * v[i];
	m1_j *= -1;
    }
    /* Zero power */
    for (i = 0; i < nsqr; i++) dpp[i] *= -1.;
    for (j = 0; j < n; j++) {
	npp[j * np1] += 1.;
	dpp[j * np1] += 1.;
    }

    /* Pade' approximation is solve(dpp, npp) */
    F77_CALL(dgetrf)(&n, &n, dpp, &n, pivot, &j);
    if (j) error(_("dgeMatrix_exp: dgetrf returned error code %d"), j);
    F77_CALL(dgetrs)("N", &n, &n, dpp, &n, pivot, npp, &n, &j);
    if (j) error(_("dgeMatrix_exp: dgetrs returned error code %d"), j);
    Memcpy(v, npp, nsqr);

    /* Now undo all of the preconditioning */
    /* Preconditioning 3: square the result for every power of 2 */
    while (sqpow--) {
	F77_CALL(dgemm)("N", "N", &n, &n, &n, &one, v, &n, v, &n,
			&zero, work, &n);
	Memcpy(v, work, nsqr);
    }
    /* Preconditioning 2: apply inverse scaling */
    for (j = 0; j < n; j++)
	for (i = 0; i < n; i++)
	    v[i + j * n] *= scale[i]/scale[j];


    /* 2 b) Inverse permutation  (if not the identity permutation) */
    if (ilo != 1 || ihi != n) {
	/* Martin Maechler's code */

#define SWAP_ROW(I,J) F77_CALL(dswap)(&n, &v[(I)], &n, &v[(J)], &n)

#define SWAP_COL(I,J) F77_CALL(dswap)(&n, &v[(I)*n], &i1, &v[(J)*n], &i1)

#define RE_PERMUTE(I)				\
	int p_I = (int) (perm[I]) - 1;		\
	SWAP_COL(I, p_I);			\
	SWAP_ROW(I, p_I)

	/* reversion of "leading permutations" : in reverse order */
	for (i = (ilo - 1) - 1; i >= 0; i--) {
	    RE_PERMUTE(i);
	}

	/* reversion of "trailing permutations" : applied in forward order */
	for (i = (ihi + 1) - 1; i < n; i++) {
	    RE_PERMUTE(i);
	}
    }

    /* Preconditioning 1: Trace normalization */
    if (trshift > 0.) {
	double mult = exp(trshift);
	for (i = 0; i < nsqr; i++) v[i] *= mult;
    }

    /* Clean up */
    Free(work); Free(scale); Free(perm); Free(npp); Free(dpp); Free(pivot);
    UNPROTECT(1);
    return val;
}
void gibbsOneWayAnova(double *y, int *N, int J, int sumN, int *whichJ, double rscale, int iterations, double *chains, double *CMDE, SEXP debug, int progress, SEXP pBar, SEXP rho)
{
	int i=0,j=0,m=0,Jp1sq = (J+1)*(J+1),Jsq=J*J,Jp1=J+1,npars=0;
	double ySum[J],yBar[J],sumy2[J],densDelta=0;
	double sig2=1,g=1;
	double XtX[Jp1sq], ZtZ[Jsq];
	double Btemp[Jp1sq],B2temp[Jsq],tempBetaSq=0;
	double muTemp[J],oneOverSig2temp=0;
	double beta[J+1],grandSum=0,grandSumSq=0;
	double shapeSig2 = (sumN+J*1.0)/2, shapeg = (J+1.0)/2;
	double scaleSig2=0, scaleg=0;
	double Xty[J+1],Zty[J];
	double logDet=0;
	double rscaleSq=rscale*rscale;
	
	double logSumSingle=0,logSumDouble=0;

	// for Kahan sum
	double kahanSumSingle=0, kahanSumDouble=0;
	double kahanCSingle=0,kahanCDouble=0;
	double kahanTempT=0, kahanTempY=0;
	
	int iOne=1, info;
	double dZero=0;
		

	// progress stuff
	SEXP sampCounter, R_fcall;
	int *pSampCounter;
    PROTECT(R_fcall = lang2(pBar, R_NilValue));
	PROTECT(sampCounter = NEW_INTEGER(1));
	pSampCounter = INTEGER_POINTER(sampCounter);
	
	npars=J+5;
	
	GetRNGstate();

	// Initialize to 0
	AZERO(XtX,Jp1sq);
	AZERO(ZtZ,Jsq);
	AZERO(beta,Jp1);
	AZERO(ySum,J);
	AZERO(sumy2,J);
	
	// Create vectors
	for(i=0;i<sumN;i++)
	{
		j = whichJ[i];
		ySum[j] += y[i];
		sumy2[j] += y[i]*y[i];
		grandSum += y[i];
		grandSumSq += y[i]*y[i];
	}
	
	
	// create design matrices
	XtX[0]=sumN;	
	for(j=0;j<J;j++)
	{
		XtX[j+1]=N[j];
		XtX[(J+1)*(j+1)]=N[j];
		XtX[(j+1)*(J+1) + (j+1)] = N[j];
		ZtZ[j*J + j] = N[j];
		yBar[j] = ySum[j]/(1.0*N[j]);
	}
	
	Xty[0] = grandSum;	
	Memcpy(Xty+1,ySum,J);
	Memcpy(Zty,ySum,J);
	
	// start MCMC
	for(m=0; m<iterations; m++)
	{
		R_CheckUserInterrupt();
	
		//Check progress
		
		if(progress && !((m+1)%progress)){
			pSampCounter[0]=m+1;
			SETCADR(R_fcall, sampCounter);
			eval(R_fcall, rho); //Update the progress bar
		}
		

		// sample beta
		Memcpy(Btemp,XtX,Jp1sq);
		for(j=0;j<J;j++){
			Btemp[(j+1)*(J+1)+(j+1)] += 1/g;
		}
		InvMatrixUpper(Btemp, J+1);
		internal_symmetrize(Btemp,J+1);	
		for(j=0;j<Jp1sq;j++)
			Btemp[j] *= sig2;
	
		oneOverSig2temp = 1/sig2;
		F77_CALL(dsymv)("U", &Jp1, &oneOverSig2temp, Btemp, &Jp1, Xty, &iOne, &dZero, beta, &iOne);
		
		rmvGaussianC(beta, Btemp, J+1);
		Memcpy(&chains[npars*m],beta,J+1);	
		
		
		// calculate density (Single Standardized)
		
		Memcpy(B2temp,ZtZ,Jsq);
		densDelta = -J*0.5*log(2*M_PI);
		for(j=0;j<J;j++)
		{
			B2temp[j*J+j] += 1/g;
			muTemp[j] = (ySum[j]-N[j]*beta[0])/sqrt(sig2);
		}
		InvMatrixUpper(B2temp, J);
		internal_symmetrize(B2temp,J);
		logDet = matrixDet(B2temp,J,J,1, &info);
		densDelta += -0.5*quadform(muTemp, B2temp, J, 1, J);
		densDelta += -0.5*logDet;
		if(m==0){
			logSumSingle = densDelta;
			kahanSumSingle = exp(densDelta);
		}else{
			logSumSingle =  logSumSingle + LogOnePlusX(exp(densDelta-logSumSingle));
			kahanTempY = exp(densDelta) - kahanCSingle;
			kahanTempT = kahanSumSingle + kahanTempY;
			kahanCSingle = (kahanTempT - kahanSumSingle) - kahanTempY;
			kahanSumSingle = kahanTempT;
		}
		chains[npars*m + (J+1) + 0] = densDelta;
		
		
		// calculate density (Double Standardized)
		densDelta += 0.5*J*log(g);
		if(m==0){
			logSumDouble = densDelta;
			kahanSumDouble = exp(densDelta);
		}else{
			logSumDouble =  logSumDouble + LogOnePlusX(exp(densDelta-logSumDouble));
			kahanTempY = exp(densDelta) - kahanCDouble;
			kahanTempT = kahanSumDouble + kahanTempY;
			kahanCDouble = (kahanTempT - kahanSumDouble) - kahanTempY;
			kahanSumDouble = kahanTempT;
		}
		chains[npars*m + (J+1) + 1] = densDelta;
		
		
		
		// sample sig2
		tempBetaSq = 0;
		scaleSig2 = grandSumSq - 2*beta[0]*grandSum + beta[0]*beta[0]*sumN;
		for(j=0;j<J;j++)
		{
			scaleSig2 += -2.0*(yBar[j]-beta[0])*N[j]*beta[j+1] + (N[j]+1/g)*beta[j+1]*beta[j+1];
			tempBetaSq += beta[j+1]*beta[j+1];
		}
		scaleSig2 *= 0.5;
		sig2 = 1/rgamma(shapeSig2,1/scaleSig2);
		chains[npars*m + (J+1) + 2] = sig2;
	
		// sample g
		scaleg = 0.5*(tempBetaSq/sig2 + rscaleSq);
		g = 1/rgamma(shapeg,1/scaleg);
		chains[npars*m + (J+1) + 3] = g;

	}
	
	CMDE[0] = logSumSingle - log(iterations);
	CMDE[1] = logSumDouble - log(iterations);
	CMDE[2] = log(kahanSumSingle) - log(iterations);
	CMDE[3] = log(kahanSumDouble) - log(iterations);
	
	UNPROTECT(2);
	PutRNGstate();
	
}