/* * n_vars is the number of variables to be considered, * d is the data array of variables d[0],...,d[n_vars-1], * pred determines which estimate is required: BLUE, BLUP, or BLP */ void gls(DATA **d /* pointer to DATA array */, int n_vars, /* length of DATA array (to consider) */ enum GLS_WHAT pred, /* what type of prediction is requested */ DPOINT *where, /* prediction location */ double *est /* output: array that holds the predicted values and variances */) { GLM *glm = NULL; /* to be copied to/from d */ static MAT *X0 = MNULL, *C0 = MNULL, *MSPE = MNULL, *CinvC0 = MNULL, *Tmp1 = MNULL, *Tmp2 = MNULL, *Tmp3 = MNULL, *R = MNULL; static VEC *blup = VNULL, *tmpa = VNULL, *tmpb = VNULL; PERM *piv = PNULL; volatile unsigned int i, rows_C; unsigned int j, k, l = 0, row, col, start_i, start_j, start_X, global, one_nbh_empty; VARIOGRAM *v = NULL; static enum GLS_WHAT last_pred = GLS_INIT; /* the initial value */ double c_value, *X_ori; int info; if (d == NULL) { /* clean up */ if (X0 != MNULL) M_FREE(X0); if (C0 != MNULL) M_FREE(C0); if (MSPE != MNULL) M_FREE(MSPE); if (CinvC0 != MNULL) M_FREE(CinvC0); if (Tmp1 != MNULL) M_FREE(Tmp1); if (Tmp2 != MNULL) M_FREE(Tmp2); if (Tmp3 != MNULL) M_FREE(Tmp3); if (R != MNULL) M_FREE(R); if (blup != VNULL) V_FREE(blup); if (tmpa != VNULL) V_FREE(tmpa); if (tmpb != VNULL) V_FREE(tmpb); last_pred = GLS_INIT; return; } if (DEBUG_COV) { printlog("we're at %s X: %g Y: %g Z: %g\n", IS_BLOCK(where) ? "block" : "point", where->x, where->y, where->z); } if (pred != UPDATE) /* it right away: */ last_pred = pred; assert(last_pred != GLS_INIT); if (d[0]->glm == NULL) { /* allocate and initialize: */ glm = new_glm(); d[0]->glm = (void *) glm; } else glm = (GLM *) d[0]->glm; glm->mu0 = v_resize(glm->mu0, n_vars); MSPE = m_resize(MSPE, n_vars, n_vars); if (pred == GLS_BLP || UPDATE_BLP) { X_ori = where->X; for (i = 0; i < n_vars; i++) { /* mu(0) */ glm->mu0->ve[i] = calc_mu(d[i], where); blup = v_copy(glm->mu0, v_resize(blup, glm->mu0->dim)); where->X += d[i]->n_X; /* shift to next x0 entry */ } where->X = X_ori; /* ... and set back */ for (i = 0; i < n_vars; i++) { /* Cij(0,0): */ for (j = 0; j <= i; j++) { v = get_vgm(LTI(d[i]->id,d[j]->id)); ME(MSPE, i, j) = ME(MSPE, j, i) = COVARIANCE0(v, where, where, d[j]->pp_norm2); } } fill_est(NULL, blup, MSPE, n_vars, est); /* in case of empty neighbourhood */ } /* xxx */ /* logprint_variogram(v, 1); */ /* * selection dependent problem dimensions: */ for (i = rows_C = 0, one_nbh_empty = 0; i < n_vars; i++) { rows_C += d[i]->n_sel; if (d[i]->n_sel == 0) one_nbh_empty = 1; } if (rows_C == 0 /* all selection lists empty */ || one_nbh_empty == 1) { /* one selection list empty */ if (pred == GLS_BLP || UPDATE_BLP) debug_result(blup, MSPE, pred); return; } for (i = 0, global = 1; i < n_vars && global; i++) global = (d[i]->sel == d[i]->list && d[i]->n_list == d[i]->n_original && d[i]->n_list == d[i]->n_sel); /* * global things: enter whenever (a) first time, (b) local selections or * (c) the size of the problem grew since the last call (e.g. simulation) */ if (glm->C == NULL || !global || rows_C > glm->C->m) { /* * fill y: */ glm->y = get_y(d, glm->y, n_vars); if (pred != UPDATE) { glm->C = m_resize(glm->C, rows_C, rows_C); if (gl_choleski == 0) /* use LDL' decomposition, allocate piv: */ piv = px_resize(piv, rows_C); m_zero(glm->C); glm->X = get_X(d, glm->X, n_vars); M_DEBUG(glm->X, "X"); glm->CinvX = m_resize(glm->CinvX, rows_C, glm->X->n); glm->XCinvX = m_resize(glm->XCinvX, glm->X->n, glm->X->n); glm->beta = v_resize(glm->beta, glm->X->n); for (i = start_X = start_i = 0; i < n_vars; i++) { /* row var */ /* fill C, mu: */ for (j = start_j = 0; j <= i; j++) { /* col var */ v = get_vgm(LTI(d[i]->id,d[j]->id)); for (k = 0; k < d[i]->n_sel; k++) { /* rows */ row = start_i + k; for (l = 0, col = start_j; col <= row && l < d[j]->n_sel; l++, col++) { if (pred == GLS_BLUP) c_value = GCV(v, d[i]->sel[k], d[j]->sel[l]); else c_value = COVARIANCE(v, d[i]->sel[k], d[j]->sel[l]); /* on the diagonal, if necessary, add measurement error variance */ if (d[i]->colnvariance && i == j && k == l) c_value += d[i]->sel[k]->variance; ME(glm->C, col, row) = c_value; /* fill upper */ if (col != row) ME(glm->C, row, col) = c_value; /* fill all */ } /* for l */ } /* for k */ start_j += d[j]->n_sel; } /* for j */ start_i += d[i]->n_sel; if (d[i]->n_sel > 0) start_X += d[i]->n_X - d[i]->n_merge; } /* for i */ /* if (d[0]->colnvmu) glm->C = convert_vmuC(glm->C, d[0]); */ if (d[0]->variance_fn) { glm->mu = get_mu(glm->mu, glm->y, d, n_vars); convert_C(glm->C, glm->mu, d[0]->variance_fn); } if (DEBUG_COV && pred == GLS_BLUP) printlog("[using generalized covariances: max_val - semivariance()]"); M_DEBUG(glm->C, "Covariances (x_i, x_j) matrix C (upper triangle)"); /* * factorize C: */ CHfactor(glm->C, piv, &info); if (info != 0) { /* singular: */ pr_warning("Covariance matrix singular at location [%g,%g,%g]: skipping...", where->x, where->y, where->z); m_free(glm->C); glm->C = MNULL; /* assure re-entrance if global */ P_FREE(piv); return; } if (piv == NULL) M_DEBUG(glm->C, "glm->C, Choleski decomposed:") else M_DEBUG(glm->C, "glm->C, LDL' decomposed:") } /* if (pred != UPDATE) */
/*---------------------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------------------------ MAIN FUNCTION ------------------------------------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------------------------------------ */ SEXP local_poly_estimator(SEXP X, SEXP Y, SEXP points, SEXP band, SEXP grid1, SEXP degree_poly, SEXP kernel_type1, SEXP deriv1) { int i, j; i = 0;j = 0; /* Digest the datastructures (SEXPs) from R */ double *xptr, *yptr, *grid; int kernel_type = INTEGER_VALUE(kernel_type1); int degree_pol = INTEGER_VALUE(degree_poly); int deriv = INTEGER_VALUE(deriv1); PROTECT(grid1 = coerceVector (grid1, REALSXP) ) ; grid = REAL(grid1); SEXP dimgrid = coerceVector(getAttrib(grid1, R_DimSymbol), INTSXP); int n_grid = INTEGER(dimgrid)[1]; // get dimensions of matrix X SEXP dimX = coerceVector(getAttrib(X, R_DimSymbol), INTSXP); d = INTEGER(dimX)[0]; n = INTEGER(dimX)[1]; // get dimensions of matrix points double *pontos; int n_pontos, d_pontos; SEXP dimpoints = coerceVector(getAttrib(points, R_DimSymbol), INTSXP); d_pontos = INTEGER(dimpoints)[0]; n_pontos = INTEGER(dimpoints)[1]; if ((d > 1) && (d_pontos == 1)) // X is a matrix n by d and points is a vector { // then, points is one point of d dimension n_pontos = 1; d_pontos = d; } PROTECT(X = coerceVector (X, REALSXP) ) ; xptr = REAL(X); PROTECT(Y = coerceVector (Y, REALSXP) ) ; yptr = REAL(Y); PROTECT(points = coerceVector (points, REALSXP) ) ; pontos = REAL(points); // aux is at each step the point x at which we predict y double aux[d]; int k; // pred is the predicted values that will be returned SEXP pred; double *p_pred; PROTECT(pred = NEW_NUMERIC(n_pontos)); p_pred = NUMERIC_POINTER(pred); PROTECT(band = coerceVector (band, REALSXP) ) ; double * banda = REAL(band); // banda must have dimensions: n_points by d SEXP bandwidth; double *p_bandwidth; PROTECT(bandwidth = NEW_NUMERIC(d*n_pontos)); p_bandwidth = NUMERIC_POINTER(bandwidth); // ------------------------------------------------------------- Cross Validation or GCV if ((banda[0] == 0) || (banda[0] == -1)) { GCV(xptr, yptr, n , d , kernel_type, grid, n_grid, degree_pol, deriv, p_bandwidth); for (i = 1; i < n_pontos; i++) for (j = 0; j < d; j++) p_bandwidth[i*d + j] = p_bandwidth[j]; } else // ------------------------------------------------------------- Cross Validation or GCV multidimensional if ((banda[0] == -2) || (banda[0] == -3)) { GCV_each_dimens(xptr, yptr, n , d , kernel_type, grid, n_grid, degree_pol, deriv, p_bandwidth); for (i = 1; i < n_pontos; i++) for (j = 0; j < d; j++) p_bandwidth[i*d + j] = p_bandwidth[j]; } else // ------------------------------------------------------------- { // if no cross-validation, I still need to fill the matrix of bandwidths // where each row correspond to a point in 'points' sent by the user here for (i = 0; i < n_pontos; i++) for (j = 0; j < d; j++) p_bandwidth[i*d + j] = banda[i*d + j]; } // variables used to solve (X'X)^-1X'Y const int m = n; int n2; if (degree_pol == 1) n2 = 1 + d; else if (degree_pol == 2) n2 = 1+d + d*(d+1)/2; else n2 = degree_pol + 1; double a[n2*n]; // this will be X double b[n]; // this will be Y const int nrhs = 1; const int lda = n; const int ldb = n; int lwork; int mn = m; if (n2 < m) mn = n2; if (mn == 1) lwork = mn + 1; else lwork = mn + mn; int info = 0; double work[lwork]; for(i = 0; i < lwork; i++) work[i] = 0; // ------------------------------------------------------------------------------------------------- Prediction for (i = 0; i < n_pontos; i++) { // ------------------------------------ construct aux //aux is the point where m1 is to be estimated if (d == 1) aux[0] = pontos[i]; else if (n_pontos == 1) // here, X is a matrix n by d (d>1) for (j = 0; j < d; j++) //and points is a vector size d, thus there is 1 point aux[j] = pontos[j]; else { for (j = 0; j < d; j++) aux[j] = pontos[i*d + j]; } // for each observation in X construct a and obtain beta_hat_0 = m_hat(aux) for (j = 0; j < n; j++) { // construct a = sqrt(W)XX a[j] = 1; for (k = 1; k <= d; k++) a[j] = a[j]*sqrt(K(kernel_type, (xptr[j*d + k-1]-aux[k-1])/p_bandwidth[i*d + k-1])); // for a vector of bandwidths if ((degree_pol == 1) || (degree_pol == 2)) // add columns X1-x, X2-x,... Xd-x for (k = 1; k <= d; k++) a[j+n*k] = (xptr[j*d + k-1]-aux[k-1])*a[j]; // note that a is transpose manner if (degree_pol == 2) // include columns of half vectorization: VECH { int l, ind_vech; ind_vech = 1; for (k = 1; k <= d; k++) for (l = k; l <= d; l++) { a[j+n*d+n*ind_vech] = (xptr[j*d + k-1]-aux[k-1])*(xptr[j*d + l-1]-aux[l-1])*a[j]; ind_vech = ind_vech + 1; } } if ((degree_pol > 2) && (d == 1)) // works only for d == 1 for (k = 1; k <= degree_pol; k++) a[j+n*k] = pow((xptr[j]-aux[0]),k)*a[j]; b[j] = yptr[j]*a[j]; // b = sqrt(W)Y } // reg does (a'a)^-1a'b reg(&m, &n2, &nrhs, a, &lda, b, &ldb, work, &lwork, &info); p_pred[i] = factorial(deriv)*b[deriv]; } // -------------------------------------------------------------------------------------------------- Prediction SEXP list, list_names; char *names[2] = {"predicted", "bandwidth"}; PROTECT(list_names = allocVector(STRSXP,2)); PROTECT(list = allocVector(VECSXP, 2)); for(i = 0; i < 2; i++) SET_STRING_ELT(list_names,i,mkChar(names[i])); SET_VECTOR_ELT(list, 0, pred); SET_VECTOR_ELT(list, 1, bandwidth); setAttrib(list, R_NamesSymbol, list_names); UNPROTECT( 9 ) ; return(list); }
/* * n_vars is the number of variables to be considered, * d is the data array of variables d[0],...,d[n_vars-1], * pred determines which estimate is required: BLUE, BLUP, or BLP */ void gls(DATA **d /* pointer to DATA array */, int n_vars, /* length of DATA array (to consider) */ enum GLS_WHAT pred, /* what type of prediction is requested */ DPOINT *where, /* prediction location */ double *est /* output: array that holds the predicted values and variances */) { GLM *glm = NULL; /* to be copied to/from d */ static MAT *X0 = MNULL, *C0 = MNULL, *MSPE = MNULL, *CinvC0 = MNULL, *Tmp1 = MNULL, *Tmp2 = MNULL, *Tmp3, *R = MNULL; static VEC *blup = VNULL, *tmpa = VNULL, *tmpb = VNULL; volatile unsigned int i, rows_C; unsigned int j, k, l = 0, row, col, start_i, start_j, start_X, global; VARIOGRAM *v = NULL; static enum GLS_WHAT last_pred = GLS_INIT; /* the initial value */ double c_value, *X_ori; if (d == NULL) { /* clean up */ if (X0 != MNULL) M_FREE(X0); if (C0 != MNULL) M_FREE(C0); if (MSPE != MNULL) M_FREE(MSPE); if (CinvC0 != MNULL) M_FREE(CinvC0); if (Tmp1 != MNULL) M_FREE(Tmp1); if (Tmp2 != MNULL) M_FREE(Tmp2); if (Tmp3 != MNULL) M_FREE(Tmp3); if (R != MNULL) M_FREE(R); if (blup != VNULL) V_FREE(blup); if (tmpa != VNULL) V_FREE(tmpa); if (tmpb != VNULL) V_FREE(tmpb); last_pred = GLS_INIT; return; } #ifndef HAVE_SPARSE if (gl_sparse) { pr_warning("sparse matrices not supported: compile with --with-sparse"); gl_sparse = 0; } #endif if (DEBUG_COV) { printlog("we're at %s X: %g Y: %g Z: %g\n", IS_BLOCK(where) ? "block" : "point", where->x, where->y, where->z); } if (pred != UPDATE) /* it right away: */ last_pred = pred; assert(last_pred != GLS_INIT); if (d[0]->glm == NULL) { /* allocate and initialize: */ glm = new_glm(); d[0]->glm = (void *) glm; } else glm = (GLM *) d[0]->glm; glm->mu0 = v_resize(glm->mu0, n_vars); MSPE = m_resize(MSPE, n_vars, n_vars); if (pred == GLS_BLP || UPDATE_BLP) { X_ori = where->X; for (i = 0; i < n_vars; i++) { /* mu(0) */ glm->mu0->ve[i] = calc_mu(d[i], where); blup = v_copy(glm->mu0, v_resize(blup, glm->mu0->dim)); where->X += d[i]->n_X; /* shift to next x0 entry */ } where->X = X_ori; /* ... and set back */ for (i = 0; i < n_vars; i++) { /* Cij(0,0): */ for (j = 0; j <= i; j++) { v = get_vgm(LTI(d[i]->id,d[j]->id)); MSPE->me[i][j] = MSPE->me[j][i] = COVARIANCE0(v, where, where, d[j]->pp_norm2); } } fill_est(NULL, blup, MSPE, n_vars, est); /* in case of empty neighbourhood */ } /* xxx */ /* logprint_variogram(v, 1); */ /* * selection dependent problem dimensions: */ for (i = rows_C = 0; i < n_vars; i++) rows_C += d[i]->n_sel; if (rows_C == 0) { /* empty selection list(s) */ if (pred == GLS_BLP || UPDATE_BLP) debug_result(blup, MSPE, pred); return; } for (i = 0, global = 1; i < n_vars && global; i++) global = (d[i]->sel == d[i]->list && d[i]->n_list == d[i]->n_original); /* * global things: enter whenever (a) first time, (b) local selections or * (c) the size of the problem grew since the last call (e.g. simulation) */ if ((glm->C == NULL && glm->spC == NULL) || !global || rows_C > glm->C->m) { /* * fill y: */ glm->y = get_y(d, glm->y, n_vars); if (pred != UPDATE) { if (! gl_sparse) { glm->C = m_resize(glm->C, rows_C, rows_C); m_zero(glm->C); } #ifdef HAVE_SPARSE else { if (glm->C == NULL) { glm->spC = sp_get(rows_C, rows_C, gl_sparse); /* d->spLLT = spLLT = sp_get(rows_C, rows_C, gl_sparse); */ } else { glm->spC = sp_resize(glm->spC, rows_C, rows_C); /* d->spLLT = spLLT = sp_resize(spLLT, rows_C, rows_C); */ } sp_zero(glm->spC); } #endif glm->X = get_X(d, glm->X, n_vars); M_DEBUG(glm->X, "X"); glm->CinvX = m_resize(glm->CinvX, rows_C, glm->X->n); glm->XCinvX = m_resize(glm->XCinvX, glm->X->n, glm->X->n); glm->beta = v_resize(glm->beta, glm->X->n); for (i = start_X = start_i = 0; i < n_vars; i++) { /* row var */ /* fill C, mu: */ for (j = start_j = 0; j <= i; j++) { /* col var */ v = get_vgm(LTI(d[i]->id,d[j]->id)); for (k = 0; k < d[i]->n_sel; k++) { /* rows */ row = start_i + k; for (l = 0, col = start_j; col <= row && l < d[j]->n_sel; l++, col++) { if (pred == GLS_BLUP) c_value = GCV(v, d[i]->sel[k], d[j]->sel[l]); else c_value = COVARIANCE(v, d[i]->sel[k], d[j]->sel[l]); /* on the diagonal, if necessary, add measurement error variance */ if (d[i]->colnvariance && i == j && k == l) c_value += d[i]->sel[k]->variance; if (! gl_sparse) glm->C->me[row][col] = c_value; #ifdef HAVE_SPARSE else { if (c_value != 0.0) sp_set_val(glm->spC, row, col, c_value); } #endif } /* for l */ } /* for k */ start_j += d[j]->n_sel; } /* for j */ start_i += d[i]->n_sel; if (d[i]->n_sel > 0) start_X += d[i]->n_X - d[i]->n_merge; } /* for i */ /* if (d[0]->colnvmu) glm->C = convert_vmuC(glm->C, d[0]); */ if (d[0]->variance_fn) { glm->mu = get_mu(glm->mu, glm->y, d, n_vars); convert_C(glm->C, glm->mu, d[0]->variance_fn); } if (DEBUG_COV && pred == GLS_BLUP) printlog("[using generalized covariances: max_val - semivariance()]"); if (! gl_sparse) { M_DEBUG(glm->C, "Covariances (x_i, x_j) matrix C (lower triangle only)"); } #ifdef HAVE_SPARSE else { SM_DEBUG(glm->spC, "Covariances (x_i, x_j) sparse matrix C (lower triangle only)") } #endif /* check for singular C: */ if (! gl_sparse && gl_cn_max > 0.0) { for (i = 0; i < rows_C; i++) /* row */ for (j = i+1; j < rows_C; j++) /* col > row */ glm->C->me[i][j] = glm->C->me[j][i]; /* fill symmetric */ if (is_singular(glm->C, gl_cn_max)) { pr_warning("Covariance matrix (nearly) singular at location [%g,%g,%g]: skipping...", where->x, where->y, where->z); m_free(glm->C); glm->C = MNULL; /* assure re-entrance if global */ return; } } /* * factorize C: */ if (! gl_sparse) LDLfactor(glm->C); #ifdef HAVE_SPARSE else { sp_compact(glm->spC, 0.0); spCHfactor(glm->spC); } #endif } /* if (pred != UPDATE) */ if (pred != GLS_BLP && !UPDATE_BLP) { /* C-1 X and X'C-1 X, beta */ /* * calculate CinvX: */ tmpa = v_resize(tmpa, rows_C); for (i = 0; i < glm->X->n; i++) { tmpa = get_col(glm->X, i, tmpa); if (! gl_sparse) tmpb = LDLsolve(glm->C, tmpa, tmpb); #ifdef HAVE_SPARSE else tmpb = spCHsolve(glm->spC, tmpa, tmpb); #endif set_col(glm->CinvX, i, tmpb); } /* * calculate X'C-1 X: */ glm->XCinvX = mtrm_mlt(glm->X, glm->CinvX, glm->XCinvX); /* X'C-1 X */ M_DEBUG(glm->XCinvX, "X'C-1 X"); if (gl_cn_max > 0.0 && is_singular(glm->XCinvX, gl_cn_max)) { pr_warning("X'C-1 X matrix (nearly) singular at location [%g,%g,%g]: skipping...", where->x, where->y, where->z); m_free(glm->C); glm->C = MNULL; /* assure re-entrance if global */ return; } m_inverse(glm->XCinvX, glm->XCinvX); /* * calculate beta: */ tmpa = vm_mlt(glm->CinvX, glm->y, tmpa); /* X'C-1 y */ glm->beta = vm_mlt(glm->XCinvX, tmpa, glm->beta); /* (X'C-1 X)-1 X'C-1 y */ V_DEBUG(glm->beta, "beta"); M_DEBUG(glm->XCinvX, "Cov(beta), (X'C-1 X)-1"); M_DEBUG(R = get_corr_mat(glm->XCinvX, R), "Corr(beta)"); } /* if pred != GLS_BLP */ } /* if redo the heavy part */