/*
 * CPU reference implementation of one weighted least-squares solve.
 *
 * Steps, all delegated to helpers defined elsewhere in the project:
 *   1. XTWX()    fills the (y_rows+y_cols-1)^2 matrix xtwx from the weights,
 *   2. XTWXinv() is called on that same buffer (the code below then treats
 *                xtwx as the inverse, so the helper presumably inverts in
 *                place -- confirm against its definition),
 *   3. XTWY()    fills the vector xtwy from the weights and y,
 *   4. out_beta_cpu[i] = sum_j xtwx[j*n + i] * xtwy[j], n = y_rows+y_cols-1.
 *
 * Parameters:
 *   y_cols, y_rows  dimensions of the data matrix (note: columns come first).
 *   wts, y          weight and response arrays, passed straight to the helpers.
 *   out_beta_cpu    output; at least y_rows+y_cols-1 elements are written.
 *
 * If scratch memory cannot be allocated a message is written to stderr and
 * the function returns without touching out_beta_cpu.
 */
void wls_cpu(int_t y_cols, int_t y_rows, double* wts, double* y, double* out_beta_cpu){
    const int_t n_params = y_rows + y_cols - 1;
    int_t i, j;

    /* Size the square matrix in size_t so the product cannot overflow int_t. */
    double *xtwx = (double *)calloc((size_t)n_params * (size_t)n_params, sizeof(double));
    double *xtwy = (double *)calloc((size_t)(y_rows + y_cols), sizeof(double));

    if (xtwx == NULL || xtwy == NULL){
        fprintf(stderr, "wls_cpu: out of memory\n");
        free(xtwx);   /* free(NULL) is a no-op */
        free(xtwy);
        return;
    }

    XTWX(y_rows, y_cols, wts, xtwx);
    XTWXinv(y_rows, y_cols, xtwx);
    XTWY(y_rows, y_cols, wts, y, xtwy);

    /* OUT_BETA: multiply the (inverted) matrix by xtwy. */
    for (i = 0; i < n_params; i++){
        out_beta_cpu[i] = 0.0;
        for (j = 0; j < n_params; j++){
            out_beta_cpu[i] += xtwx[j*n_params + i] * xtwy[j];
        }
    }

    free(xtwx);
    free(xtwy);
    /* The function is void: the original "return 0;" was invalid and is gone. */
}
int main(){ struct timeval start, end; long utime; int_t y_rows = Y_ROWS; int_t y_cols = Y_COLS; double *wts = (double *)calloc((y_rows*y_cols),sizeof(double)); double *y = (double *)calloc((y_rows*y_cols),sizeof(double)); printf("testing here\n"); #ifdef CPU_COMPUTE double *xtwx = (double *)calloc((y_rows+y_cols-1)*(y_rows+y_cols-1),sizeof(double)); double *xtwy = (double *)calloc((y_rows+y_cols),sizeof(double)); double *out_beta = (double *)calloc((y_rows+y_cols),sizeof(double)); #endif double *out_beta_gpu = (double *)calloc((y_rows+y_cols),sizeof(double)); int_t i,j; /* initialize random seed: */ srand (time(NULL)); for(j=0; j<y_cols; j++){ for(i=0; i<y_rows; i++){ wts[j*y_rows+i] = rand()%(RANDOM_MAX - RANDOM_MIN + 1) + RANDOM_MIN; wts[j*y_rows+i]=1.0/wts[j*y_rows+i]; y[j*y_rows+i] = rand()%(RANDOM_MAX - RANDOM_MIN + 1) + RANDOM_MIN; y[j*y_rows+i]=1.0/y[j*y_rows+i]; } } #ifdef CPU_COMPUTE printf("\n Starting CPU Computation\n"); gettimeofday(&start, NULL); //XTWX start XTWX(y_rows,y_cols,wts,xtwx); printf("\nXTWX done\n"); fflush(stdout); //XTWXinv XTWXinv(y_rows, y_cols,xtwx); printf("\nXTWXinv done\n"); fflush(stdout); //XTWY XTWY(y_rows, y_cols, wts,y, xtwy); printf("\nXTWY done\n"); fflush(stdout); //OUT_BETA for (i=0;i < y_rows+y_cols-1; i++){ out_beta[i] = 0.0; for (j=0;j < y_rows+y_cols -1; j++){ out_beta[i] += xtwx[j*(y_rows+y_cols -1)+i]*xtwy[j]; } } gettimeofday(&end, NULL); utime = ((end.tv_sec - start.tv_sec) * 1000000 + end.tv_usec - start.tv_usec); printf("\n CPU Computation done \n"); printf("\nTime CPU = %ld us\n",utime); #endif printf("\n Starting GPU Computation\n"); //GPU start gettimeofday(&start, NULL); wls_gpu(y_cols, y_rows, wts, y, out_beta_gpu); gettimeofday(&end, NULL); utime = ((end.tv_sec - start.tv_sec) * 1000000 + end.tv_usec - start.tv_usec); printf("\n GPU Computation done \n"); printf("\nTime GPU = %ld us\n",utime); #if 0 //check int_t M_size = y_cols+y_rows-1; //check for A double Ainv_err = 0.0; for(i=0; i<y_cols; i++){ 
Ainv_err+=fabs(((1.0/h_Ainv[i]) - xtwx[i*M_size+i])/xtwx[i*M_size+i]); } Ainv_err/=y_cols; printf("\nError Ainv = %e\n",Ainv_err); //check for B double B_err = 0.0; for(j=0; j<(y_rows-1); j++){ for(i=0; i<y_cols; i++){ B_err+=fabs((h_B[j*y_cols+i] - xtwx[(y_cols+j)*M_size+i])/xtwx[(y_cols+j)*M_size+i]); } } B_err/=(y_cols*(y_rows-1)); printf("\nError B = %e\n",B_err); //check for D double D_err = 0.0; for(j=0; j<(y_rows-1); j++){ for(i=0; i<(y_rows-1); i++){ D_err+=fabs((h_D[j*(y_rows-1)+i] - xtwx[(y_cols+j)*M_size+y_cols+i])/xtwx[(y_cols+j)*M_size+y_cols+i]); } } D_err/=((y_rows-1)*(y_rows-1)); printf("\nError D = %e\n",D_err); //check for Q double Q_err = 0.0; for(j=0; j<(y_rows-1); j++){ for(i=0; i<y_cols; i++){ Q_err+=fabs((h_Q[j*y_cols+i] - xtwx[(y_cols+j)*M_size+i])/xtwx[(y_cols+j)*M_size+i]); } } Q_err/=(y_cols*(y_rows-1)); printf("\nError Q = %e\n",Q_err); //check for S double S_err = 0.0; for(j=0; j<(y_rows-1); j++){ for(i=0; i<(y_rows-1); i++){ S_err+=fabs((h_S[j*(y_rows-1)+i] - xtwx[(y_cols+j)*M_size+y_cols+i])/xtwx[(y_cols+j)*M_size+y_cols+i]); } } S_err/=((y_rows-1)*(y_rows-1)); printf("\nError S = %e\n",S_err); #endif #ifdef CPU_COMPUTE //check for out_beta double out_beta_err = 0.0; for(j=0; j<(y_rows+y_cols-1); j++){ out_beta_err+=fabs((out_beta_gpu[j] - out_beta[j])/out_beta[j]); } out_beta_err/=(y_rows+y_cols-1); printf("\nError Out_beta_error = %e\n",out_beta_err); #endif free(wts); free(y); #ifdef CPU_COMPUTE free(xtwx); free(xtwy); free(out_beta); #endif free(out_beta_gpu); return 0; }
/*
 * Pointer-argument front end for XTWX().
 *
 * Reads the two dimensions through their pointers and forwards them by value,
 * together with the weight array and the output matrix, to XTWX().
 * NOTE(review): the by-pointer scalars look like an R .C-style calling
 * convention -- confirm against the callers.
 */
void XTWX_R(int *rows, int *cols, double *out_weights, double *xtwx){
    const int n_rows = *rows;
    const int n_cols = *cols;

    XTWX(n_rows, n_cols, out_weights, xtwx);
}
/*
 * Iteratively re-weighted least-squares fit of a row-plus-column effects
 * model to a y_rows x y_cols matrix stored column by column
 * (element (i,j) lives at index j*y_rows + i).
 *
 * Parameters:
 *   y            input data, y_rows*y_cols values.
 *   input_scale  in: a fixed scale to use, or a negative value to have the
 *                scale re-estimated as med_abs(resids)/0.6745 each iteration;
 *                out: input_scale[0] receives the scale in effect at exit.
 *   w            per-observation prior weights; multiplied into every weight.
 *   out_beta     output coefficients: entries 0..y_cols-1 are column effects,
 *                entries y_cols..y_cols+y_rows-2 are the first y_rows-1 row
 *                effects.  The last row's effect is not stored; it is taken as
 *                minus the sum of the stored row effects.
 *   out_resids   output residuals, y_rows*y_cols values.
 *   out_weights  output weights; when `initialized` is non-zero the incoming
 *                contents are used as the starting weights.
 *   PsiFn        weight function, called as PsiFn(resid/scale, psi_k, 0).
 *   psi_k        tuning constant forwarded to PsiFn.
 *   max_iter     upper bound on the number of re-weighting iterations.
 *   initialized  zero: start from the weights in w; non-zero: see out_weights.
 *
 * Iteration stops early when |scale| < 1e-10 or when irls_delta() of the old
 * and new residuals drops below 1e-4.
 */
void rlm_wfit_anova_engine(double *y, int y_rows, int y_cols, double *input_scale, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

    int i,j,iter;
    /* An element count belongs in an integer; it was previously a double
       that got converted back to int at every use. */
    const int n_obs = y_rows*y_cols;
    const int n_params = y_rows+y_cols-1;

    /* double tol = 1e-7; */
    double acc = 1e-4;
    double scale =0.0;
    double conv;
    double endprobe;
    double sumweights;

    double *wts = out_weights;
    double *resids = out_resids;
    double *old_resids = Calloc(n_obs,double);
    double *rowmeans = Calloc(y_rows,double);
    double *xtwx = Calloc(n_params*n_params,double);
    double *xtwy = Calloc((y_rows+y_cols),double);

    if (!initialized){
        /* initially use the supplied prior weights */
        for (i=0; i < n_obs; i++){
            wts[i] = w[i]*1.0;
        }
    }

    /* starting matrix */
    for (i=0; i < y_rows; i++){
        for (j=0; j < y_cols; j++){
            resids[j*y_rows + i] = y[j*y_rows + i];
        }
    }

    /* sweep columns (ie chip effects): remove each column's weighted mean */
    for (j=0; j < y_cols; j++){
        out_beta[j] = 0.0;
        sumweights = 0.0;
        for (i=0; i < y_rows; i++){
            out_beta[j] += wts[j*y_rows + i]* resids[j*y_rows + i];
            sumweights += wts[j*y_rows + i];
        }
        out_beta[j]/=sumweights;
        for (i=0; i < y_rows; i++){
            resids[j*y_rows + i] = resids[j*y_rows + i] - out_beta[j];
        }
    }

    /* sweep rows (ie probe effects): remove each row's weighted mean */
    for (i=0; i < y_rows; i++){
        rowmeans[i] = 0.0;
        sumweights = 0.0;
        for (j=0; j < y_cols; j++){
            rowmeans[i] += wts[j*y_rows + i]* resids[j*y_rows + i];
            sumweights += wts[j*y_rows + i];
        }
        rowmeans[i]/=sumweights;
        for (j=0; j < y_cols; j++){
            resids[j*y_rows + i] = resids[j*y_rows + i] - rowmeans[i];
        }
    }
    for (i=0; i < y_rows-1; i++){
        out_beta[i+y_cols] = rowmeans[i];
    }

    for (iter = 0; iter < max_iter; iter++){
        if (*input_scale < 0){
            scale = med_abs(resids,n_obs)/0.6745;
        } else {
            scale = *input_scale;
        }

        if (fabs(scale) < 1e-10){
            /*printf("Scale too small \n"); */
            break;
        }

        for (i =0; i < n_obs; i++){
            old_resids[i] = resids[i];
        }

        for (i=0; i < n_obs; i++){
            wts[i] = w[i]*PsiFn(resids[i]/scale,psi_k,0);
        }

        /* weighted least squares; xtwx is cleared before XTWX() is called
           on it each iteration */
        memset(xtwx,0,(size_t)n_params*n_params*sizeof(double));

        XTWX(y_rows,y_cols,wts,xtwx);
        XTWXinv(y_rows, y_cols,xtwx);
        XTWY(y_rows, y_cols, wts,y, xtwy);

        for (i=0;i < n_params; i++){
            out_beta[i] = 0.0;
            for (j=0;j < n_params; j++){
                out_beta[i] += xtwx[j*n_params+i]*xtwy[j];
            }
        }

        /* residuals for every row that has its own stored coefficient */
        for (i=0; i < y_rows-1; i++){
            for (j=0; j < y_cols; j++){
                resids[j*y_rows +i] = y[j*y_rows + i]- (out_beta[j] + out_beta[i + y_cols]);
            }
        }

        /* The sum of the stored row effects does not depend on the column,
           so compute it once rather than once per column. */
        endprobe=0.0;
        for (i=0; i < y_rows-1; i++){
            endprobe+= out_beta[i + y_cols];
        }
        /* last row: its effect is -endprobe */
        for (j=0; j < y_cols; j++){
            resids[j*y_rows + y_rows-1] = y[j*y_rows + y_rows-1]- (out_beta[j] - endprobe);
        }

        /*check convergence based on residuals */
        conv = irls_delta(old_resids,resids, n_obs);

        if (conv < acc){
            /* printf("Converged \n");*/
            break;
        }
    }

    /* Report the scale that matches the final residuals. */
    if (*input_scale < 0){
        scale = med_abs(resids,n_obs)/0.6745;
    } else {
        scale = *input_scale;
    }

    Free(xtwx);
    Free(xtwy);
    Free(old_resids);
    Free(rowmeans);

    input_scale[0] = scale;
}
void rlm_compute_se_anova(double *Y, int y_rows,int y_cols, double *beta, double *resids,double *weights,double *se_estimates, double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k){ int i,j; /* counter/indexing variables */ double k1 = psi_k; /* was 1.345; */ double sumpsi2=0.0; /* sum of psi(r_i)^2 */ /* double sumpsi=0.0; */ double sumderivpsi=0.0; /* sum of psi'(r_i) */ double Kappa=0.0; /* A correction factor */ double scale=0.0; int n = y_rows*y_cols; int p = y_rows + y_cols -1; double *XTX = Calloc(p*p,double); double *W = Calloc(p*p,double); double *work = Calloc(p*p,double); double RMSEw = 0.0; double vs=0.0,m,varderivpsi=0.0; double *W_tmp=Calloc(n,double); if (method == 4){ for (i=0; i < n; i++){ RMSEw+= weights[i]*resids[i]*resids[i]; } RMSEw = sqrt(RMSEw/(double)(n-p)); residSE[0] = RMSEw; XTWX(y_rows,y_cols,weights,XTX); if (y_rows > 1){ XTWXinv(y_rows, y_cols,XTX); } else { for (i=0; i < p; i++){ XTX[i*p + i] = 1.0/XTX[i*p + i]; } } /* make sure in right order for (i =0; i < y_rows-1; i++){ se_estimates[i] = RMSEw*sqrt(XTX[(i+y_cols)*p + (i+y_cols)]); } for (i =0; i < y_cols; i++){ se_estimates[i+(y_rows -1)] = RMSEw*sqrt(XTX[i*p + i]); } */ for (i =0; i < p; i++){ se_estimates[i] = RMSEw*sqrt(XTX[i*p + i]); } if (varcov != NULL) for (i = 0; i < p; i++) for (j = i; j < p; j++) varcov[j*p + i] = RMSEw*RMSEw*XTX[j*p + i]; se_estimates[p] = 0.0; for (i=y_cols; i < p; i++) for (j = y_cols; j < p; j++) se_estimates[p]+= -1*RMSEw*RMSEw*XTX[j*p + i]; se_estimates[p] = sqrt(-1*se_estimates[p]); /* if (varcov != NULL){ copy across varcov matrix in right order for (i = 0; i < y_rows-1; i++) for (j = i; j < y_rows-1; j++) varcov[j*p + i] = RMSEw*RMSEw*XTX[(j+y_cols)*p + (i+y_cols)]; for (i = 0; i < y_cols; i++) for (j = i; j < y_cols; j++) varcov[(j+(y_rows-1))*p + (i+(y_rows -1))] = RMSEw*RMSEw*XTX[j*p + i]; for (i = 0; i < y_cols; i++) for (j = y_cols; j < p; j++) varcov[(i+ y_rows -1)*p + (j - y_cols)] = 
RMSEw*RMSEw*XTX[j*p + i]; } */ } else { scale = med_abs(resids,n)/0.6745; residSE[0] = scale; /* compute most of what we will need to do each of the different standard error methods */ for (i =0; i < n; i++){ sumpsi2+= PsiFn(resids[i]/scale,k1,2)*PsiFn(resids[i]/scale,k1,2); /* sumpsi += psi_huber(resids[i]/scale,k1,2); */ sumderivpsi+= PsiFn(resids[i]/scale,k1,1); } m = (sumderivpsi/(double) n); for (i = 0; i < n; i++){ varderivpsi+=(PsiFn(resids[i]/scale,k1,1) - m)*(PsiFn(resids[i]/scale,k1,1) - m); } varderivpsi/=(double)(n); /* Kappa = 1.0 + (double)p/(double)n * (1.0-m)/(m); */ Kappa = 1.0 + ((double)p/(double)n) *varderivpsi/(m*m); /* prepare XtX and W matrices */ for (i=0; i < n; i++){ W_tmp[i] = 1.0; } XTWX(y_rows,y_cols,W_tmp,XTX); for (i=0; i < n; i++){ W_tmp[i] = PsiFn(resids[i]/scale,k1,1); } XTWX(y_rows,y_cols,W_tmp,W); if (method==1) { Kappa = Kappa*Kappa; vs = scale*scale*sumpsi2/(double)(n-p); Kappa = Kappa*vs/(m*m); RLM_SE_Method_1_anova(Kappa, XTX, y_rows,y_cols, se_estimates,varcov); } else if (method==2){ vs = scale*scale*sumpsi2/(double)(n-p); Kappa = Kappa*vs/m; RLM_SE_Method_2_anova(Kappa, W, y_rows,y_cols, se_estimates,varcov); } else if (method==3){ vs = scale*scale*sumpsi2/(double)(n-p); Kappa = 1.0/Kappa*vs; i = RLM_SE_Method_3_anova(Kappa, XTX, W, y_rows,y_cols, se_estimates,varcov); if (i){ for (i=0; i <n; i++){ // printf("%2.1f ", PsiFn(resids[i]/scale,k1,1)); } //printf("\n"); } } } Free(W_tmp); Free(work); Free(XTX); Free(W); }