예제 #1
0
파일: wls.c 프로젝트: sufengniu/MRPLM
/*
 * CPU weighted least-squares solve.
 *
 * Builds the (y_rows+y_cols-1) square matrix via XTWX(), passes it through
 * XTWXinv() (presumably an in-place inversion -- confirm against its
 * definition), builds the right-hand side via XTWY(), and then writes
 *
 *     out_beta_cpu[i] = sum_j xtwx[j*p + i] * xtwy[j],   p = y_rows+y_cols-1
 *
 * for i in [0, p).
 *
 * y_cols, y_rows : dimensions forwarded to the XTWX/XTWXinv/XTWY helpers.
 * wts, y         : input arrays forwarded to the helpers (not indexed here).
 * out_beta_cpu   : output; must hold at least y_rows+y_cols-1 doubles.
 *
 * If either scratch buffer cannot be allocated the function returns without
 * touching out_beta_cpu.
 */
void wls_cpu(int_t y_cols, int_t y_rows, double* wts, double* y, double* out_beta_cpu){

	const int_t p = y_rows+y_cols-1;
	int i, j;

	double *xtwx = (double *)calloc(p*p,sizeof(double));
	double *xtwy = (double *)calloc((y_rows+y_cols),sizeof(double));

	if (xtwx == NULL || xtwy == NULL){
		/* free(NULL) is a no-op, so both can be released unconditionally */
		free(xtwx);
		free(xtwy);
		return;
	}

	//XTWX start
	XTWX(y_rows, y_cols, wts, xtwx);

	//XTWXinv
	XTWXinv(y_rows, y_cols, xtwx);

	//XTWY
	XTWY(y_rows, y_cols, wts, y, xtwy);

	//OUT_BETA: multiply the processed matrix by the right-hand side
	for (i=0; i < p; i++){
		out_beta_cpu[i] = 0.0;
		for (j=0; j < p; j++){
			out_beta_cpu[i] += xtwx[j*p+i]*xtwy[j];
		}
	}

	free(xtwx);
	free(xtwy);

	/* function returns void: the original "return 0;" was invalid here */
}
예제 #2
0
파일: test.c 프로젝트: sufengniu/MRPLM
/*
 * Test/benchmark driver for the weighted least-squares routines.
 *
 * Fills a y_rows x y_cols weight array and data array (indexed as
 * [j*y_rows + i]) with reciprocals of random integers drawn from
 * [RANDOM_MIN, RANDOM_MAX], then:
 *   - when CPU_COMPUTE is defined, runs XTWX / XTWXinv / XTWY plus the final
 *     matrix-vector product on the CPU and prints the elapsed time;
 *   - always runs wls_gpu() and prints its elapsed time;
 *   - when CPU_COMPUTE is defined, prints the mean relative difference
 *     between the GPU result and the CPU result.
 *
 * Returns 0.
 */
int main(){

	
	struct timeval start, end;
	long utime;	

	
	int_t y_rows = Y_ROWS;
	int_t y_cols = Y_COLS;

	/* Inputs: one weight and one observation per (row, column) cell. */
	double *wts = (double *)calloc((y_rows*y_cols),sizeof(double));
	double *y = (double *)calloc((y_rows*y_cols),sizeof(double));

	printf("testing here\n");

#ifdef CPU_COMPUTE
	/* Scratch and output buffers for the CPU reference computation. */
	double *xtwx = (double *)calloc((y_rows+y_cols-1)*(y_rows+y_cols-1),sizeof(double));
	double *xtwy = (double *)calloc((y_rows+y_cols),sizeof(double));	
	double *out_beta = (double *)calloc((y_rows+y_cols),sizeof(double));
#endif	
	double *out_beta_gpu = (double *)calloc((y_rows+y_cols),sizeof(double));

	int_t i,j;
	
	/* initialize random seed: */
  	srand (time(NULL));
	
	/* Each entry becomes 1/k with k a random integer in [RANDOM_MIN, RANDOM_MAX].
	   NOTE(review): if that range contains 0 the reciprocal is a division by
	   zero -- confirm the macro values. */
	for(j=0; j<y_cols; j++){
		for(i=0; i<y_rows; i++){
			wts[j*y_rows+i] = rand()%(RANDOM_MAX - RANDOM_MIN + 1) + RANDOM_MIN;
			wts[j*y_rows+i]=1.0/wts[j*y_rows+i];
			y[j*y_rows+i] = rand()%(RANDOM_MAX - RANDOM_MIN + 1) + RANDOM_MIN;
			y[j*y_rows+i]=1.0/y[j*y_rows+i];
		}
	}

#ifdef CPU_COMPUTE
	printf("\n Starting CPU Computation\n");
	gettimeofday(&start, NULL);
	
	//XTWX start
	XTWX(y_rows,y_cols,wts,xtwx);
	printf("\nXTWX done\n"); fflush(stdout);

	//XTWXinv	
	XTWXinv(y_rows, y_cols,xtwx);
	printf("\nXTWXinv done\n");	fflush(stdout);

	//XTWY	
	XTWY(y_rows, y_cols, wts,y, xtwy);
	printf("\nXTWY done\n");	fflush(stdout);

	//OUT_BETA	
	/* out_beta[i] = sum_j xtwx[j*p + i] * xtwy[j], with p = y_rows+y_cols-1 */
	for (i=0;i < y_rows+y_cols-1; i++){
		out_beta[i] = 0.0;
		for (j=0;j < y_rows+y_cols -1; j++){
			out_beta[i] += xtwx[j*(y_rows+y_cols -1)+i]*xtwy[j];
		}
	}

	gettimeofday(&end, NULL);
	/* elapsed wall-clock time in microseconds */
	utime = ((end.tv_sec - start.tv_sec) * 1000000 + end.tv_usec - start.tv_usec);
	printf("\n CPU Computation done \n");
	printf("\nTime CPU = %ld us\n",utime);
#endif

	printf("\n Starting GPU Computation\n");

	//GPU start
	gettimeofday(&start, NULL);

	wls_gpu(y_cols, y_rows, wts, y, out_beta_gpu);

	gettimeofday(&end, NULL);
	/* elapsed wall-clock time in microseconds */
	utime = ((end.tv_sec - start.tv_sec) * 1000000 + end.tv_usec - start.tv_usec);
	printf("\n GPU Computation done \n");	
	printf("\nTime GPU = %ld us\n",utime);

/* Disabled checks of intermediate matrices. They reference h_Ainv, h_B, h_D,
   h_Q and h_S, none of which is declared in this function, so the region
   would not compile if enabled as-is. */
#if 0
	//check
	int_t M_size = y_cols+y_rows-1;
	
	//check for A
	double Ainv_err = 0.0;
	for(i=0; i<y_cols; i++){
		Ainv_err+=fabs(((1.0/h_Ainv[i]) - xtwx[i*M_size+i])/xtwx[i*M_size+i]);
	}
	Ainv_err/=y_cols;
	printf("\nError Ainv = %e\n",Ainv_err);
	
	//check for B
	double B_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<y_cols; i++){
			B_err+=fabs((h_B[j*y_cols+i] - xtwx[(y_cols+j)*M_size+i])/xtwx[(y_cols+j)*M_size+i]);
		}
	}
	B_err/=(y_cols*(y_rows-1));
	printf("\nError B = %e\n",B_err);

	//check for D
	double D_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<(y_rows-1); i++){
			D_err+=fabs((h_D[j*(y_rows-1)+i] - xtwx[(y_cols+j)*M_size+y_cols+i])/xtwx[(y_cols+j)*M_size+y_cols+i]);
		}
	}
	D_err/=((y_rows-1)*(y_rows-1));
	printf("\nError D = %e\n",D_err);

	//check for Q
	double Q_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<y_cols; i++){
			Q_err+=fabs((h_Q[j*y_cols+i] - xtwx[(y_cols+j)*M_size+i])/xtwx[(y_cols+j)*M_size+i]);
		}
	}
	Q_err/=(y_cols*(y_rows-1));
	printf("\nError Q = %e\n",Q_err);


	//check for S
	double S_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<(y_rows-1); i++){
			S_err+=fabs((h_S[j*(y_rows-1)+i] - xtwx[(y_cols+j)*M_size+y_cols+i])/xtwx[(y_cols+j)*M_size+y_cols+i]);
		}
	}
	S_err/=((y_rows-1)*(y_rows-1));
	printf("\nError S = %e\n",S_err);
#endif

#ifdef CPU_COMPUTE	
	//check for out_beta
	/* Mean relative difference |gpu - cpu| / cpu over all y_rows+y_cols-1
	   coefficients; an exactly-zero CPU coefficient makes this a division
	   by zero. */
	double out_beta_err = 0.0;
	for(j=0; j<(y_rows+y_cols-1); j++){
		out_beta_err+=fabs((out_beta_gpu[j] - out_beta[j])/out_beta[j]);
	}
	out_beta_err/=(y_rows+y_cols-1);
	printf("\nError Out_beta_error = %e\n",out_beta_err);
#endif

	free(wts);
	free(y);
#ifdef CPU_COMPUTE	
	free(xtwx);	
	free(xtwy);
	free(out_beta);
#endif	
	free(out_beta_gpu);
	
	return 0;
	
}
예제 #3
0
/*
 * Pointer-argument front end for XTWX(): reads the row and column counts
 * through their pointers and forwards everything else unchanged.
 * NOTE(review): the all-pointer signature looks like an R .C() entry point --
 * confirm against the package's registration code.
 */
void XTWX_R(int *rows, int *cols, double *out_weights, double *xtwx){
  const int n_rows = *rows;
  const int n_cols = *cols;

  XTWX(n_rows, n_cols, out_weights, xtwx);
}
예제 #4
0
/*
 * Iteratively re-weighted least-squares fit of a two-way (column + row)
 * effects model to the y_rows x y_cols matrix y, stored as y[j*y_rows + i].
 *
 * y            : input data, y_rows*y_cols doubles.
 * input_scale  : in/out. If *input_scale < 0 the scale is re-estimated each
 *                iteration as med_abs(resids)/0.6745; otherwise the supplied
 *                value is used. On return input_scale[0] holds the final scale.
 * w            : per-observation weights that multiply the PsiFn weights.
 * out_beta     : output, y_rows+y_cols-1 coefficients: y_cols column effects
 *                followed by y_rows-1 row effects. The last row's effect is
 *                taken as minus the sum of the other row effects.
 * out_resids   : output residuals, y_rows*y_cols doubles.
 * out_weights  : in/out weights, y_rows*y_cols doubles. Overwritten with w[]
 *                when `initialized` is zero, otherwise used as supplied for
 *                the starting sweeps.
 * PsiFn, psi_k : weight function and its tuning constant; called as
 *                PsiFn(resid/scale, psi_k, 0).
 * max_iter     : maximum number of IRLS iterations.
 * initialized  : non-zero if out_weights already contains starting weights.
 *
 * Iteration stops early when the scale falls below 1e-10 or when
 * irls_delta(old_resids, resids) drops below 1e-4.
 */
void rlm_wfit_anova_engine(double *y, int y_rows, int y_cols, double *input_scale, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

  int i,j,iter;
  const int n = y_rows*y_cols;      /* number of observations (kept integral; was a double) */
  const int p = y_rows+y_cols-1;    /* number of fitted coefficients */
  /* double tol = 1e-7; */
  double acc = 1e-4;
  double scale =0.0;
  double conv;
  double endprobe;
  double sumweights;

  double *wts = out_weights;
  double *resids = out_resids;

  double *old_resids = Calloc(n,double);
  double *rowmeans = Calloc(y_rows,double);
  double *xtwx = Calloc(p*p,double);
  double *xtwy = Calloc((y_rows+y_cols),double);

  if (!initialized){
    /* initially use the supplied weights unchanged */
    for (i=0; i < n; i++){
      wts[i] = w[i]*1.0;
    }
  }

  /* starting matrix: residuals begin as a copy of the data */
  for (i=0; i < y_rows; i++){
    for (j=0; j < y_cols; j++){
      resids[j*y_rows + i] = y[j*y_rows + i];
    }
  }

  /* sweep columns (ie chip effects): remove each column's weighted mean */
  for (j=0; j < y_cols; j++){
    out_beta[j] = 0.0;
    sumweights = 0.0;
    for (i=0; i < y_rows; i++){
      out_beta[j] += wts[j*y_rows + i]* resids[j*y_rows + i];
      sumweights +=  wts[j*y_rows + i];
    }
    out_beta[j]/=sumweights;
    for (i=0; i < y_rows; i++){
      resids[j*y_rows + i] = resids[j*y_rows + i] -  out_beta[j];
    }
  }

  /* sweep rows (ie probe effects): remove each row's weighted mean */
  for (i=0; i < y_rows; i++){
    rowmeans[i] = 0.0;
    sumweights = 0.0;
    for (j=0; j < y_cols; j++){
      rowmeans[i] += wts[j*y_rows + i]* resids[j*y_rows + i];
      sumweights +=  wts[j*y_rows + i];
    }
    rowmeans[i]/=sumweights;
    for (j=0; j < y_cols; j++){
       resids[j*y_rows + i] =  resids[j*y_rows + i] - rowmeans[i];
    }
  }
  /* only the first y_rows-1 row effects are stored as coefficients */
  for (i=0; i < y_rows-1; i++){
    out_beta[i+y_cols] = rowmeans[i];
  }

  for (iter = 0; iter < max_iter; iter++){
    if (*input_scale < 0){
      scale = med_abs(resids,n)/0.6745;
    } else {
      scale = *input_scale;
    }

    if (fabs(scale) < 1e-10){
      /* scale too small: residuals are effectively zero, stop iterating */
      break;
    }

    for (i=0; i < n; i++){
      old_resids[i] = resids[i];
    }

    for (i=0; i < n; i++){
      wts[i] = w[i]*PsiFn(resids[i]/scale,psi_k,0);
    }

    /* weighted least squares */
    memset(xtwx,0,p*p*sizeof(double));

    XTWX(y_rows,y_cols,wts,xtwx);
    XTWXinv(y_rows, y_cols,xtwx);
    XTWY(y_rows, y_cols, wts,y, xtwy);

    for (i=0; i < p; i++){
      out_beta[i] = 0.0;
      for (j=0; j < p; j++){
        out_beta[i] += xtwx[j*p+i]*xtwy[j];
      }
    }

    /* residuals for the rows that have an explicit coefficient */
    for (i=0; i < y_rows-1; i++){
      for (j=0; j < y_cols; j++){
        resids[j*y_rows +i] = y[j*y_rows + i]- (out_beta[j] + out_beta[i + y_cols]);
      }
    }

    /* The last row's effect is minus the sum of the other row effects. That
       sum does not depend on the column, so compute it once. */
    endprobe = 0.0;
    for (i=0; i < y_rows-1; i++){
      endprobe += out_beta[i + y_cols];
    }
    for (j=0; j < y_cols; j++){
      resids[j*y_rows + y_rows-1] = y[j*y_rows + y_rows-1]- (out_beta[j] - endprobe);
    }

    /* check convergence based on residuals */
    conv = irls_delta(old_resids,resids, n);

    if (conv < acc){
      break;
    }
  }

  /* report the scale that matches the final residuals */
  if (*input_scale < 0){
    scale = med_abs(resids,n)/0.6745;
  } else {
    scale = *input_scale;
  }

  Free(xtwx);
  Free(xtwy);
  Free(old_resids);
  Free(rowmeans);
  input_scale[0] = scale;

}
예제 #5
0
/*
 * Standard-error estimates for the coefficients of the two-way model fitted
 * to a y_rows x y_cols matrix (p = y_rows+y_cols-1 coefficients,
 * n = y_rows*y_cols observations).
 *
 * Y, beta      : accepted for interface compatibility; not read here.
 * resids       : n residuals from the fit.
 * weights      : n weights from the fit (used by method 4 only).
 * se_estimates : output. Method 4 writes p+1 entries: one per coefficient
 *                plus, at index p, sqrt of the sum of the row-effect block of
 *                the covariance matrix. For methods 1-3 the entries are
 *                written by the RLM_SE_Method_*_anova helpers.
 * varcov       : optional p*p output (may be NULL for method 4, which fills
 *                only the entries varcov[j*p + i] with j >= i).
 * residSE      : output; residSE[0] receives the weighted RMSE (method 4) or
 *                the scale estimate med_abs(resids)/0.6745 (other methods).
 * method       : 1, 2, 3 select the corresponding RLM_SE_Method_*_anova
 *                helper; 4 uses the weighted least-squares formula. Any other
 *                value only sets residSE[0].
 * PsiFn, psi_k : psi function and tuning constant; called with third
 *                argument 1 and 2 here.
 */
void rlm_compute_se_anova(double *Y, int y_rows,int y_cols, double *beta, double *resids,double *weights,double *se_estimates, double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k){

  int i,j; /* counter/indexing variables */
  double k1 = psi_k;
  double sumpsi2=0.0;     /* sum of PsiFn(r_i, k, 2)^2 */
  double sumderivpsi=0.0; /* sum of PsiFn(r_i, k, 1) */
  double Kappa=0.0;       /* a correction factor */
  double scale=0.0;
  int n = y_rows*y_cols;
  int p = y_rows + y_cols -1;
  double *XTX = Calloc(p*p,double);
  double *W = Calloc(p*p,double);
  double RMSEw = 0.0;
  double vs=0.0,m,varderivpsi=0.0;
  double *W_tmp=Calloc(n,double);
  double psi_val, deriv_val;

  if (method == 4){
    for (i=0; i < n; i++){
      RMSEw+= weights[i]*resids[i]*resids[i];
    }

    RMSEw = sqrt(RMSEw/(double)(n-p));

    residSE[0] =  RMSEw;

    XTWX(y_rows,y_cols,weights,XTX);
    if (y_rows > 1){
      XTWXinv(y_rows, y_cols,XTX);
    } else {
      /* single row: only the diagonal is inverted */
      for (i=0; i < p; i++){
        XTX[i*p + i] = 1.0/XTX[i*p + i];
      }
    }

    for (i =0; i < p; i++){
      se_estimates[i] = RMSEw*sqrt(XTX[i*p + i]);
    }

    if (varcov != NULL){
      for (i = 0; i < p; i++){
        for (j = i; j < p; j++){
          varcov[j*p + i] =  RMSEw*RMSEw*XTX[j*p + i];
        }
      }
    }

    /* Extra entry at index p: accumulate the negated row-effect covariance
       block, then negate again under the square root. */
    se_estimates[p] = 0.0;

    for (i=y_cols; i < p; i++){
      for (j = y_cols; j < p; j++){
        se_estimates[p]+= -1*RMSEw*RMSEw*XTX[j*p + i];
      }
    }

    se_estimates[p] = sqrt(-1*se_estimates[p]);

  } else {
    scale = med_abs(resids,n)/0.6745;

    residSE[0] =  scale;

    /* compute most of what we will need to do each of the different standard error methods */
    for (i =0; i < n; i++){
      psi_val = PsiFn(resids[i]/scale,k1,2);
      sumpsi2+= psi_val*psi_val;
      sumderivpsi+= PsiFn(resids[i]/scale,k1,1);
    }

    m = (sumderivpsi/(double) n);

    for (i = 0; i < n; i++){
      deriv_val = PsiFn(resids[i]/scale,k1,1);
      varderivpsi+=(deriv_val - m)*(deriv_val - m);
    }
    varderivpsi/=(double)(n);

    Kappa = 1.0 + ((double)p/(double)n) *varderivpsi/(m*m);

    /* prepare XtX (unit weights) and W (PsiFn(.,.,1) weights) matrices */
    for (i=0; i < n; i++){
      W_tmp[i] = 1.0;
    }
    XTWX(y_rows,y_cols,W_tmp,XTX);

    for (i=0; i < n; i++){
      W_tmp[i] = PsiFn(resids[i]/scale,k1,1);
    }
    XTWX(y_rows,y_cols,W_tmp,W);

    if (method==1) {
      Kappa = Kappa*Kappa;
      vs = scale*scale*sumpsi2/(double)(n-p);
      Kappa = Kappa*vs/(m*m);
      RLM_SE_Method_1_anova(Kappa, XTX, y_rows,y_cols, se_estimates,varcov);
    } else if (method==2){
      vs = scale*scale*sumpsi2/(double)(n-p);
      Kappa = Kappa*vs/m;
      RLM_SE_Method_2_anova(Kappa, W, y_rows,y_cols, se_estimates,varcov);
    } else if (method==3){
      vs = scale*scale*sumpsi2/(double)(n-p);
      Kappa = 1.0/Kappa*vs;
      /* The status returned by the helper is deliberately ignored; the
         original only had a disabled debug print on a non-zero status. */
      (void) RLM_SE_Method_3_anova(Kappa, XTX, W, y_rows,y_cols, se_estimates,varcov);
    }
  }
  Free(W_tmp);
  Free(XTX);
  Free(W);

}