Exemple #1
0
/*
 * CPU weighted-least-squares solve.
 *
 * Runs XTWX, XTWXinv and XTWY (defined elsewhere) on freshly zeroed work
 * buffers, then writes the product of the resulting matrix and vector to
 * out_beta_cpu.
 *
 *   y_cols, y_rows  dimensions forwarded to XTWX / XTWXinv / XTWY
 *   wts             weights, forwarded to XTWX and XTWY
 *   y               observations, forwarded to XTWY
 *   out_beta_cpu    output; entries [0, y_rows+y_cols-1) are overwritten
 *
 * If a work buffer cannot be allocated the function returns early and
 * leaves out_beta_cpu untouched.
 */
void wls_cpu(int_t y_cols, int_t y_rows, double* wts, double* y, double* out_beta_cpu){

	/* Side length of the square xtwx buffer and number of outputs written. */
	int_t dim = y_rows+y_cols-1;
	int_t i, j;

	double *xtwx = (double *)calloc(dim*dim,sizeof(double));
	double *xtwy = (double *)calloc((y_rows+y_cols),sizeof(double));

	/* Never hand a NULL work buffer to the helpers below. */
	if (xtwx == NULL || xtwy == NULL){
		free(xtwx);
		free(xtwy);
		return;
	}

	/* Fill xtwx; presumably X'WX judging by the helper name -- confirm. */
	XTWX(y_rows, y_cols, wts, xtwx);

	/* Transform xtwx in place; presumably an inverse -- confirm. */
	XTWXinv(y_rows, y_cols, xtwx);

	/* Fill xtwy; presumably X'Wy judging by the helper name -- confirm. */
	XTWY(y_rows, y_cols, wts, y, xtwy);

	/* out_beta_cpu[i] = sum over j of xtwx[j*dim + i] * xtwy[j] */
	for (i=0; i < dim; i++){
		out_beta_cpu[i] = 0.0;
		for (j=0; j < dim; j++){
			out_beta_cpu[i] += xtwx[j*dim+i]*xtwy[j];
		}
	}

	free(xtwx);
	free(xtwy);
}
Exemple #2
0
/*
 * Test driver.
 *
 * Fills the weight and observation buffers with reciprocals of random
 * integers, times wls_gpu, and -- when CPU_COMPUTE is defined -- also times
 * the CPU path (XTWX / XTWXinv / XTWY plus the final product) and prints the
 * mean relative difference between the CPU and GPU results.
 *
 * Returns 0 on success, 1 if any buffer could not be allocated.
 */
int main(){

	
	struct timeval start, end;
	long utime;	
	int alloc_ok;

	
	int_t y_rows = Y_ROWS;
	int_t y_cols = Y_COLS;

	double *wts = (double *)calloc((y_rows*y_cols),sizeof(double));
	double *y = (double *)calloc((y_rows*y_cols),sizeof(double));

	printf("testing here\n");

#ifdef CPU_COMPUTE
	double *xtwx = (double *)calloc((y_rows+y_cols-1)*(y_rows+y_cols-1),sizeof(double));
	double *xtwy = (double *)calloc((y_rows+y_cols),sizeof(double));	
	double *out_beta = (double *)calloc((y_rows+y_cols),sizeof(double));
#endif	
	double *out_beta_gpu = (double *)calloc((y_rows+y_cols),sizeof(double));

	int_t i,j;

	/* Stop before any buffer is dereferenced if an allocation failed. */
	alloc_ok = (wts != NULL) && (y != NULL) && (out_beta_gpu != NULL);
#ifdef CPU_COMPUTE
	alloc_ok = alloc_ok && (xtwx != NULL) && (xtwy != NULL) && (out_beta != NULL);
#endif
	if (!alloc_ok){
		fprintf(stderr, "memory allocation failed\n");
		free(wts);
		free(y);
#ifdef CPU_COMPUTE
		free(xtwx);
		free(xtwy);
		free(out_beta);
#endif
		free(out_beta_gpu);
		return 1;
	}
	
	/* initialize random seed: */
  	srand (time(NULL));
	
	/*
	 * Each entry is 1/r with r drawn from [RANDOM_MIN, RANDOM_MAX].
	 * NOTE(review): if that range contains 0 the reciprocal is a division
	 * by zero -- confirm the macro values.
	 */
	for(j=0; j<y_cols; j++){
		for(i=0; i<y_rows; i++){
			wts[j*y_rows+i] = rand()%(RANDOM_MAX - RANDOM_MIN + 1) + RANDOM_MIN;
			wts[j*y_rows+i]=1.0/wts[j*y_rows+i];
			y[j*y_rows+i] = rand()%(RANDOM_MAX - RANDOM_MIN + 1) + RANDOM_MIN;
			y[j*y_rows+i]=1.0/y[j*y_rows+i];
		}
	}

#ifdef CPU_COMPUTE
	printf("\n Starting CPU Computation\n");
	gettimeofday(&start, NULL);
	
	//XTWX start
	XTWX(y_rows,y_cols,wts,xtwx);
	printf("\nXTWX done\n"); fflush(stdout);

	//XTWXinv	
	XTWXinv(y_rows, y_cols,xtwx);
	printf("\nXTWXinv done\n");	fflush(stdout);

	//XTWY	
	XTWY(y_rows, y_cols, wts,y, xtwy);
	printf("\nXTWY done\n");	fflush(stdout);

	//OUT_BETA: out_beta[i] = sum over j of xtwx[j*M + i] * xtwy[j], M = y_rows+y_cols-1
	for (i=0;i < y_rows+y_cols-1; i++){
		out_beta[i] = 0.0;
		for (j=0;j < y_rows+y_cols -1; j++){
			out_beta[i] += xtwx[j*(y_rows+y_cols -1)+i]*xtwy[j];
		}
	}

	gettimeofday(&end, NULL);
	/* elapsed wall-clock time in microseconds */
	utime = ((end.tv_sec - start.tv_sec) * 1000000 + end.tv_usec - start.tv_usec);
	printf("\n CPU Computation done \n");
	printf("\nTime CPU = %ld us\n",utime);
#endif

	printf("\n Starting GPU Computation\n");

	//GPU start
	gettimeofday(&start, NULL);

	wls_gpu(y_cols, y_rows, wts, y, out_beta_gpu);

	gettimeofday(&end, NULL);
	/* elapsed wall-clock time in microseconds */
	utime = ((end.tv_sec - start.tv_sec) * 1000000 + end.tv_usec - start.tv_usec);
	printf("\n GPU Computation done \n");	
	printf("\nTime GPU = %ld us\n",utime);

	/*
	 * Disabled checks of intermediate matrices. They reference h_Ainv, h_B,
	 * h_D, h_Q and h_S, none of which are declared in this function.
	 */
#if 0
	//check
	int_t M_size = y_cols+y_rows-1;
	
	//check for A
	double Ainv_err = 0.0;
	for(i=0; i<y_cols; i++){
		Ainv_err+=fabs(((1.0/h_Ainv[i]) - xtwx[i*M_size+i])/xtwx[i*M_size+i]);
	}
	Ainv_err/=y_cols;
	printf("\nError Ainv = %e\n",Ainv_err);
	
	//check for B
	double B_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<y_cols; i++){
			B_err+=fabs((h_B[j*y_cols+i] - xtwx[(y_cols+j)*M_size+i])/xtwx[(y_cols+j)*M_size+i]);
		}
	}
	B_err/=(y_cols*(y_rows-1));
	printf("\nError B = %e\n",B_err);

	//check for D
	double D_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<(y_rows-1); i++){
			D_err+=fabs((h_D[j*(y_rows-1)+i] - xtwx[(y_cols+j)*M_size+y_cols+i])/xtwx[(y_cols+j)*M_size+y_cols+i]);
		}
	}
	D_err/=((y_rows-1)*(y_rows-1));
	printf("\nError D = %e\n",D_err);

	//check for Q
	double Q_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<y_cols; i++){
			Q_err+=fabs((h_Q[j*y_cols+i] - xtwx[(y_cols+j)*M_size+i])/xtwx[(y_cols+j)*M_size+i]);
		}
	}
	Q_err/=(y_cols*(y_rows-1));
	printf("\nError Q = %e\n",Q_err);


	//check for S
	double S_err = 0.0;
	for(j=0; j<(y_rows-1); j++){
		for(i=0; i<(y_rows-1); i++){
			S_err+=fabs((h_S[j*(y_rows-1)+i] - xtwx[(y_cols+j)*M_size+y_cols+i])/xtwx[(y_cols+j)*M_size+y_cols+i]);
		}
	}
	S_err/=((y_rows-1)*(y_rows-1));
	printf("\nError S = %e\n",S_err);
#endif

#ifdef CPU_COMPUTE	
	//check for out_beta: mean of |gpu - cpu| / cpu over all outputs
	double out_beta_err = 0.0;
	for(j=0; j<(y_rows+y_cols-1); j++){
		out_beta_err+=fabs((out_beta_gpu[j] - out_beta[j])/out_beta[j]);
	}
	out_beta_err/=(y_rows+y_cols-1);
	printf("\nError Out_beta_error = %e\n",out_beta_err);
#endif

	free(wts);
	free(y);
#ifdef CPU_COMPUTE	
	free(xtwx);	
	free(xtwy);
	free(out_beta);
#endif	
	free(out_beta_gpu);
	
	return 0;
	
}
Exemple #3
0
/*
 * Pointer-argument entry point for XTWY.
 *
 * Reads the two dimensions through their pointers and forwards them, with
 * the three array arguments unchanged, to XTWY.
 * NOTE(review): the _R suffix suggests this exists for callers that pass
 * every argument by pointer (e.g. R's .C interface) -- confirm.
 */
void XTWY_R(int *rows, int *cols, double *out_weights, double *y,double *xtwy){
  const int n_rows = *rows;
  const int n_cols = *cols;

  XTWY(n_rows, n_cols, out_weights, y, xtwy);
}
Exemple #4
0
/*
 * Iteratively reweighted fit of a column-effect + row-effect model.
 *
 * Data are addressed as y[j*y_rows + i] for row i and column j. After an
 * initial weighted sweep of column and row means, each iteration
 *   1. picks a scale (fixed, or med_abs(resids)/0.6745 if *input_scale < 0),
 *   2. sets wts[k] = w[k] * PsiFn(resids[k]/scale, psi_k, 0),
 *   3. solves the weighted least-squares system via XTWX / XTWXinv / XTWY,
 *   4. recomputes the residuals,
 * and stops when irls_delta(old_resids, resids) drops below 1e-4, the scale
 * is smaller than 1e-10 in magnitude, or max_iter iterations have run.
 *
 *   y            input data, y_rows*y_cols values
 *   input_scale  in/out: negative means "estimate the scale"; on return
 *                holds the scale from the final state of the residuals
 *   w            per-observation weights multiplied into the psi weights
 *   out_beta     output: entries [0, y_cols) are column effects, entries
 *                [y_cols, y_cols+y_rows-1) are the first y_rows-1 row effects
 *   out_resids   output residuals, y_rows*y_cols values
 *   out_weights  in/out weights; used as given when initialized is nonzero,
 *                otherwise seeded from w
 *   PsiFn        weight function, called as PsiFn(value, psi_k, 0)
 *   max_iter     maximum number of reweighting iterations
 *   initialized  nonzero if out_weights already holds starting weights
 *
 * NOTE(review): Calloc/Free are presumably R's memory macros -- confirm.
 */
void rlm_wfit_anova_engine(double *y, int y_rows, int y_cols, double *input_scale, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

  int i,j,iter;
  /* double tol = 1e-7; */
  double acc = 1e-4;
  double scale =0.0;
  double conv;
  double endprobe;
  double sumweights;

  /* Element counts are integers: total observations and system dimension. */
  int rows = y_rows*y_cols;
  int n_params = y_rows+y_cols-1;
  int xtwx_len = n_params*n_params;

  double *wts = out_weights; 

  double *resids = out_resids; 
  double *old_resids = Calloc(rows,double);
  
  double *rowmeans = Calloc(y_rows,double);

  double *xtwx = Calloc(xtwx_len,double);
  double *xtwy = Calloc((y_rows+y_cols),double);

  if (!initialized){
    
    /* initially use the supplied weights w as the starting weights */
    for (i=0; i < rows; i++){
      wts[i] = w[i]*1.0;
    }
  }

  /* starting matrix: residuals begin as a copy of the data */
  
  for (i=0; i < y_rows; i++){
    for (j=0; j < y_cols; j++){
      resids[j*y_rows + i] = y[j*y_rows + i];
    }
  }
  
  /* sweep columns (ie chip effects): subtract each column's weighted mean */

  for (j=0; j < y_cols; j++){
    out_beta[j] = 0.0;
    sumweights = 0.0;
    for (i=0; i < y_rows; i++){
      out_beta[j] += wts[j*y_rows + i]* resids[j*y_rows + i];
      sumweights +=  wts[j*y_rows + i];
    }
    out_beta[j]/=sumweights;
    for (i=0; i < y_rows; i++){
      resids[j*y_rows + i] = resids[j*y_rows + i] -  out_beta[j];
    }
  }


 /* sweep rows  (ie probe effects): subtract each row's weighted mean */
  
  for (i=0; i < y_rows; i++){
    rowmeans[i] = 0.0;
    sumweights = 0.0;
    for (j=0; j < y_cols; j++){
      rowmeans[i] += wts[j*y_rows + i]* resids[j*y_rows + i]; 
      sumweights +=  wts[j*y_rows + i];
    }
    rowmeans[i]/=sumweights;
    for (j=0; j < y_cols; j++){
       resids[j*y_rows + i] =  resids[j*y_rows + i] - rowmeans[i];
    }
  }
  /* only the first y_rows-1 row effects are stored in out_beta */
  for (i=0; i < y_rows-1; i++){
    out_beta[i+y_cols] = rowmeans[i];
  }



  for (iter = 0; iter < max_iter; iter++){
    if (*input_scale < 0){
      scale = med_abs(resids,rows)/0.6745;
    } else {
      scale = *input_scale;
    }
    
    if (fabs(scale) < 1e-10){
      /*printf("Scale too small \n"); */
      break;
    }
    for (i =0; i < rows; i++){
      old_resids[i] = resids[i];
    }

    for (i=0; i < rows; i++){
      wts[i] = w[i]*PsiFn(resids[i]/scale,psi_k,0);  /*           psi_huber(resids[i]/scale,k,0); */
    }
   
    /* printf("%f\n",scale); */


    /* weighted least squares */
    
    memset(xtwx,0,xtwx_len*sizeof(double));


    XTWX(y_rows,y_cols,wts,xtwx);
    XTWXinv(y_rows, y_cols,xtwx);
    XTWY(y_rows, y_cols, wts,y, xtwy);

    
    /* out_beta[i] = sum over j of xtwx[j*n_params + i] * xtwy[j] */
    for (i=0;i < n_params; i++){
      out_beta[i] = 0.0;
       for (j=0;j < n_params; j++){
    	 out_beta[i] += xtwx[j*n_params+i]*xtwy[j];
       }
    }

    /* residuals for the first y_rows-1 rows */
    
    for (i=0; i < y_rows-1; i++){
      for (j=0; j < y_cols; j++){
	resids[j*y_rows +i] = y[j*y_rows + i]- (out_beta[j] + out_beta[i + y_cols]); 
      }
    }

    /*
     * The last row's effect is minus the sum of the stored row effects.
     * That sum does not depend on the column, so compute it once.
     */
    endprobe=0.0;
    for (i=0; i < y_rows-1; i++){
      endprobe+= out_beta[i + y_cols];
    }
    for (j=0; j < y_cols; j++){
      resids[j*y_rows + y_rows-1] = y[j*y_rows + y_rows-1]- (out_beta[j] - endprobe);
    }

    /*check convergence  based on residuals */
    
    conv = irls_delta(old_resids,resids, rows);
    
    if (conv < acc){
      /*    printf("Converged \n");*/
      break; 

    }



  }
        
  /* report the scale that corresponds to the final residuals */
  if (*input_scale < 0){
    scale = med_abs(resids,rows)/0.6745;
  } else {
    scale = *input_scale;
  }

  Free(xtwx);
  Free(xtwy);
  Free(old_resids);
  Free(rowmeans);
  input_scale[0] = scale;

}