/*
 * Weighted sum of outer products.
 *
 * Given a matrix x (n rows, each a vector of length p) and a vector w of
 * n weights, accumulates
 *
 *     ans = sum_{i=0}^{n-1} w[i] * x[i] x[i]'
 *
 * x    - array of n row pointers, each row holding p doubles
 * w    - n weights
 * n    - number of rows of x / entries of w
 * p    - length of each row of x
 * tmp  - caller-supplied p x p scratch matrix (overwritten)
 * ans  - p x p result matrix (reset first, then accumulated into)
 *
 * The helper routines are declared locally and defined elsewhere.
 */
void r_sum_w_x_xprime(double **x, double *w, int n, int p,
                      double **tmp, double **ans)
{
    void reset_mat(double **a, int n, int m);
    void matias_vec_vec(double **a, double *v1, double *v2, int n);
    void scalar_mat(double **a, double b, double **c, int n, int m);
    void sum_mat(double **a, double **b, double **c, int n, int m);

    int row = 0;

    /* Start the accumulator from a cleared matrix. */
    reset_mat(ans, p, p);

    while (row < n) {
        double *obs = x[row];

        /* tmp <- obs obs', then scale it by the row weight in place. */
        matias_vec_vec(tmp, obs, obs, p);
        scalar_mat(tmp, w[row], tmp, p, p);

        /* ans <- ans + tmp */
        sum_mat(ans, tmp, ans, p, p);

        ++row;
    }
}
int main(int argc, char * argv[]) { parse_args(argc, argv); const int size = n * n; int data_size_bytes = size * sizeof(float); float *mat_a = malloc(data_size_bytes); float *mat_b = malloc(data_size_bytes); float *vector; float *output = malloc(data_size_bytes); float *expected = malloc(data_size_bytes); generate_matrix(n, mat_a, range); generate_matrix(n, mat_b, range); timing_t timer; timer_start(&timer); float *mat_b_trans = malloc(data_size_bytes); transpose(n, mat_b, mat_b_trans); for (int i=0; i<n; ++i) { vector = &mat_b_trans[n*i]; MatMatMultiply(n, mat_a, vector, &output[n*i]); } float *output_trans = malloc(data_size_bytes); transpose(n, output, output_trans); timer_stop(&timer); float sum = sum_mat(size, output_trans); printf("%d %f %ld %ld\n", n, sum, timer.realtime, timer.cputime); int status = 0; if (trace == 1) { printf("\nMatrix A\n"); for (int i=0; i<n; i++){ for (int j=0; j<n; j++){ printf("%f " , mat_a[i*n+j]); } printf("\n"); } printf("\nMatrix B \n"); for (int i=0; i<n; i++){ for (int j=0; j<n; j++){ printf("%f " , mat_b[i*n+j]); } printf("\n"); } printf("\n\nResult\n"); for (int i=0; i<n; i++){ for (int j=0; j<n; j++){ printf("%f " , output[i*n+j]); } printf("\n"); } } else if (trace == 2) { multiply_CPU_matrix(n, mat_a, mat_b, expected); int status = check(size, output_trans, expected); if (status) { printf("Test failed.\n"); status = 1; } else printf("Test passed OK!\n"); } free(mat_a); free(mat_b); free(mat_b_trans); free(output); free(expected); free(output_trans); return status; }
/*
 * Multiplies a complex matrix read from "matrix.dat" by a complex vector
 * read from "vector_input.dat" and writes one complex value (real part,
 * then imaginary part, as raw doubles) per matrix row to
 * "vector_output.dat".
 *
 * Command line: <rows> <cols> <threads>
 *
 * File layout as used by this code:
 *   matrix.dat       - records of 4 doubles; the first two are read as
 *                      zero-based row and column indices, the last two as
 *                      the real and imaginary parts.
 *   vector_input.dat - records of 2 doubles (real, imaginary).
 *
 * Exits with status 1 on any validation or file error; returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
    FILE *mat;
    FILE *vec;
    FILE *end;
    double main_buf[MAXSIZE];
    double main_vec_buf[MAXSIZE];
    Cnum mat_buf[MAXSIZE];
    Cnum vec_buf[MAXSIZE];
    Cnum result[MAXSIZE];
    double Row = 0, Col = 0;
    double vec_row;
    double mat_RC_data[6];
    int j = 0;
    int thread;

    mat = fopen("matrix.dat", "rb");
    vec = fopen("vector_input.dat", "rb");
    end = fopen("vector_output.dat", "wb");

    /* Check the number of command-line inputs. argv[3] is only read after
     * argc has been confirmed, so a short command line no longer reads past
     * the end of argv. */
    if (argc == 4 && atoi(argv[1]) && atoi(argv[2])) {
        thread = atoi(argv[3]);
        printf("input = %d x %d, Thread = %d \n", atoi(argv[1]), atoi(argv[2]), thread);
    } else {
        fputs("[Error] Not a correct input", stderr);
        exit(1);
    }

    /* Check that the files could be opened. */
    if (mat == NULL) {
        fputs("Matrix File error", stderr);
        exit(1);
    }
    if (vec == NULL) {
        fputs("Vector File error", stderr);
        exit(1);
    }
    if (end == NULL) {
        fputs("Output File error", stderr);
        exit(1);
    }

    start_time_measurement(); /* start timing */

    /* Matrix size: read the index pair of each of the last three records.
     * After the loop Row/Col hold the last record's indices plus one. */
    int point;
    for (int i = 3; i > 0; i--) {
        point = i * 4 * sizeof(double);
        point = -point;
        if (fseek(mat, point, SEEK_END) == -1) {
            printf("error\n");
        }
        fread(&mat_RC_data[j], sizeof(double), 2, mat);
        Row = mat_RC_data[j] + 1;
        j++;
        Col = mat_RC_data[j] + 1;
        j++;
    }

    /* Sanity-check the row/column indices found at the end of matrix.dat. */
    if (mat_RC_data[4] - mat_RC_data[2] > 1) {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }
    if (mat_RC_data[2] - mat_RC_data[0] > 1) {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }
    if (mat_RC_data[5] - mat_RC_data[3] > 2) {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }
    if (mat_RC_data[3] - mat_RC_data[1] > 2) {
        fputs("[ERROR] Matrix File ROW_DATA error", stderr);
        exit(1);
    }

    /* Compare the rows/columns given on the command line with the matrix. */
    if (atoi(argv[1]) != Row) {
        fputs("[ERROR] 입력한 행값과 매트릭스의 행값이 다릅니다.", stderr);
        exit(1);
    }
    if (atoi(argv[2]) != Col) {
        fputs("[ERROR] 입력한 열값과 매트릭스의 열값이 다릅니다.", stderr);
        exit(1);
    }
    printf("전체 열의 값 = %f\n전체 행의 값 = %f \n", Row, Col);

    /* Number of entries in the vector file (2 doubles per entry). */
    fseek(vec, 0, SEEK_END);
    vec_row = ftell(vec) / (sizeof(double) * 2);
    printf("벡터행렬 행 값 = %f\n", vec_row);

    /* Report an error when the matrix and vector sizes differ. */
    if (Col != vec_row) {
        fputs("[ERROR] Matrix and Vector are not same", stderr);
        exit(1);
    }

    /* One matrix row needs Col*4 slots of main_buf (the largest demand on
     * any of the MAXSIZE-element buffers); refuse inputs that would
     * overflow the fixed-size stack arrays. */
    if (Col * 4 > MAXSIZE) {
        fputs("[ERROR] Matrix is too wide for the internal buffers", stderr);
        exit(1);
    }

    /* Extract the values and compute. */
    j = 0;
    int button = 0;
    fseek(mat, 0, SEEK_SET); /* rewind */
    fseek(vec, 0, SEEK_SET); /* rewind */
    fseek(end, 0, SEEK_SET); /* rewind */

    for (int k = 0; k < Row; k++) {
        /* Reset the indices. */
        button = 0;
        j = 0;
        fseek(vec, 0, SEEK_SET); /* re-read the vector for every row */

        /* Load one matrix row into mat_buf[]. `button` cycles 0..3 over the
         * four doubles of a record; only positions 2 (real) and 3
         * (imaginary) are stored. */
        for (int i = 0; i < Col * 4; i++) {
            fread(&main_buf[i], sizeof(double), 1, mat);
            if (i > 3 && i % 4 == 0)
                j++;
            /* store the value in the struct */
            if (button == 2)
                mat_buf[j].realnum = main_buf[i];
            else if (button == 3) {
                mat_buf[j].imanum = main_buf[i];
            }
            button++;
            if (button == 4)
                button = 0;
        }

        /* Load the vector into vec_buf[]: even positions are real parts,
         * odd positions imaginary parts. */
        j = 0;
        for (int y = 0; y < Col * 2; y++) {
            fread(&main_vec_buf[y], sizeof(double), 1, vec);
            if (y > 1 && y % 2 == 0)
                j++;
            if (y % 2 == 0)
                vec_buf[j].realnum = main_vec_buf[y];
            else {
                vec_buf[j].imanum = main_vec_buf[y];
            }
        }

        /* mat * vec: mult_mat fills result[], sum_mat reduces it to one
         * value which is stored in result[0] and written out. */
        mult_mat(mat_buf, vec_buf, result, (int)Col, thread);
        result[0] = sum_mat(result, (int)Col);

        fwrite(&result[0].realnum, sizeof(double), 1, end);
        fwrite(&result[0].imanum, sizeof(double), 1, end);
    }

    printf("Calculation Complete ");
    end_time_measurement(); /* stop timing */

    fclose(mat);
    fclose(vec);
    fclose(end);
    return 0;
}
/*
 * Bootstrap for a robust linear regression fit.
 *
 * Fits the model once on the full data with R_S_rlm(), builds a correction
 * matrix and correction vector from that fit, and then, for each of the
 * *M bootstrap index sets, computes a corrected bootstrap coefficient
 * vector. When *calc_full == 1 the model is also refitted from scratch on
 * every bootstrap sample.
 *
 * Inputs (all scalars are passed by pointer):
 *   X          n x p design matrix, column-major (X[j*n + i])
 *   y          n responses
 *   N, P       n and p
 *   Boot_Samp  m x nboot matrix of 1-based row indices, column-major
 *              (Boot_Samp[j*m + i])
 *   M          number of bootstrap replications m
 *   size_boot  bootstrap sample size nboot
 *   Seed       initial seed; incremented before every replication
 *   calc_full  1 to also compute the full refit per replication
 *   C, Psi_c   tuning constants handed to the Chi and Psi functions
 *   Nres, max_it, groups, n_group, k_fast_s
 *              forwarded unchanged to R_S_rlm()
 *
 * Outputs:
 *   ours          m x p matrix (ours[j*m + i]) of corrected bootstrap values
 *   full          m x p matrix (full[j*m + i]) of full refits; written only
 *                 when *calc_full == 1
 *   Beta_m, Beta_s, Scale, converged_mm
 *                 results of the fit on the full data (converged_mm is also
 *                 rewritten by the refits)
 *
 * Always returns 0.
 *
 * NOTE(review): allocation results are not checked, and a residual that is
 * exactly zero makes w[i] a division by zero; both are unchanged here.
 */
double R_rlm_rand(double *X, double *y, int *N, int *P, int *Boot_Samp,
                  int *Nres, int *M, int *size_boot, double *ours,
                  double *full, double *Beta_m, double *Beta_s, double *Scale,
                  int *Seed, int *calc_full, double *C, double *Psi_c,
                  int *max_it, int *converged_mm, int *groups, int *n_group,
                  int *k_fast_s)
{
    void initialize_mat(double **a, int n, int m);
    void initialize_vec(double *a, int n);
    void R_S_rlm(double *X, double *y, int *n, int *P, int *nres,
                 int *max_it, double *SCale, double *beta_s, double *beta_m,
                 int *converged_mm, int *seed_rand, double *C, double *Psi_c,
                 int *Groups, int *N_group, int *K_fast_s);
    double Psi_reg(double, double);
    double Psi_reg_prime(double, double);
    double Chi_prime(double, double);
    double Chi(double, double);
    void sampler_i(int, int, int *);
    int inverse(double **, double **, int);
    void matias_vec_vec(double **, double *, double *, int);
    void scalar_mat(double **, double, double **, int, int);
    void scalar_vec(double *, double, double *, int);
    void sum_mat(double **, double **, double **, int, int);
    void sum_vec(double *, double *, double *, int);
    void dif_mat(double **, double **, double **, int, int);
    void dif_vec(double *, double *, double *, int);
    void mat_vec(double **, double *, double *, int, int);
    void mat_mat(double **, double **, double **, int, int, int);

    double *Xb, **xb;
    int **boot_samp;
    double **x, **x2, **x3, **x4, *beta_m, *beta_s;
    double *Fi, *res, *res_s, *w, dummyscale, scale;
    double *v, *v2, *v_aux, *yb;
    double u, u2, s, c, Psi_constant;
    int n, p, m, seed;
    int nboot = *size_boot;
    int nbuf;
    register int i, j, k;

    setbuf(stdout, NULL);

    c = *C;
    Psi_constant = *Psi_c;
    n = *N;
    p = *P;
    m = *M;
    seed = *Seed;

    /* The bootstrap buffers (yb, Xb, xb) are indexed by 0..nboot-1. They
     * used to be sized by n only, which overflowed whenever nboot > n.
     * Size them for the larger of the two so nothing shrinks either. */
    nbuf = (nboot > n) ? nboot : n;

    boot_samp = malloc(m * sizeof *boot_samp);
    for (i = 0; i < m; i++)
        boot_samp[i] = malloc(nboot * sizeof *boot_samp[i]);

    v = malloc(p * sizeof *v);
    v2 = malloc(p * sizeof *v2);
    v_aux = malloc(p * sizeof *v_aux);
    yb = malloc(nbuf * sizeof *yb);
    Xb = malloc((size_t)nbuf * p * sizeof *Xb);
    x = malloc(n * sizeof *x);
    xb = malloc(nbuf * sizeof *xb);
    Fi = malloc(n * sizeof *Fi);
    res = malloc(n * sizeof *res);
    res_s = malloc(n * sizeof *res_s);
    w = malloc(n * sizeof *w);
    x2 = malloc(p * sizeof *x2);
    x3 = malloc(p * sizeof *x3);
    x4 = malloc(p * sizeof *x4);
    beta_m = malloc(p * sizeof *beta_m);
    beta_s = malloc(p * sizeof *beta_s);

    for (i = 0; i < n; i++)
        x[i] = malloc(p * sizeof *x[i]);
    for (i = 0; i < nbuf; i++)
        xb[i] = malloc((p + 1) * sizeof *xb[i]);
    for (i = 0; i < p; i++) {
        x2[i] = malloc(p * sizeof *x2[i]);
        x3[i] = malloc(p * sizeof *x3[i]);
        x4[i] = malloc(p * sizeof *x4[i]);
    }

    /* copy X into x for easier handling */
    for (i = 0; i < n; i++)
        for (j = 0; j < p; j++)
            x[i][j] = X[j * n + i];

    /* convert the 1-based bootstrap indices to 0-based */
    for (i = 0; i < m; i++)
        for (j = 0; j < nboot; j++)
            boot_samp[i][j] = Boot_Samp[j * m + i] - 1;

    /* calculate robust regression estimates on the full data */
    R_S_rlm(X, y, N, P, Nres, max_it, &scale, Beta_s, Beta_m,
            converged_mm, &seed, &c, Psi_c, groups, n_group, k_fast_s);

    *Scale = scale;

    /* get M-fitted values in Fi */
    mat_vec(x, Beta_m, Fi, n, p);
    /* get residuals of M-est in res */
    dif_vec(y, Fi, res, n);
    /* get S-fitted values in res_s */
    mat_vec(x, Beta_s, res_s, n, p);
    /* get residuals of S-est in res_s */
    dif_vec(y, res_s, res_s, n);

    /* set auxiliary matrices to zero */
    initialize_mat(x3, p, p);
    initialize_mat(x4, p, p);
    initialize_vec(v, p);
    u2 = 0.0;

    /* calculate correction matrix */
    for (i = 0; i < n; i++) {
        u = res[i] / scale;
        w[i] = Psi_reg(u, Psi_constant) / res[i];

        /* x3 += Psi'(u) * x_i x_i' */
        matias_vec_vec(x2, x[i], x[i], p);
        scalar_mat(x2, Psi_reg_prime(u, Psi_constant), x2, p, p);
        sum_mat(x3, x2, x3, p, p);

        /* x4 += w_i * x_i x_i' */
        matias_vec_vec(x2, x[i], x[i], p);
        scalar_mat(x2, w[i], x2, p, p);
        sum_mat(x4, x2, x4, p, p);

        /* v += Psi'(u) * u * x_i */
        scalar_vec(x[i], Psi_reg_prime(u, Psi_constant) * u, v_aux, p);
        sum_vec(v, v_aux, v, p);

        u2 += Chi_prime(u, c) * u;
    }

    /* normalised with n (an (n-p) variant was considered upstream) */
    scalar_vec(v, .5 * (double) n * scale / u2, v, p);
    inverse(x3, x2, p);
    mat_mat(x2, x4, x3, p, p, p);
    mat_vec(x2, v, v2, p, p);
    scalar_mat(x3, scale, x3, p, p);
    /* the correction matrix is now in x3 */
    /* the correction vector is now in v2 */

    /* start the bootstrap replications */
    for (i = 0; i < m; i++) {
        /* change the seed! */
        ++seed;

        /* get pseudo observed y's */
        for (j = 0; j < nboot; j++)
            yb[j] = y[boot_samp[i][j]];

        /* resampled design: Xb column-major (nboot rows), xb row pointers */
        for (j = 0; j < nboot; j++)
            for (k = 0; k < p; k++) {
                Xb[k * nboot + j] = x[boot_samp[i][j]][k];
                xb[j][k] = Xb[k * nboot + j];
            }

        /* calculate full bootstrap estimate */
        if (*calc_full == 1)
            R_S_rlm(Xb, yb, &nboot, P, Nres, max_it, &dummyscale,
                    beta_s, beta_m, converged_mm, &seed, &c, Psi_c,
                    groups, n_group, k_fast_s);

        /* calculate robust bootstrap */
        scalar_vec(v, 0.0, v, p);        /* v <- 0 */
        scalar_mat(x2, 0.0, x2, p, p);   /* x2 <- 0 */
        s = 0.0;
        for (j = 0; j < nboot; j++) {
            scalar_vec(xb[j], yb[j] * w[boot_samp[i][j]], v_aux, p);
            sum_vec(v, v_aux, v, p);
            matias_vec_vec(x4, xb[j], xb[j], p);
            scalar_mat(x4, w[boot_samp[i][j]], x4, p, p);
            sum_mat(x2, x4, x2, p, p);
            s += Chi(res_s[boot_samp[i][j]] / scale, c);
        }

        /* normalised with n (an (nboot-p) variant was considered upstream) */
        s = s * scale / .5 / (double) n;

        inverse(x2, x4, p);               /* x4 <- x2^-1 */
        mat_vec(x4, v, v_aux, p, p);      /* v_aux <- x4 * v */
        dif_vec(v_aux, Beta_m, v_aux, p); /* v_aux <- v_aux - Beta_m */

        /* v has the robust bootstrapped vector, correct it */
        mat_vec(x3, v_aux, v, p, p);      /* v <- x3 * v_aux */
        scalar_vec(v2, s - scale, v_aux, p);
        sum_vec(v_aux, v, v, p);

        /* store the betas (column-major, m rows) */
        for (j = 0; j < p; j++) {
            ours[j * m + i] = v[j];
            if (*calc_full == 1)
                full[j * m + i] = beta_m[j];
        }
    }

    for (i = 0; i < m; i++)
        free(boot_samp[i]);
    free(boot_samp);

    for (i = 0; i < n; i++)
        free(x[i]);
    for (i = 0; i < nbuf; i++)
        free(xb[i]);
    for (i = 0; i < p; i++) {
        free(x2[i]);
        free(x3[i]);
        free(x4[i]);
    }

    free(x);
    free(x2);
    free(xb);
    free(x3);
    free(x4);
    free(beta_m);
    free(beta_s);
    free(w);
    free(Fi);
    free(res);
    free(v);
    free(v2);
    free(v_aux);
    free(yb);
    free(res_s);
    free(Xb);

    return 0;
}