int main(int argc, char *argv[]) { size_t na = 1000; /* number of atoms */ size_t nr = 10000; /* number of reflections */ int compute_serial = 0; int times = 1; TYPE *h; /* h[j,0] == h, h[j,1] == k, h[j,2] == l */ TYPE *E; /* E[j,0] == real part of E, E[j,1] == imag part of E */ TYPE *E1; /* E[j,0] == real part of E, E[j,1] == imag part of E */ TYPE *a; /* a[j,0] == atomic number, a[j,1] == x, a[j,2] == y, a[j,3] == z */ double t0, dt1, dt2; int i; int numtask = 1; if (argc > 1) { numtask = atoi(argv[1]); } if (argc > 2) { na = atoi(argv[2]); nr = atoi(argv[3]); } if (argc > 4) times = atoi(argv[4]); size_t NH = DIM2_H * nr; size_t NA = DIM2_A * na; size_t NE = DIM2_E * nr; /*printf("Computation of crystallographic normalized structure factors\n" " on the CPU and the GPU\n\n"); printf("Number of atoms: %d\n", na); printf("Number of reflections: %d\n", nr); */ /* h = (TYPE*) malloc(sizeof(*h) * DIM2_H * nr); // 3*10000 30000 E = (TYPE*) malloc(sizeof(*E) * DIM2_E * nr); // 2*10000 20000 E1 = (TYPE*) malloc(sizeof(*E1) * DIM2_E * nr); // 2*10000 20000 a = (TYPE*) malloc(sizeof(*a) * DIM2_A * na); // 4*1000 4000 */ posix_memalign((void **) &h, getpagesize(), sizeof(*h) * DIM2_H * nr); posix_memalign((void **) &E, getpagesize(), sizeof(*E) * DIM2_E * nr); posix_memalign((void **) &E1, getpagesize(), sizeof(*E1) * DIM2_E * nr); posix_memalign((void **) &a, getpagesize(), sizeof(*a) * DIM2_A * na); for (i = 0; i < DIM2_E * nr; i++) E1[i] = E[i] = 0.0f; deta(na, a); deth(nr, h); int tt; //printf("Running the GPU code %d times\n", times); #pragma omp register([NA]a) #pragma omp register([NH]h) #pragma omp register([NE]E1) t0 = omp_get_wtime(); for (tt = 0; tt < times; tt++) { structfac_gpuss(na, nr, NA, a, NH, h, NE, E1, numtask); } #pragma omp taskwait dt2 = (omp_get_wtime() - t0);/// times; #if 0 if (compute_serial) { printf("Cuda: wallclock time seconds:%f\n", dt2); } else { printf("computation time (in seconds): %f\n", dt2); } #endif double sumdf = sumdif(E, E1, 2 * 
nr); //printf("Cuda: Sumdif: %f mean: %f\n", sumdf, sumdf / nr); double speed = 1.0e-9 * (NH * NA) / dt2; printf("%f,%d,%d,%d,%zd,%zd,%f,%.4lf\n", sumdf / nr, times, omp_get_num_threads(), numtask, na, nr, dt2, speed); return 0; }
/// Allocates an uninitialized rows x cols grid of doubles as an array of row pointers.
static double** gsmAllocGrid(int rows, int cols)
{
    double** grid = new double*[rows];
    for (int i = 0; i < rows; i++) {
        grid[i] = new double[cols];
    }
    return grid;
}

/// Releases a grid previously obtained from gsmAllocGrid.
static void gsmFreeGrid(double** grid, int rows)
{
    for (int i = 0; i < rows; i++) {
        delete[] grid[i];
    }
    delete[] grid;
}

/**
 * @brief Gauss-Seidel sweep over the interior of `s` with per-cell coefficients.
 *
 * Per-cell coefficients alpha1..alpha5 are built from eight scratch grids
 * filled by dxi(), deta(), ddxi() and ddeta() (presumably first and second
 * metric derivatives - confirm against those helpers). The interior cells of
 * `s` are then updated in place with a 9-point stencil until the summed
 * absolute change of one sweep drops below data->residuum or data->maxIter
 * sweeps have been done.
 *
 * @param data  Supplies dimI, dimJ, maxIter and residuum.
 * @param s     dimI x dimJ field, updated in place; boundary cells are only read.
 * @return Always true. NOTE(review): this includes the case where maxIter is
 *         reached without meeting the residuum - confirm that is intended.
 */
bool gaussseidelMorphed(sData* data, double** s)
{
    const int rows = data->dimI;
    const int cols = data->dimJ;

    // Scratch grids receiving the helper output; only needed to build the
    // coefficients.
    const int kScratch = 8;
    double** d[kScratch];
    for (int k = 0; k < kScratch; k++) {
        d[k] = gsmAllocGrid(rows, cols);
    }

    // Coefficient grids; only interior cells are written and read.
    double** alpha1 = gsmAllocGrid(rows, cols);
    double** alpha2 = gsmAllocGrid(rows, cols);
    double** alpha3 = gsmAllocGrid(rows, cols);
    double** alpha4 = gsmAllocGrid(rows, cols);
    double** alpha5 = gsmAllocGrid(rows, cols);

    // write derivatives
    dxi(data, d[0], d[1]);
    deta(data, d[2], d[3]);
    ddxi(data, d[4], d[5]);
    ddeta(data, d[6], d[7]);

    // calculate alpha
    for (int i = 1; i < rows - 1; i++) {
        for (int j = 1; j < cols - 1; j++) {
            alpha1[i][j] = d[0][i][j] * d[0][i][j] + d[1][i][j] * d[1][i][j];
            alpha2[i][j] = d[2][i][j] * d[2][i][j] + d[3][i][j] * d[3][i][j];
            alpha3[i][j] = 2 * (d[0][i][j] * d[2][i][j] + d[1][i][j] * d[3][i][j]);
            alpha4[i][j] = d[4][i][j] + d[5][i][j];
            alpha5[i][j] = d[6][i][j] + d[7][i][j];
        }
    }

    for (int k = 0; k < kScratch; k++) {
        gsmFreeGrid(d[k], rows);
    }

    int curIter = 0;
    while (curIter < data->maxIter) {
        ++curIter;
        double error = 0;

        for (int i = 1; i < rows - 1; i++) {
            for (int j = 1; j < cols - 1; j++) {
                const double a1 = alpha1[i][j];
                const double a2 = alpha2[i][j];
                const double a3 = alpha3[i][j];
                const double a4 = alpha4[i][j];
                const double a5 = alpha5[i][j];

                // Kept in double: a float temporary would truncate every
                // updated value to single precision.
                double tmp = s[i+1][j+1] * (a3 / 4.0)
                           + s[i+1][j]   * (a1 + a4 / 2.0)
                           + s[i+1][j-1] * (-a3 / 4.0)
                           + s[i][j+1]   * (a2 + a5 / 2.0)
                           + s[i][j-1]   * (a2 - a5 / 2.0)
                           + s[i-1][j+1] * (-a3 / 4.0)
                           + s[i-1][j]   * (a1 - a4 / 2.0)
                           + s[i-1][j-1] * (a3 / 4.0);
                tmp /= (2 * (a1 + a2));

                error += fabs(tmp - s[i][j]);
                s[i][j] = tmp;
            }
        }

        if (error < data->residuum) {
            std::cout << "Residual r = " << error << ", after " << curIter << "# iterations \t";
            break;
        }
    }

    // Release the coefficients on every exit path (they were leaked before).
    gsmFreeGrid(alpha1, rows);
    gsmFreeGrid(alpha2, rows);
    gsmFreeGrid(alpha3, rows);
    gsmFreeGrid(alpha4, rows);
    gsmFreeGrid(alpha5, rows);

    return true;
}