Beispiel #1
0
/*
 * Benchmark driver for the structure-factor kernel structfac_gpuss().
 *
 * Command line (all arguments optional, positional):
 *   argv[1]  numtask  value forwarded to structfac_gpuss (default 1)
 *   argv[2]  na       number of atoms                    (default 1000)
 *   argv[3]  nr       number of reflections              (default 10000)
 *   argv[4]  times    how often the kernel is launched   (default 1)
 * na and nr are only overridden when BOTH are supplied.
 *
 * Prints one CSV line:
 *   sumdif/nr, times, omp_get_num_threads(), numtask, na, nr, seconds, speed
 *
 * Returns 0 on success, 1 if a buffer could not be allocated.
 */
int main(int argc, char *argv[]) {
    size_t na = 1000; /* number of atoms */
    size_t nr = 10000; /* number of reflections */
    int compute_serial = 0; /* only referenced by the disabled (#if 0) report below */
    int times = 1;
    TYPE *h; /* h[j,0] == h, h[j,1] == k, h[j,2] == l */
    TYPE *E; /* E[j,0] == real part of E, E[j,1] == imag part of E */
    TYPE *E1; /* E[j,0] == real part of E, E[j,1] == imag part of E */
    TYPE *a; /* a[j,0] == atomic number, a[j,1] == x, a[j,2] == y, a[j,3] == z */
    double t0, dt2;
    size_t i;

    int numtask = 1;

    if (argc > 1) {
        numtask = atoi(argv[1]);
    }

    /* Both na and nr must be present; checking only argc > 2 would read
     * argv[3] == NULL when exactly two arguments are given. */
    if (argc > 3) {
        na = atoi(argv[2]);
        nr = atoi(argv[3]);
    }
    if (argc > 4)
        times = atoi(argv[4]);

    /* Total element counts of the flattened 2-D arrays. */
    size_t NH = DIM2_H * nr;
    size_t NA = DIM2_A * na;
    size_t NE = DIM2_E * nr;

    /* Page-aligned buffers. posix_memalign leaves the pointer unspecified on
     * failure, so the result must be checked before the buffers are used.
     * (A negative na/nr wraps to a huge size_t and ends up here as well.) */
    const size_t page = (size_t) getpagesize();
    if (posix_memalign((void **) &h, page, sizeof(*h) * NH) != 0
            || posix_memalign((void **) &E, page, sizeof(*E) * NE) != 0
            || posix_memalign((void **) &E1, page, sizeof(*E1) * NE) != 0
            || posix_memalign((void **) &a, page, sizeof(*a) * NA) != 0) {
        fprintf(stderr, "posix_memalign failed (na=%zu, nr=%zu)\n", na, nr);
        return 1;
    }

    /* E stays all-zero; it is only used as the reference passed to sumdif(). */
    for (i = 0; i < NE; i++)
        E1[i] = E[i] = 0.0f;

    /* Fill the atom and reflection tables (helpers defined elsewhere). */
    deta(na, a);
    deth(nr, h);


    int tt;
    //printf("Running the GPU code %d times\n", times);

#pragma omp register([NA]a)
#pragma omp register([NH]h)
#pragma omp register([NE]E1)


    t0 = omp_get_wtime();
    for (tt = 0; tt < times; tt++) {
        structfac_gpuss(na, nr, NA, a, NH, h, NE, E1, numtask);
    }

    /* Wait for all outstanding tasks before stopping the clock. */
#pragma omp taskwait

    /* Total wall-clock time for all `times` launches (not averaged). */
    dt2 = (omp_get_wtime() - t0);/// times;
#if 0
    if (compute_serial) {
        printf("Cuda:      wallclock time seconds:%f\n", dt2);
    } else {
        printf("computation time (in seconds): %f\n", dt2);
    }
#endif
    double sumdf = sumdif(E, E1, 2 * nr);
    //printf("Cuda:      Sumdif: %f mean: %f\n", sumdf, sumdf / nr);

    double speed = 1.0e-9 * (NH * NA) / dt2;
    /* na and nr are size_t, hence %zu (the signed %zd does not match). */
    printf("%f,%d,%d,%d,%zu,%zu,%f,%.4lf\n", sumdf / nr, times, omp_get_num_threads(), numtask, na, nr, dt2, speed);

    /* The buffers are not freed explicitly; the process exits right here. */
    return 0;
}
Beispiel #2
0
/**
 * Gauss-Seidel iteration on the interior of the grid `s`, using per-cell
 * coefficients alpha1..alpha5 built from the grids written by dxi(), deta(),
 * ddxi() and ddeta().
 *
 * @param data  Grid description; this function reads dimI, dimJ, maxIter and
 *              residuum from it and forwards it to the derivative helpers.
 * @param s     Grid of dimI x dimJ values, updated in place. Only the
 *              interior cells [1, dimI-2] x [1, dimJ-2] are written; the
 *              border cells are read as neighbours.
 * @return Always true. NOTE(review): true is also returned when maxIter is
 *         reached without the residual dropping below data->residuum; this
 *         is kept from the original so callers see no change.
 *
 * Iteration stops as soon as the summed absolute change of one sweep is
 * below data->residuum, or after data->maxIter sweeps.
 */
bool gaussseidelMorphed(sData* data, double** s)
{
  const int rows = data->dimI;
  const int cols = data->dimJ;
  const int kNumDeriv = 8;   // two output grids for each of the four helpers
  const int kNumAlpha = 5;   // alpha1..alpha5 per cell

  // Temporary grids receiving the derivative terms.
  double** deriv[kNumDeriv];
  for (int k = 0; k < kNumDeriv; k++) {
      deriv[k] = new double*[rows];
      for (int i = 0; i < rows; i++) {
          deriv[k][i] = new double[cols];
      }
  }

  // Coefficient storage: one row per grid row, kNumAlpha consecutive values
  // per cell. Only interior cells are ever written or read.
  double** alpha = new double*[rows];
  for (int i = 0; i < rows; i++) {
      alpha[i] = new double[cols * kNumAlpha];
  }

  // write derivatives
  dxi(data, deriv[0], deriv[1]);
  deta(data, deriv[2], deriv[3]);
  ddxi(data, deriv[4], deriv[5]);
  ddeta(data, deriv[6], deriv[7]);

  // calculate alpha
  for (int i = 1; i < rows - 1; i++) {
      for (int j = 1; j < cols - 1; j++) {
          const double t1 = deriv[0][i][j];
          const double t2 = deriv[1][i][j];
          const double t3 = deriv[2][i][j];
          const double t4 = deriv[3][i][j];
          double* cell = alpha[i] + j * kNumAlpha;

          cell[0] = t1*t1 + t2*t2;                      //alpha1
          cell[1] = t3*t3 + t4*t4;                      //alpha2
          cell[2] = 2*(t1*t3 + t2*t4);                  //alpha3
          cell[3] = deriv[4][i][j] + deriv[5][i][j];    //alpha4
          cell[4] = deriv[6][i][j] + deriv[7][i][j];    //alpha5
      }
  }

  // The derivative grids are no longer needed once alpha is built.
  for (int k = 0; k < kNumDeriv; k++) {
      for (int i = 0; i < rows; i++) {
          delete[] deriv[k][i];
      }
      delete[] deriv[k];
  }

  int curIter = 0;
  double error = 0;
  bool converged = false;

  while (curIter < data->maxIter) {
      ++curIter;
      error = 0;
      for (int i = 1; i < rows - 1; i++) {
          for (int j = 1; j < cols - 1; j++) {
              const double* cell = alpha[i] + j * kNumAlpha;
              const double a1 = cell[0];
              const double a2 = cell[1];
              const double a3 = cell[2];
              const double a4 = cell[3];
              const double a5 = cell[4];

              // Kept in double: a float temporary would truncate every value
              // written back into the double grid `s`.
              double tmp =   s[i+1][j+1]   * (a3/4.f)
                           + s[i+1][j]     * (a1+a4/2.f)
                           + s[i+1][j-1]   * (-a3/4.f)
                           + s[i][j+1]     * (a2+a5/2.f)
                           + s[i][j-1]     * (a2-a5/2.f)
                           + s[i-1][j+1]   * (-a3/4.f)
                           + s[i-1][j]     * (a1-a4/2.f)
                           + s[i-1][j-1]   * (a3/4.f);
              tmp /= (2*(a1+a2));

              // Accumulate the absolute change of this sweep.
              error += fabs(tmp - s[i][j]);
              s[i][j] = tmp;
          }
      }

      if (error < data->residuum) {
          converged = true;
          break;
      }
  }

  // Release the coefficient storage on every exit path (previously leaked).
  for (int i = 0; i < rows; i++) {
      delete[] alpha[i];
  }
  delete[] alpha;

  if (converged) {
      std::cout << "Residual r = "<< error << ", after "
          << curIter << "# iterations \t";
  }

  return true;
}