Exemple #1
0
enum efp_result
efp_compute_id_direct(struct efp *efp)
{
	double *c;
	size_t n;
	fortranint_t *ipiv;
	enum efp_result res;

	n = 3 * efp->n_polarizable_pts;
	c = (double *)calloc(n * n, sizeof *c);
	ipiv = (fortranint_t *)calloc(n, sizeof *ipiv);

	if (c == NULL || ipiv == NULL) {
		res = EFP_RESULT_NO_MEMORY;
		goto error;
	}

	/* induced dipoles */
	compute_lhs(efp, c, 0);
	compute_rhs(efp, efp->indip, 0);
	transpose_matrix(c, n);

	if (efp_dgesv((fortranint_t)n, 1, c, (fortranint_t)n, ipiv,
	    (double *)efp->indip, (fortranint_t)n) != 0) {
		efp_log("dgesv: error solving for induced dipoles");
		res = EFP_RESULT_FATAL;
		goto error;
	}

	/* conjugate induced dipoles */
	compute_lhs(efp, c, 1);
	compute_rhs(efp, efp->indipconj, 1);
	transpose_matrix(c, n);

	if (efp_dgesv((fortranint_t)n, 1, c, (fortranint_t)n, ipiv,
	    (double *)efp->indipconj, (fortranint_t)n) != 0) {
		efp_log("dgesv: error solving for conjugate induced dipoles");
		res = EFP_RESULT_FATAL;
		goto error;
	}
	res = EFP_RESULT_SUCCESS;
error:
	free(c);
	free(ipiv);
	return res;
}
Exemple #2
0
/*
 * Runs the five stages that make up the gluing-equation computation
 * for the given triangulation, in a fixed order: holonomies, edge angle
 * sums, initialisation of the equations, the derivative, and finally
 * the right-hand side.
 *
 * NOTE(review): the stages presumably depend on each other's results,
 * so the order should not be changed without checking each helper.
 */
void compute_gluing_equations(Triangulation *manifold)
{
	compute_holonomies(manifold);
	compute_edge_angle_sums(manifold);

	initialize_gluing_equations(manifold);

	compute_derivative(manifold);
	compute_rhs(manifold);
}
Exemple #3
0
//---------------------------------------------------------------------
// Performs one pass of the solver pipeline: compute_rhs(), then
// x_solve(), y_solve() and z_solve(), then add(local_grid_points).
// When the global flag timeron is set, each phase is bracketed by
// timer_start()/timer_stop() on that phase's own timer id.
//---------------------------------------------------------------------
void adi(int local_grid_points[3])
{
  // right-hand side
  if (timeron) {
    timer_start(t_rhs);
  }
  compute_rhs();
  if (timeron) {
    timer_stop(t_rhs);
  }

  // solve along x
  if (timeron) {
    timer_start(t_xsolve);
  }
  x_solve();
  if (timeron) {
    timer_stop(t_xsolve);
  }

  // solve along y
  if (timeron) {
    timer_start(t_ysolve);
  }
  y_solve();
  if (timeron) {
    timer_stop(t_ysolve);
  }

  // solve along z
  if (timeron) {
    timer_start(t_zsolve);
  }
  z_solve();
  if (timeron) {
    timer_stop(t_zsolve);
  }

  // final update step
  if (timeron) {
    timer_start(t_add);
  }
  add(local_grid_points);
  if (timeron) {
    timer_stop(t_add);
  }
}
Exemple #4
0
//---------------------------------------------------------------------
// Verification routine.
//
// Computes the RMS norms of the solution error and of the residual
// (the residual norms are divided by the global time step dt), looks
// up reference values for the current grid size / step count, and
// prints a comparison.
//
//   no_time_steps : number of time steps that were run
//   Class         : out; set to the matched class letter, or 'U' when
//                   no reference entry matches or dt differs from the
//                   reference time step
//   verified      : out; true only when a class matched, dt matched,
//                   and every relative difference is within epsilon
//---------------------------------------------------------------------
void verify(int no_time_steps, char *Class, logical *verified)
{
  //---------------------------------------------------------------------
  // Reference data, one row per known problem class. The grid is cubic,
  // so a single size is stored and compared against all three dimensions.
  //---------------------------------------------------------------------
  static const struct {
    char   id;           // class letter reported to the caller
    int    grid;         // grid points per dimension
    int    steps;        // number of time steps
    double dt;           // reference time step
    double residual[5];  // reference RMS-norms of residual
    double error[5];     // reference RMS-norms of solution error
  } reference[] = {
    // 12X12X12 grids after 60 time steps, with DT = 1.0e-02
    { 'S', 12, 60, 1.0e-2,
      { 1.7034283709541311e-01,
        1.2975252070034097e-02,
        3.2527926989486055e-02,
        2.6436421275166801e-02,
        1.9211784131744430e-01 },
      { 4.9976913345811579e-04,
        4.5195666782961927e-05,
        7.3973765172921357e-05,
        7.3821238632439731e-05,
        8.9269630987491446e-04 } },

    // 24X24X24 grids after 200 time steps, with DT = 0.8e-3
    { 'W', 24, 200, 0.8e-3,
      { 0.1125590409344e+03,
        0.1180007595731e+02,
        0.2710329767846e+02,
        0.2469174937669e+02,
        0.2638427874317e+03 },
      { 0.4419655736008e+01,
        0.4638531260002e+00,
        0.1011551749967e+01,
        0.9235878729944e+00,
        0.1018045837718e+02 } },

    // 64X64X64 grids after 200 time steps, with DT = 0.8e-3
    { 'A', 64, 200, 0.8e-3,
      { 1.0806346714637264e+02,
        1.1319730901220813e+01,
        2.5974354511582465e+01,
        2.3665622544678910e+01,
        2.5278963211748344e+02 },
      { 4.2348416040525025e+00,
        4.4390282496995698e-01,
        9.6692480136345650e-01,
        8.8302063039765474e-01,
        9.7379901770829278e+00 } },

    // 102X102X102 grids after 200 time steps, with DT = 3.0e-04
    { 'B', 102, 200, 3.0e-4,
      { 1.4233597229287254e+03,
        9.9330522590150238e+01,
        3.5646025644535285e+02,
        3.2485447959084092e+02,
        3.2707541254659363e+03 },
      { 5.2969847140936856e+01,
        4.4632896115670668e+00,
        1.3122573342210174e+01,
        1.2006925323559144e+01,
        1.2459576151035986e+02 } },

    // 162X162X162 grids after 200 time steps, with DT = 1.0e-04
    { 'C', 162, 200, 1.0e-4,
      { 0.62398116551764615e+04,
        0.50793239190423964e+03,
        0.15423530093013596e+04,
        0.13302387929291190e+04,
        0.11604087428436455e+05 },
      { 0.16462008369091265e+03,
        0.11497107903824313e+02,
        0.41207446207461508e+02,
        0.37087651059694167e+02,
        0.36211053051841265e+03 } },

    // 408x408x408 grids after 250 time steps, with DT = 0.2e-04
    { 'D', 408, 250, 0.2e-4,
      { 0.2533188551738e+05,
        0.2346393716980e+04,
        0.6294554366904e+04,
        0.5352565376030e+04,
        0.3905864038618e+05 },
      { 0.3100009377557e+03,
        0.2424086324913e+02,
        0.7782212022645e+02,
        0.6835623860116e+02,
        0.6065737200368e+03 } },

    // 1020x1020x1020 grids after 250 time steps, with DT = 0.4e-05
    { 'E', 1020, 250, 0.4e-5,
      { 0.9795372484517e+05,
        0.9739814511521e+04,
        0.2467606342965e+05,
        0.2092419572860e+05,
        0.1392138856939e+06 },
      { 0.4327562208414e+03,
        0.3699051964887e+02,
        0.1089845040954e+03,
        0.9462517622043e+02,
        0.7765512765309e+03 } }
  };
  const int n_reference = (int)(sizeof(reference) / sizeof(reference[0]));

  // tolerance level
  const double epsilon = 1.0e-08;

  double xcrref[5], xceref[5], xcrdif[5], xcedif[5];
  double xce[5], xcr[5], dtref = 0.0;
  int m, row;

  //---------------------------------------------------------------------
  // compute the error norm and the residual norm
  //---------------------------------------------------------------------
  error_norm(xce);
  compute_rhs();
  rhs_norm(xcr);

  for (m = 0; m < 5; m++) {
    xcr[m] = xcr[m] / dt;
  }

  //---------------------------------------------------------------------
  // start from "unknown class"; the reference values default to 1.0 so
  // the relative differences below are always well defined
  //---------------------------------------------------------------------
  *Class = 'U';
  *verified = true;

  for (m = 0; m < 5; m++) {
    xcrref[m] = 1.0;
    xceref[m] = 1.0;
  }

  //---------------------------------------------------------------------
  // look for the reference row matching this grid and step count
  //---------------------------------------------------------------------
  for (row = 0; row < n_reference; row++) {
    if ( (grid_points[0] == reference[row].grid) &&
         (grid_points[1] == reference[row].grid) &&
         (grid_points[2] == reference[row].grid) &&
         (no_time_steps == reference[row].steps) ) {
      break;
    }
  }

  if (row < n_reference) {
    *Class = reference[row].id;
    dtref = reference[row].dt;
    for (m = 0; m < 5; m++) {
      xcrref[m] = reference[row].residual[m];
      xceref[m] = reference[row].error[m];
    }
  } else {
    *verified = false;
  }

  //---------------------------------------------------------------------
  // relative difference between computed and reference values
  //---------------------------------------------------------------------
  for (m = 0; m < 5; m++) {
    xcrdif[m] = fabs((xcr[m]-xcrref[m])/xcrref[m]);
    xcedif[m] = fabs((xce[m]-xceref[m])/xceref[m]);
  }

  //---------------------------------------------------------------------
  // report; a time step that differs from the reference demotes the
  // run back to the unknown class
  //---------------------------------------------------------------------
  if (*Class == 'U') {
    printf(" Unknown class\n");
  } else {
    printf(" Verification being performed for class %c\n", *Class);
    printf(" accuracy setting for epsilon = %20.13E\n", epsilon);
    *verified = (fabs(dt-dtref) <= epsilon);
    if (!(*verified)) {
      *Class = 'U';
      printf(" DT does not match the reference value of %15.8E\n", dtref);
    }
  }

  // residual norms
  if (*Class == 'U') {
    printf(" RMS-norms of residual\n");
  } else {
    printf(" Comparison of RMS-norms of residual\n");
  }

  for (m = 0; m < 5; m++) {
    if (*Class == 'U') {
      printf("          %2d%20.13E\n", m+1, xcr[m]);
    } else if (xcrdif[m] <= epsilon) {
      printf("          %2d%20.13E%20.13E%20.13E\n",
          m+1, xcr[m], xcrref[m], xcrdif[m]);
    } else {
      *verified = false;
      printf(" FAILURE: %2d%20.13E%20.13E%20.13E\n",
          m+1, xcr[m], xcrref[m], xcrdif[m]);
    }
  }

  // solution-error norms
  if (*Class == 'U') {
    printf(" RMS-norms of solution error\n");
  } else {
    printf(" Comparison of RMS-norms of solution error\n");
  }

  for (m = 0; m < 5; m++) {
    if (*Class == 'U') {
      printf("          %2d%20.13E\n", m+1, xce[m]);
    } else if (xcedif[m] <= epsilon) {
      printf("          %2d%20.13E%20.13E%20.13E\n",
          m+1, xce[m], xceref[m], xcedif[m]);
    } else {
      *verified = false;
      printf(" FAILURE: %2d%20.13E%20.13E%20.13E\n",
          m+1, xce[m], xceref[m], xcedif[m]);
    }
  }

  // final verdict
  if (*Class == 'U') {
    printf(" No reference values provided\n");
    printf(" No verification performed\n");
  } else if (*verified) {
    printf(" Verification Successful\n");
  } else {
    printf(" Verification failed\n");
  }
}
    // Constructor: configures the solver and, unless
    // __MVM_MULTIGPU_TEST__ is defined, solves the system right away.
    //
    //   CartComm  communicator handed to the `he` member
    //   _order    stored in `order`; also sizes qt, basis, output and
    //             determines pen (100 * _order^2)
    //   _mesh     used to initialise the `mesh` member
    //   f, u_ex   passed to compute_rhs() and err_norms()
    //   dx_u_ex, dy_u_ex
    //             passed to err_norms() only
    //   _tol      passed to the conjugate gradient routine
    //             (NOTE(review): presumably the stopping tolerance - confirm)
    //
    // The iteration count returned by the solver is stored in `iterations`.
    sipg_sem_2d_multigpu ( MPI_Comm CartComm,
                           int _order, 
                           const square_mesh_multigpu<FLOAT_TYPE> & _mesh,
                           FLOAT_TYPE (*f)(FLOAT_TYPE, FLOAT_TYPE),
                           FLOAT_TYPE (*u_ex)(FLOAT_TYPE, FLOAT_TYPE), 
                           FLOAT_TYPE (*dx_u_ex)(FLOAT_TYPE, FLOAT_TYPE), 
                           FLOAT_TYPE (*dy_u_ex)(FLOAT_TYPE, FLOAT_TYPE),
                           FLOAT_TYPE _tol )
     : qt(_order+1),
       basis(_order),
       pen(100*_order*_order),
       output(_mesh.noe(), _order+1),

       mesh(_mesh),
       dot_product(mesh.device_info, _order),

       he(typename pattern_type::grid_type::period_type(0, 0, 0), CartComm)
    {

      const int noe = _mesh.noe();
      order = _order;
      
      // define block sizes

      // volume launch: x covers output.get_noe() entries in blocks of
      // 128 (rounded up); y and z each span order+1
      const int vec_noe = output.get_noe();
      const int blockD = 128;
      volume_gridSIZE = dim3( (vec_noe + blockD - 1)/blockD , order+1, order+1);
      volume_blockSIZE = dim3(blockD, 1, 1); 

      // flux launch: 32x4 blocks tiling the dimx x dimy extent reported
      // by mesh.device_info (rounded up in both directions)
      const int dimx = mesh.device_info.get_dimx();
      const int dimy = mesh.device_info.get_dimy();
      const int blockDx = 32;
      const int blockDy = 4;

      flux_gridSIZE = dim3( (dimx + blockDx - 1)/blockDx, (dimy + blockDy - 1)/blockDy, 1 );
      flux_blockSIZE = dim3( blockDx, blockDy, 1 );


      // initialize
      load_Dphi_table<FLOAT_TYPE>(order);
      load_lgl_quadrature_table<FLOAT_TYPE>(order);

 
      // optional matrices are built in a host-side object and then
      // assigned to the d_* members
#ifdef USE_MODE_MATRIX
      host_laplacian_matrix<FLOAT_TYPE,int> h_volume_matrix(1, order);
      d_volume_matrix = h_volume_matrix;
#endif
#ifdef USE_PRECONDITIONER
      host_preconditioner_matrix<FLOAT_TYPE,int> h_prec_matrix(1, order, pen);
      d_prec_matrix = h_prec_matrix;
#endif


      // host vector copied into d_u below as the solver's starting vector
      // NOTE(review): presumably zero-initialised - confirm in host_mode_vector
      host_mode_vector<FLOAT_TYPE,int> h_xx(noe, order+1);

      setup_GCL();

      // NOTE(review): presumably fills d_rhs, which the solver reads - confirm
      compute_rhs(f, u_ex);

      // everything below is skipped in __MVM_MULTIGPU_TEST__ builds
#ifndef __MVM_MULTIGPU_TEST__
      copy(h_xx, d_u);

#ifdef USE_PRECONDITIONER
      iterations = precoditioned_conjugate_gradient_multigpu(*(this), d_u, d_rhs, _tol);
#else
      iterations = conjugate_gradient_multigpu(*(this), d_u, d_rhs, _tol);
#endif

      // copy back the solution 
      copy(d_u, solution);

      err_norms(solution, f, u_ex, dx_u_ex, dy_u_ex);
#endif
    }
Exemple #6
0
//---------------------------------------------------------------------
// this function copies the face values of a variable defined on a set 
// of cells to the overlap locations of the adjacent sets of cells. 
// Because a set of cells interfaces in each direction with exactly one 
// other set, we only need to fill six different buffers. We could try to 
// overlap communication with computation, by computing
// some internal values while communicating boundary values, but this
// adds so much overhead that it's not clearly useful. 
//
// Steps: (1) pack kernels copy_faces1..3 on every device, (2) six
// device-to-device buffer copies per device (east, west, north, south,
// top, bottom), (3) unpack kernels copy_faces4..6, (4) compute_rhs().
// With a single device only compute_rhs() is called.
//
// Every OpenCL enqueue status is passed to clu_CheckError(), including
// the buffer copies of the exchange phase.
//---------------------------------------------------------------------
void copy_faces()
{
  int c, i;
  cl_int ecode = 0;

  //---------------------------------------------------------------------
  // with a single device there are no faces to be copied: just compute
  // the rhs and return
  //---------------------------------------------------------------------
  if (num_devices == 1) {
    compute_rhs();
    return;
  }

  //---------------------------------------------------------------------
  // because the difference stencil for the diagonalized scheme is 
  // orthogonal, we do not have to perform the staged copying of faces, 
  // but can send all face information simultaneously to the neighboring 
  // cells in all directions          
  //---------------------------------------------------------------------
  if (timeron) timer_start(t_bpack);

  for (c = 0; c < ncells; c++) {
    for (i = 0; i < num_devices; i++) {
      ecode = clEnqueueNDRangeKernel(cmd_queue[i * 2],
                                     k_copy_faces1[i][c],
                                     COPY_FACES1_DIM, NULL,
                                     copy_faces1_gw[i][c],
                                     copy_faces1_lw[i][c],
                                     0, NULL, NULL);
      clu_CheckError(ecode, "clEnqueueNDRange() for copy_faces1");
    }

    for (i = 0; i < num_devices; i++) {
      ecode = clEnqueueNDRangeKernel(cmd_queue[i * 2],
                                     k_copy_faces2[i][c],
                                     COPY_FACES2_DIM, NULL,
                                     copy_faces2_gw[i][c],
                                     copy_faces2_lw[i][c],
                                     0, NULL, NULL);
      clu_CheckError(ecode, "clEnqueueNDRange() for copy_faces2");

      ecode = clEnqueueNDRangeKernel(cmd_queue[i * 2],
                                     k_copy_faces3[i][c],
                                     COPY_FACES3_DIM, NULL,
                                     copy_faces3_gw[i][c],
                                     copy_faces3_lw[i][c],
                                     0, NULL, NULL);
      clu_CheckError(ecode, "clEnqueueNDRange() for copy_faces3");
    }

    for (i = 0; i < num_devices; i++) {
      CHECK_FINISH(i * 2);
    }

  }

  if (timeron) timer_stop(t_bpack);

  //---------------------------------------------------------------------
  // exchange: each copy reads from this device's out buffer and writes
  // into the neighbor's in buffer, using the neighbor's odd-numbered
  // command queue. The status of every copy is checked; previously it
  // was stored in ecode and silently overwritten.
  //---------------------------------------------------------------------
  if (timeron) timer_start(t_exch);
  for (i = 0; i < num_devices; i++) {
    CHECK_FINISH(i * 2);

    ecode = clEnqueueCopyBuffer(cmd_queue[successor[i][0] * 2 + 1],
                                m_out_buffer[i],
                                m_in_buffer[successor[i][0]],
                                start_send_east[i]*sizeof(double),
                                start_recv_west[successor[i][0]]*sizeof(double),
                                east_size[i]*sizeof(double),
                                0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueCopyBuffer() for east face");
    CHECK_FINISH(successor[i][0] * 2 + 1);
  }

  for (i = 0; i < num_devices; i++) {
    ecode = clEnqueueCopyBuffer(cmd_queue[predecessor[i][0] * 2 + 1],
                                m_out_buffer[i],
                                m_in_buffer[predecessor[i][0]],
                                start_send_west[i]*sizeof(double),
                                start_recv_east[predecessor[i][0]]*sizeof(double),
                                west_size[i]*sizeof(double),
                                0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueCopyBuffer() for west face");
    CHECK_FINISH(predecessor[i][0] * 2 + 1);

    ecode = clEnqueueCopyBuffer(cmd_queue[successor[i][1] * 2 + 1],
                                m_out_buffer[i],
                                m_in_buffer[successor[i][1]],
                                start_send_north[i]*sizeof(double),
                                start_recv_south[successor[i][1]]*sizeof(double),
                                north_size[i]*sizeof(double),
                                0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueCopyBuffer() for north face");
    CHECK_FINISH(successor[i][1] * 2 + 1);

    ecode = clEnqueueCopyBuffer(cmd_queue[predecessor[i][1] * 2 + 1],
                                m_out_buffer[i],
                                m_in_buffer[predecessor[i][1]],
                                start_send_south[i]*sizeof(double),
                                start_recv_north[predecessor[i][1]]*sizeof(double),
                                south_size[i]*sizeof(double),
                                0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueCopyBuffer() for south face");
    CHECK_FINISH(predecessor[i][1] * 2 + 1);

    ecode = clEnqueueCopyBuffer(cmd_queue[successor[i][2] * 2 + 1],
                                m_out_buffer[i],
                                m_in_buffer[successor[i][2]],
                                start_send_top[i]*sizeof(double),
                                start_recv_bottom[successor[i][2]]*sizeof(double),
                                top_size[i]*sizeof(double),
                                0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueCopyBuffer() for top face");
    CHECK_FINISH(successor[i][2] * 2 + 1);

    ecode = clEnqueueCopyBuffer(cmd_queue[predecessor[i][2] * 2 + 1],
                                m_out_buffer[i],
                                m_in_buffer[predecessor[i][2]],
                                start_send_bottom[i]*sizeof(double),
                                start_recv_top[predecessor[i][2]]*sizeof(double),
                                bottom_size[i]*sizeof(double),
                                0, NULL, NULL);
    clu_CheckError(ecode, "clEnqueueCopyBuffer() for bottom face");
    CHECK_FINISH(predecessor[i][2] * 2 + 1);
  }
  if (timeron) timer_stop(t_exch);

  //---------------------------------------------------------------------
  // unpack the data that has just been received;             
  //---------------------------------------------------------------------
  if (timeron) timer_start(t_bpack);

  for (c = 0; c < ncells; c++) {
    for (i = 0; i < num_devices; i++) {
      // before the first unpack kernel, CHECK_FINISH is applied to the
      // queue that carried this device's incoming copies
      if (c == 0) CHECK_FINISH(i * 2 + 1);

      ecode = clEnqueueNDRangeKernel(cmd_queue[i * 2],
                                     k_copy_faces4[i][c],
                                     COPY_FACES4_DIM, NULL,
                                     copy_faces4_gw[i][c],
                                     copy_faces4_lw[i][c],
                                     0, NULL, NULL);
      clu_CheckError(ecode, "clEnqueueNDRange() for copy_faces4");
    }

    for (i = 0; i < num_devices; i++) {
      ecode = clEnqueueNDRangeKernel(cmd_queue[i * 2],
                                     k_copy_faces5[i][c],
                                     COPY_FACES5_DIM, NULL,
                                     copy_faces5_gw[i][c],
                                     copy_faces5_lw[i][c],
                                     0, NULL, NULL);
      clu_CheckError(ecode, "clEnqueueNDRange() for copy_faces5");

      ecode = clEnqueueNDRangeKernel(cmd_queue[i * 2],
                                     k_copy_faces6[i][c],
                                     COPY_FACES6_DIM, NULL,
                                     copy_faces6_gw[i][c],
                                     copy_faces6_lw[i][c],
                                     0, NULL, NULL);
      clu_CheckError(ecode, "clEnqueueNDRange() for copy_faces6");
    }

    for (i = 0; i < num_devices; i++) {
      CHECK_FINISH(i * 2);
    }
  }

  if (timeron) timer_stop(t_bpack);

  for (i = 0; i < num_devices; i++)
    CHECK_FINISH(i * 2);

  //---------------------------------------------------------------------
  // now that we have all the data, compute the rhs
  //---------------------------------------------------------------------
  compute_rhs();
}
    // Constructor: configures the solver, solves the system with a
    // (optionally preconditioned) conjugate gradient, copies the result
    // into `solution` and evaluates the error norms.
    //
    //   _order    stored in `order`; also sizes qt and basis
    //   _mesh     assigned to the `mesh` member; noe is _mesh.dim() squared
    //   f, u_ex   passed to compute_rhs() and err_norms()
    //   dx_u_ex, dy_u_ex
    //             passed to err_norms() only
    //   _pen      stored in `pen` (NOTE(review): presumably the SIPG
    //             penalty parameter - confirm)
    //   _tol      passed to the conjugate gradient routine
    //             (NOTE(review): presumably the stopping tolerance - confirm)
    //
    // The iteration count returned by the solver is stored in
    // `iterations`; the solve is bracketed by system_solution_time.
    sipg_sem_2d ( int _order, 
                  const square_mesh<FLOAT_TYPE> & _mesh,
                  FLOAT_TYPE (*f)(FLOAT_TYPE, FLOAT_TYPE),
                  FLOAT_TYPE (*u_ex)(FLOAT_TYPE, FLOAT_TYPE), 
                  FLOAT_TYPE (*dx_u_ex)(FLOAT_TYPE, FLOAT_TYPE), 
                  FLOAT_TYPE (*dy_u_ex)(FLOAT_TYPE, FLOAT_TYPE),
                  FLOAT_TYPE _pen,
                  FLOAT_TYPE _tol )
     : qt(_order+1),
       basis(_order),
       pen(_pen)
    {

      const int noe = _mesh.dim()*_mesh.dim();
      order = _order;
      mesh = _mesh;
      // initialize
      load_Dphi_table<FLOAT_TYPE>(order);
      load_lgl_quadrature_table<FLOAT_TYPE>(order);

      // volume launch: x covers noe entries in blocks of 128 (rounded
      // up); y and z each span order+1
      const int blockD = 128;
      volume_gridSIZE = dim3( (noe + blockD - 1)/blockD , order+1, order+1);
      volume_blockSIZE = dim3(blockD, 1, 1); 

      // flux launch: 32x4 blocks tiling the dimx x dimy extent reported
      // by mesh.device_info (rounded up in both directions)
      const int dimx = mesh.device_info.get_dimx();
      const int dimy = mesh.device_info.get_dimy();
      const int blockDx = 32;
      const int blockDy = 4;

      flux_gridSIZE = dim3( (dimx + blockDx - 1)/blockDx, (dimy + blockDy - 1)/blockDy, 1 );
      flux_blockSIZE = dim3( blockDx, blockDy, 1 );


      // optional matrices are built in a host-side object and then
      // assigned to the d_* members
#ifdef USE_MODE_MATRIX
      host_laplacian_matrix<FLOAT_TYPE,int> h_volume_matrix(1, order);
      d_volume_matrix = h_volume_matrix;
#endif

#ifdef USE_PRECONDITIONER
      host_preconditioner_matrix<FLOAT_TYPE,int> h_prec_matrix(1, order, pen);
      d_prec_matrix = h_prec_matrix;
#endif

      // host vector copied into d_u as the solver's starting vector
      // NOTE(review): presumably zero-initialised - confirm in host_mode_vector
      host_mode_vector<FLOAT_TYPE,int> h_xx(noe, order+1);
      copy(h_xx, d_u);

      // NOTE(review): presumably fills d_rhs, which the solver reads - confirm
      compute_rhs(f, u_ex);


      // only the linear solve itself is timed
      system_solution_time.start();
#ifdef USE_PRECONDITIONER
      iterations = preconditioned_conjugate_gradient(*(this), d_u, d_rhs, _tol);
#else
      iterations = conjugate_gradient(*(this), d_u, d_rhs, _tol);
#endif
      system_solution_time.stop();

      // copy back the solution 
      copy(d_u, solution);

      err_norms(solution, f, u_ex, dx_u_ex, dy_u_ex);

    }
Exemple #8
0
//---------------------------------------------------------------------
// verify: check the computed solution against stored reference norms.
//
// Calls error_norm() and rhs_norm() (after compute_rhs()) to obtain the
// RMS-norms of the solution error and of the residual, scales the
// residual norms by 1/dt, and compares both sets against the reference
// values tabulated below for the matching grid size / step count.
//
//   no_time_steps : number of time steps that were run
//   Class         : out - class letter of the matching reference case,
//                   or 'U' if none matches or dt differs from the
//                   reference time step
//   verified      : out - true only if a reference case matched, dt
//                   matched, and every norm is within epsilon (relative)
//
// Reads the file-level grid_points[] and dt. Prints a report to stdout.
//---------------------------------------------------------------------
void verify(int no_time_steps, char *Class, logical *verified)
{
  // One row per problem size for which reference data is available.
  struct reference_case {
    char   label;        // class letter reported through *Class
    int    grid;         // grid extent, identical in all three directions
    int    steps;        // number of time steps
    double dt;           // reference time step
    double residual[5];  // reference RMS-norms of residual
    double error[5];     // reference RMS-norms of solution error
  };

  static const struct reference_case known_cases[] = {
    // 12X12X12 grid, 100 time steps, DT = 1.50e-02
    { 'S', 12, 100, 1.5e-2,
      { 2.7470315451339479e-02, 1.0360746705285417e-02,
        1.6235745065095532e-02, 1.5840557224455615e-02,
        3.4849040609362460e-02 },
      { 2.7289258557377227e-05, 1.0364446640837285e-05,
        1.6154798287166471e-05, 1.5750704994480102e-05,
        3.4177666183390531e-05 } },

    // 36X36X36 grid, 400 time steps, DT = 1.5e-03
    { 'W', 36, 400, 1.5e-3,
      { 0.1893253733584e-02, 0.1717075447775e-03,
        0.2778153350936e-03, 0.2887475409984e-03,
        0.3143611161242e-02 },
      { 0.7542088599534e-04, 0.6512852253086e-05,
        0.1049092285688e-04, 0.1128838671535e-04,
        0.1212845639773e-03 } },

    // 64X64X64 grid, 400 time steps, DT = 1.5e-03
    { 'A', 64, 400, 1.5e-3,
      { 2.4799822399300195, 1.1276337964368832,
        1.5028977888770491, 1.4217816211695179,
        2.1292113035138280 },
      { 1.0900140297820550e-04, 3.7343951769282091e-05,
        5.0092785406541633e-05, 4.7671093939528255e-05,
        1.3621613399213001e-04 } },

    // 102X102X102 grid, 400 time steps, DT = 1.0e-03
    { 'B', 102, 400, 1.0e-3,
      { 0.6903293579998e+02, 0.3095134488084e+02,
        0.4103336647017e+02, 0.3864769009604e+02,
        0.5643482272596e+02 },
      { 0.9810006190188e-02, 0.1022827905670e-02,
        0.1720597911692e-02, 0.1694479428231e-02,
        0.1847456263981e-01 } },

    // 162X162X162 grid, 400 time steps, DT = 0.67e-03
    { 'C', 162, 400, 0.67e-3,
      { 0.5881691581829e+03, 0.2454417603569e+03,
        0.3293829191851e+03, 0.3081924971891e+03,
        0.4597223799176e+03 },
      { 0.2598120500183e+00, 0.2590888922315e-01,
        0.5132886416320e-01, 0.4806073419454e-01,
        0.5483377491301e+00 } },

    // 408X408X408 grid, 500 time steps, DT = 0.3e-03
    { 'D', 408, 500, 0.30e-3,
      { 0.1044696216887e+05, 0.3204427762578e+04,
        0.4648680733032e+04, 0.4238923283697e+04,
        0.7588412036136e+04 },
      { 0.5089471423669e+01, 0.5323514855894e+00,
        0.1187051008971e+01, 0.1083734951938e+01,
        0.1164108338568e+02 } },

    // 1020X1020X1020 grid, 500 time steps, DT = 0.1e-03
    { 'E', 1020, 500, 0.10e-3,
      { 0.6255387422609e+05, 0.1495317020012e+05,
        0.2347595750586e+05, 0.2091099783534e+05,
        0.4770412841218e+05 },
      { 0.6742735164909e+02, 0.5390656036938e+01,
        0.1680647196477e+02, 0.1536963126457e+02,
        0.1575330146156e+03 } }
  };
  const int num_cases = (int)(sizeof known_cases / sizeof known_cases[0]);

  // relative tolerance used for every comparison
  const double epsilon = 1.0e-08;

  double res_norm[5], err_norm[5];   // computed norms
  double res_ref[5],  err_ref[5];    // reference norms
  double res_diff[5], err_diff[5];   // relative differences
  double dt_expected = 0.0;
  int    known;
  int    i, k;

  //---------------------------------------------------------------------
  // compute the error norm and the residual norm; the residual norm is
  // reported per unit time step
  //---------------------------------------------------------------------
  error_norm(err_norm);
  compute_rhs();
  rhs_norm(res_norm);

  for (i = 0; i < 5; i++) {
    res_norm[i] /= dt;
  }

  //---------------------------------------------------------------------
  // start from the "unknown class" state: reference norms of 1.0 keep
  // the relative differences below well defined even without a match
  //---------------------------------------------------------------------
  *Class = 'U';
  *verified = false;
  for (i = 0; i < 5; i++) {
    res_ref[i] = 1.0;
    err_ref[i] = 1.0;
  }

  //---------------------------------------------------------------------
  // look up the reference case matching this grid and step count
  //---------------------------------------------------------------------
  for (k = 0; k < num_cases; k++) {
    const struct reference_case *rc = &known_cases[k];

    if (grid_points[0] != rc->grid || grid_points[1] != rc->grid ||
        grid_points[2] != rc->grid || no_time_steps != rc->steps) {
      continue;
    }

    *Class = rc->label;
    *verified = true;
    dt_expected = rc->dt;
    for (i = 0; i < 5; i++) {
      res_ref[i] = rc->residual[i];
      err_ref[i] = rc->error[i];
    }
    break;
  }

  //---------------------------------------------------------------------
  // relative difference between computed and reference norms
  //---------------------------------------------------------------------
  for (i = 0; i < 5; i++) {
    res_diff[i] = fabs((res_norm[i] - res_ref[i]) / res_ref[i]);
    err_diff[i] = fabs((err_norm[i] - err_ref[i]) / err_ref[i]);
  }

  //---------------------------------------------------------------------
  // a matching grid only counts if the time step matches as well;
  // otherwise the run is demoted to the unknown class
  //---------------------------------------------------------------------
  if (*Class == 'U') {
    printf(" Unknown class\n");
  } else {
    printf(" Verification being performed for class %c\n", *Class);
    printf(" accuracy setting for epsilon = %20.13E\n", epsilon);
    if (fabs(dt - dt_expected) <= epsilon) {
      *verified = true;
    } else {
      *verified = false;
      *Class = 'U';
      printf(" DT does not match the reference value of %15.8E\n",
          dt_expected);
    }
  }

  // the class is final from here on
  known = (*Class != 'U');

  //---------------------------------------------------------------------
  // residual norms
  //---------------------------------------------------------------------
  if (known) {
    printf(" Comparison of RMS-norms of residual\n");
  } else {
    printf(" RMS-norms of residual\n");
  }

  for (i = 0; i < 5; i++) {
    if (!known) {
      printf("          %2d%20.13E\n", i+1, res_norm[i]);
      continue;
    }
    if (res_diff[i] <= epsilon) {
      printf("          %2d%20.13E%20.13E%20.13E\n",
          i+1, res_norm[i], res_ref[i], res_diff[i]);
      continue;
    }
    // out of tolerance (a NaN difference also lands here)
    *verified = false;
    printf(" FAILURE: %2d%20.13E%20.13E%20.13E\n",
        i+1, res_norm[i], res_ref[i], res_diff[i]);
  }

  //---------------------------------------------------------------------
  // solution error norms
  //---------------------------------------------------------------------
  if (known) {
    printf(" Comparison of RMS-norms of solution error\n");
  } else {
    printf(" RMS-norms of solution error\n");
  }

  for (i = 0; i < 5; i++) {
    if (!known) {
      printf("          %2d%20.13E\n", i+1, err_norm[i]);
      continue;
    }
    if (err_diff[i] <= epsilon) {
      printf("          %2d%20.13E%20.13E%20.13E\n",
          i+1, err_norm[i], err_ref[i], err_diff[i]);
      continue;
    }
    // out of tolerance (a NaN difference also lands here)
    *verified = false;
    printf(" FAILURE: %2d%20.13E%20.13E%20.13E\n",
        i+1, err_norm[i], err_ref[i], err_diff[i]);
  }

  //---------------------------------------------------------------------
  // final verdict
  //---------------------------------------------------------------------
  if (!known) {
    printf(" No reference values provided\n");
    printf(" No verification performed\n");
  } else if (*verified) {
    printf(" Verification Successful\n");
  } else {
    printf(" Verification failed\n");
  }
}