/*
 * Add the current distribution function to the running totals kept in the
 * global strobe_values buffer and write those totals to `out`, one
 * "phi_x phi_y value" row per grid point. Negative reconstructed values are
 * clamped to zero before being accumulated. Two trailer lines ("# norm=" and
 * "# t=") are appended, and the norm is echoed to stdout.
 *
 *   out        destination stream
 *   MSIZE      not referenced directly here (nm() may depend on it - TODO confirm)
 *   host_a0    not read by this function
 *   host_a     cosine coefficients, addressed through nm()
 *   host_b     sine coefficients, addressed through nm()
 *   host_alpha enters the norm prefactor 2*PI*sqrt(host_alpha)
 *   t          time stamp written to the trailer
 */
void print_2d_strobe(FILE *out, int MSIZE, ffloat *host_a0, ffloat *host_a, ffloat *host_b, ffloat host_alpha, ffloat t) {
  const ffloat half_step = host_dPhi/2.0;

  // NOTE(review): both terms below use the same column m; this looks like an
  // unfinished trapezoid rule (m and m+1) - confirm before changing.
  ffloat norm = 0;
  int col = 1;
  while( col < host_M+1 ) {
    norm += (nm(host_a,0,col)+nm(host_a,0,col))*half_step;
    col++;
  }
  norm *= 2*PI*sqrt(host_alpha);

  // slot walks strobe_values in the same (phi_x, m) order on every call, so
  // successive calls accumulate into matching entries.
  int slot = 0;
  ffloat phi_x = -PI;
  while( phi_x < PI ) {
    for( int m = 1; m < host_M+2; m++ ) {
      // Rebuild f(phi_x, phi_y(m)) from its Fourier series in phi_x.
      ffloat sum = 0;
      for( int harmonic = 0; harmonic < host_N+1; harmonic++ ) {
        sum += nm(host_a,harmonic,m)*cos(harmonic*phi_x) + nm(host_b,harmonic,m)*sin(harmonic*phi_x);
      }
      if( sum < 0 ) { sum = 0; }

      strobe_values[slot] += sum;
      fprintf(out, "%0.5f %0.5f %0.20f\n", phi_x, phi_y(m), strobe_values[slot]);
      slot++;
    }
    phi_x += 0.01;
  }

  fprintf(out, "# norm=%0.20f\n", norm);
  fprintf(out, "# t=%0.20f\n", t);
  printf("# norm=%0.20f\n", norm);
} // end of print_2d_strobe(...)
/*
 * Print one row of instantaneous and time-averaged transport quantities.
 *
 * Echoes t and norm to stdout, then writes a column-header line followed by a
 * single data row to `out`.
 *
 *   out           destination stream for the header and the data row
 *   norm          value printed in the NORM column (supplied by the caller)
 *   host_a/host_b cosine / sine coefficient arrays, read through nm()
 *   MSIZE         not referenced directly here (nm() may depend on it - TODO confirm)
 *   host_mu       argument of the Bessel functions in the scaling factors
 *   host_alpha    enters the scaling factors via sqrt(host_alpha)
 *   host_E_dc, host_E_omega, host_omega
 *                 echoed in the row; host_omega also enters cos(host_omega*t)
 *   host_av_data  accumulated averages; elements 1..5 are rescaled IN PLACE, so
 *                 the array must hold at least 6 elements and calling this
 *                 function twice on the same buffer applies the scaling twice
 *   t             current time; elements 4 and 5 are divided by it, so it must
 *                 be non-zero
 */
void print_time_evolution_of_parameters(FILE *out, ffloat norm, ffloat *host_a, ffloat *host_b, int MSIZE,
                                        ffloat host_mu, ffloat host_alpha, ffloat host_E_dc, ffloat host_E_omega,
                                        ffloat host_omega, ffloat *host_av_data, ffloat t)
{
  printf("\n# t=%0.20f norm=%0.20f\n", t, norm);

  // Rectangle-rule integrals over the phi_y grid.
  // NOTE(review): the upper bound 2*host_M+2 differs from the host_M-based
  // bounds used by other integration loops in this file - confirm it matches
  // the row length that nm() assumes.
  ffloat v_dr_inst = 0;
  ffloat v_y_inst = 0;
  ffloat m_over_m_x_inst = 0;
  for( int m = 1; m < 2*host_M+2; m++ ) {
    v_dr_inst += nm(host_b,1,m)*host_dPhi;
    v_y_inst  += nm(host_a,0,m)*phi_y(m)*host_dPhi;
    m_over_m_x_inst += nm(host_a,1,m)*host_dPhi;
  }

  // Scaling factors applied to the raw integrals and to the averaged data.
  ffloat v_dr_multiplier = 2*gsl_sf_bessel_I0(host_mu)*PI*sqrt(host_alpha)/gsl_sf_bessel_In(1, host_mu);
  ffloat v_y_multiplier  = 4*PI*gsl_sf_bessel_I0(host_mu)/gsl_sf_bessel_In(1, host_mu);
  ffloat m_over_multiplier = PI*host_alpha*sqrt(host_alpha);
  v_dr_inst       *= v_dr_multiplier;
  v_y_inst        *= v_y_multiplier;
  m_over_m_x_inst *= m_over_multiplier;

  host_av_data[1] *= v_dr_multiplier;
  host_av_data[2] *= v_y_multiplier;
  host_av_data[3] *= m_over_multiplier;
  host_av_data[4] *= v_dr_multiplier;
  host_av_data[4] /= t;
  host_av_data[5] *= v_dr_multiplier;
  host_av_data[5] /= t;

  fprintf(out, "#E_{dc}                \\tilde{E}_{\\omega}     \\tilde{\\omega}         mu                     v_{dr}/v_{p}         A(\\omega)              NORM     v_{y}/v_{p}    m/m_{x,k}   <v_{dr}/v_{p}>   <v_{y}/v_{p}>    <m/m_{x,k}>  A_{inst}  t    Asin\n");
  // The final "Asin" column reports host_av_data[5]; element 4 is already
  // reported in the A(\omega) column.
  fprintf(out, "%0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f\n",
          host_E_dc, host_E_omega, host_omega, host_mu, v_dr_inst, host_av_data[4], norm, v_y_inst,
          m_over_m_x_inst, host_av_data[1], host_av_data[2], host_av_data[3], cos(host_omega*t)*v_dr_inst, t, host_av_data[5]);
} // end of print_time_evolution_of_parameters(...)
/*
 * Write a snapshot of the distribution function to `out` (used for animation
 * frames): a leading "# t=" line, one "phi_x phi_y value" row per grid point
 * with negative values clamped to zero, and a trailing "# norm=" line. The
 * norm is echoed to stdout as well.
 *
 *   out        destination stream
 *   MSIZE      not referenced directly here (nm() may depend on it - TODO confirm)
 *   host_a0    not read by this function
 *   host_a     cosine coefficients, addressed through nm()
 *   host_b     sine coefficients, addressed through nm()
 *   host_alpha enters the norm prefactor 2*PI*sqrt(host_alpha)
 *   t          time stamp written to the header line
 */
void print_2d_data(FILE *out, int MSIZE, ffloat *host_a0, ffloat *host_a, ffloat *host_b, ffloat host_alpha, ffloat t) {
  fprintf(out, "# t=%0.20f\n", t);

  // Rectangle-rule integral of the zeroth harmonic over the phi_y grid.
  // NOTE(review): the bound 2*host_M+2 differs from the host_M+1 bound used
  // for the same integral elsewhere in this file - confirm which is intended.
  ffloat norm = 0;
  int col = 1;
  while( col < 2*host_M+2 ) {
    norm += nm(host_a,0,col)*host_dPhi;
    col++;
  }
  norm *= 2*PI*sqrt(host_alpha);

  ffloat phi_x = -PI;
  while( phi_x < PI ) {
    for( int m = 1; m < host_M+2; m++ ) {
      // Rebuild f(phi_x, phi_y(m)) from its Fourier series in phi_x.
      ffloat sum = 0;
      for( int harmonic = 0; harmonic < host_N+1; harmonic++ ) {
        sum += nm(host_a,harmonic,m)*cos(harmonic*phi_x) + nm(host_b,harmonic,m)*sin(harmonic*phi_x);
      }
      if( sum < 0 ) { sum = 0; }
      fprintf(out, "%0.5f %0.5f %0.20f\n", phi_x, phi_y(m), sum);
    }
    phi_x += 0.01;
  }

  fprintf(out, "# norm=%0.20f\n", norm);
  printf("# norm=%0.20f\n", norm);
} // end of print_2d_data(...)
// ---- Example #4 | 0 | File: fourier.lax.c | Project: priimak/2dssl ----
/*
 * CPU (OpenMP) driver: builds the equilibrium Fourier coefficients a0, evolves
 * the coefficient arrays a and b in time up to t_max, and prints a result
 * selected by `display`.
 *
 * Positional arguments:
 *   display E_dc E_omega omega T N PhiYmax B t_max
 *
 * display modes (as implemented below):
 *   0  print n and a0[n][M] for n < N, then exit (no time evolution)
 *   1  print the a0 cosine series at column M+1 next to the closed-form
 *      expression, plus a norm line, then exit (no time evolution)
 *   2  after the evolution, print the evolved and equilibrium cosine series at
 *      a fixed column
 *   3  after the evolution, print the full 2D function and the running minimum
 *   4  after the evolution, print one summary row (drift velocity and norm)
 *
 * Returns 0 on success and 1 when too few arguments are supplied.
 */
int main(int argc, char *argv[]) {

  // argv[1]..argv[9] are all read below, so refuse to run without them.
  if( argc < 10 ) {
    fprintf(stderr, "usage: %s display E_dc E_omega omega T N PhiYmax B t_max\n",
            argc > 0 ? argv[0] : "fourier.lax");
    return 1;
  }

  int display         = atoi(argv[1]);

  ffloat host_E_dc    = strtod(argv[2], NULL);
  ffloat host_E_omega = strtod(argv[3], NULL);
  ffloat host_omega   = strtod(argv[4], NULL);

  T                   = strtod(argv[5], NULL);
  N                   = atoi(argv[6]);
  PhiYmax             = strtod(argv[7], NULL);
  ffloat B            = strtod(argv[8], NULL);
  t_max               = strtod(argv[9], NULL);

  dPhi = PhiYmax/M;

  printf("# B=%0.20f\n", B);
  printf("# dt=%0.20f dPhiY=%0.20f\n", dt, dPhi);


  ffloat mu = Delta_nu/(2*Kb*T);
  ffloat gamma2 = hbar*hbar/(2*Me*Kb*T*d*d);

  // create a0 and populate it with f0
  // NOTE(review): a0, a and b are variable-length arrays with automatic
  // storage; their size grows with N*M - confirm the stack limit is adequate.
  ffloat a0[N+1][2*M+3];
  ffloat A = d/(PI*hbar*sqrt(2*PI*Me*Kb*T)*gsl_sf_bessel_I0(mu));
  for( int n=0; n<N+1; n++ ) {
    ffloat a = A*gsl_sf_bessel_In(n, mu)*(n==0?0.5:1);
    for( int m = 0; m < 2*M+3; m++ ) {
      a0[n][m] = a*exp(-gamma2*pow(phi_y(m),2));
    }
  }

  if( display == 0 ) {
    for( int n=0; n<N; n++ ) {
      printf("%d %0.20f\n", n, a0[n][M]);
    }
    return 0;
  }

  if( display == 1 ) {
    for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.025 ) {
      ffloat value = 0;
      for( int n=0; n<N; n++ ) {
        value += a0[n][M+1]*cos(n*phi_x);
      }
      printf("%0.20f %0.20f %0.20f\n", phi_x, value, (d/(2*PI*hbar*gsl_sf_bessel_I0(mu)*sqrt(2*PI*Me*Kb*T)))*exp(mu*cos(phi_x)));
    }
    ffloat norm = 0;
    for( int m = 1; m < 2*M+2; m++ ) {
      norm += a0[0][m]*dPhi;
    }
    printf("# norm=%0.20f\n", norm*hbar/d*20*PI);
    return 0;
  }

  // Two time levels of each coefficient array; `current` and `next` select
  // which level is read and which is written during a step.
  ffloat a[2][N+1][2*M+3];
  ffloat b[2][N+1][2*M+3];
  for( int c = 0; c < 2; c++ ) {
    for( int n = 0; n < N+1; n++ ) {
      for( int m = 0; m < 2*M+3; m++ ) {
        b[c][n][m] = 0;
        a[c][n][m] = a0[n][m];
      }
    }
  }

  int current = 0; int next = 1;
  const ffloat alpha = Delta_nu*d*d*Me/(2*hbar*hbar);
  const ffloat nu = (1+dt/2);

  const ffloat abdt = alpha*B*dt/(4*dPhi);

  for( ffloat t = 0; t < t_max; t += dt ) {
    // Columns m are independent within one step: each iteration reads only
    // the `current` level and writes only the `next` level.
    #pragma omp parallel for
    for( int m = 1; m < 2*M+2; m++ ) {
      for( int n = 0; n < N; n++ ) {
        // Field term evaluated at both ends of the step.
        ffloat beta_t_plus_1 = host_E_dc + host_E_omega*cos(host_omega*(t+dt))+B*phi_y(m);
        ffloat beta_t        = host_E_dc + host_E_omega*cos(host_omega*(t))+B*phi_y(m);

        ffloat mu_t_plus_1   = n*beta_t_plus_1*dt/2;
        ffloat mu_t          = n*beta_t*dt/2;

        // g and h collect everything known at time t; the n-1 terms are
        // switched off where harmonic n-1 does not contribute.
        ffloat g = dt*a0[n][m] + a[current][n][m]*(1-dt/2) - b[current][n][m]*mu_t
          + abdt*(b[current][n+1][m+1] - b[current][n+1][m-1]
          - ( n < 2 ? 0 : ( b[current][n-1][m+1] - b[current][n-1][m-1])));

        ffloat h = b[current][n][m]*(1-dt/2) + a[current][n][m]*mu_t
          + abdt*((n==1?2:1)*(n==0?0:(a[current][n-1][m+1]-a[current][n-1][m-1]))
          - (a[current][n+1][m+1]-a[current][n+1][m-1]));

        // Solve the 2x2 system coupling a and b at time t+dt.
        a[next][n][m] = (g*nu-h*mu_t_plus_1)/(nu*nu+mu_t_plus_1*mu_t_plus_1);
        if( n > 0 ) {
          b[next][n][m] = (g*mu_t_plus_1+h*nu)/(nu*nu+mu_t_plus_1*mu_t_plus_1);
        }
      }
    }
    // Swap the roles of the two time levels.
    if( current == 0 ) { current = 1; next = 0; } else { current = 0; next = 1; }
  }

  if( display == 2 ) {
    // NOTE(review): column index 962 is hard-coded; it is in range only when
    // 2*M+3 > 962 - confirm against the configured M.
    for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.025 ) {
      ffloat value = 0; ffloat value0 = 0;
      for( int n=0; n<N; n++ ) {
        value += a[current][n][962]*cos(n*phi_x);
        value0 += a0[n][962]*cos(n*phi_x);
      }
      printf("%0.20f %0.20f %0.20f\n", phi_x, value, value0);
    }
    return 0;
  }

  if( display == 3 ) {
    // value_min / m_min track the smallest unclamped value seen so far across
    // all phi_x slices printed up to this point.
    ffloat value_min = 100;
    int m_min = -1;
    for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.04 ) {
      for( int m = 1; m < 2*M+2; m++ ) {
        ffloat value = 0;
        ffloat value0 = 0;
        for( int n = 0; n < N+1; n++ ) {
          value  += a[current][n][m]*cos(n*phi_x) + b[current][n][m]*sin(n*phi_x);
          value0 += a0[n][m]*cos(n*phi_x);
        }
        printf("%0.20f %0.20f %0.20f %0.20f\n", phi_x, phi_y(m), value<0?0:value, value0);
        if( value < value_min ) { value_min = value; m_min = m; }
      }
      printf("# v_min = %0.20f @ m=%d\n", value_min, m_min);
    }
    return 0;
  }

  if( display == 4 ) {
    ffloat norm = 0;
    for( int m = 1; m < 2*M+2; m++ ) {
      norm += a[current][0][m]*dPhi;
    }
    norm *= hbar*2*PI/(d*d);

    ffloat v_dr_av = 0;
    ffloat v_dr_final = 0;
    for( int m = 1; m < 2*M+2; m++ ) {
      v_dr_final += b[current][1][m]*dPhi;
    }
    v_dr_av = hbar * PI * v_dr_final / ( d * d ); // this is really v_{dr}/v_0

    printf("#E_{dc}                \\tilde{E}_{\\omega}     \\tilde{\\omega}         T                      <v_{dr}/v_{0}>         A(\\omega)              NORM\n");
    printf("%0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f\n", host_E_dc, host_E_omega, host_omega, T, v_dr_av, 0.0, norm);

  }

  return 0;
}
// Open `name` for writing and attach `io_buf` as its full-buffering area.
// Returns NULL, after reporting the failure with perror(), when the file
// cannot be opened.
static FILE *open_frame_file(const char *name, char *io_buf, size_t io_buf_size) {
  FILE *stream = fopen(name, "w");
  if( stream == NULL ) {
    perror(name);
    return NULL;
  }
  setvbuf(stream, io_buf, _IOFBF, io_buf_size);
  return stream;
}

/*
 * CUDA driver: parses the command line, sets up the equilibrium coefficients
 * and the device buffers, advances the solution on the main and half-step
 * grids up to t_max, and emits output selected by the global `display`.
 *
 * display modes (as implemented below):
 *   3   dump the evolved and equilibrium 2D functions to `out`, then exit
 *   4   write a parameter line and one summary row to `out`
 *   7   write a numbered frame file whenever frame_time reaches 0.01 and
 *       t > frame_start
 *   77  write a time-evolution row to `out` whenever frame_time reaches 0.01
 *   8   write a single "frame.data" snapshot after the time loop, then exit
 *   9   run for 101 periods and write a numbered strobe file each time t/T
 *       wraps past an integer (once t >= t_start)
 *
 * When read_from is non-NULL, the outer loop rescans for new parameters after
 * each run and continues from the current time.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when a host allocation or the
 * single-shot frame file cannot be created.
 */
int main(int argc, char **argv) {
  parse_cmd(argc, argv);

  cudaSetDevice(device);

  ffloat T = host_omega>0?(2*PI/host_omega):0; // period of external a/c emf
  if( display == 9 ) {
    t_max = t_start + 101*T;
    init_strobe_array();
  } else {
    t_max = t_start + T;
  }
  if( quiet == 0 ) { printf("# t_max = %0.20f kernel=%d\n", t_max, BLTZM_KERNEL); }

  host_dPhi = (PhiYmax-PhiYmin)/host_M;

  NSIZE = host_N+1;
  MSIZE = host_M+3;
  // Round the row length up so that every row occupies a multiple of 128 bytes.
  PADDED_MSIZE = (MSIZE*sizeof(ffloat))%128==0?MSIZE:((((MSIZE*sizeof(ffloat))/128)*128+128)/sizeof(ffloat));
  printf("PADDED MEMORY FROM %d ELEMENTS PER ROW TO %d\n", MSIZE, (int)PADDED_MSIZE);

  MP1 = host_M+1;

  SIZE_2D = NSIZE*PADDED_MSIZE;
  const int SIZE_2Df = SIZE_2D*sizeof(ffloat);

  host_TMSIZE=host_M+1;

  host_nu = 1+host_dt/2;
  host_nu2 = host_nu * host_nu;
  host_nu_tilde = 1-host_dt/2;
  host_bdt = host_B*host_dt/(4*host_dPhi);

  load_data();

  // create a0 and populate it with f0
  ffloat *host_a0 = (ffloat *)calloc(SIZE_2D, sizeof(ffloat));
  if( host_a0 == NULL ) {
    fprintf(stderr, "out of host memory\n");
    return EXIT_FAILURE;
  }
  for( int n=0; n<host_N+1; n++ ) {
    ffloat a = gsl_sf_bessel_In(n, host_mu)*(n==0?0.5:1)/(PI*gsl_sf_bessel_In(0, host_mu))*sqrt(host_mu/(2*PI*host_alpha));
    for( int m = 0; m < host_M+3; m++ ) {
      nm(host_a0, n, m) = a*expl(-host_mu*pow(phi_y(m),2)/2);
    }
  }

  // create device_a0 and transfer data from host_a0 to device_a0
  ffloat *a0;
  HANDLE_ERROR(cudaMalloc((void **)&a0, SIZE_2Df));
  HANDLE_ERROR(cudaMemcpy(a0, host_a0, SIZE_2Df, cudaMemcpyHostToDevice));

  // create a and b 2D vectors, four of each. one for current,
  // another for next pointer on main and shifted grids
  ffloat *host_a = (ffloat *)calloc(SIZE_2D, sizeof(ffloat));
  ffloat *host_b = (ffloat *)calloc(SIZE_2D, sizeof(ffloat));
  if( host_a == NULL || host_b == NULL ) {
    fprintf(stderr, "out of host memory\n");
    return EXIT_FAILURE;
  }

  ffloat *a[4];
  ffloat *b[4];
  for( int i = 0; i < 4; i++ ) {
    HANDLE_ERROR(cudaMalloc((void **)&a[i], SIZE_2Df));
    HANDLE_ERROR(cudaMalloc((void **)&b[i], SIZE_2Df));

    // zero vectors a[i] and b[i]
    HANDLE_ERROR(cudaMemset((void *)a[i], 0, SIZE_2Df));
    HANDLE_ERROR(cudaMemset((void *)b[i], 0, SIZE_2Df));
  }

  int current = 0; int next = 1;
  int current_hs = 2; int next_hs = 3; // 'hs' - half step

  // init vector a[current] with the equilibrium coefficients
  HANDLE_ERROR(cudaMemcpy(a[current], host_a0, SIZE_2Df,
                          cudaMemcpyHostToDevice));

  // NOTE(review): integer division truncates; confirm the kernels cover all
  // host_M+3 columns when that is not a multiple of TH_PER_BLOCK.
  int blocks = (host_M+3)/TH_PER_BLOCK;

  // tiptoe to the first half step
  ffloat cos_omega_t = 1; // cos(host_omega*t); for t = 0
  ffloat cos_omega_t_plus_dt = cos(host_omega*(host_dt));
  step_on_grid(blocks, a0, a[current], b[current], a[current_hs], b[current_hs],
               a[current], b[current], 0, 0,
               cos_omega_t, cos_omega_t_plus_dt);

  // used for file names when generating data for making animation
  char file_name_buf[128];

  char buf[16384]; // stdio buffer handed to setvbuf() for frame/strobe files

  int step = 0;
  ffloat frame_time = 0; int frame_number = 1;

  // Six accumulators are copied back from the device and index 5 is read
  // below, so the host mirror must hold six elements as well.
  ffloat *host_av_data = (ffloat *)calloc(6, sizeof(ffloat));
  if( host_av_data == NULL ) {
    fprintf(stderr, "out of host memory\n");
    return EXIT_FAILURE;
  }
  ffloat *av_data;
  HANDLE_ERROR(cudaMalloc((void **)&av_data, 6*sizeof(ffloat)));
  HANDLE_ERROR(cudaMemset((void *)av_data, 0, 6*sizeof(ffloat)));

  // Kept in ffloat so the half-step time is not truncated to single precision.
  ffloat t_hs = 0;

  ffloat t0 = 0;
  ffloat t = t0;
  ffloat timeout = -999;

  ffloat last_tT_reminder = 0;

  for(;;) {
    for( t = t0; t < t_max; t += host_dt ) {
      t_hs = t + host_dt/2;
      cos_omega_t = cos(host_omega*t);
      cos_omega_t_plus_dt = cos(host_omega*(t+host_dt));
      step_on_grid(blocks, a0, a[current], b[current], a[next], b[next], a[current_hs],
                   b[current_hs], t, t_hs,
                   cos_omega_t, cos_omega_t_plus_dt);

      cudaThreadSynchronize();

      cos_omega_t = cos(host_omega*t_hs);
      cos_omega_t_plus_dt = cos(host_omega*(t_hs+host_dt));
      step_on_half_grid(blocks, a0, a[current], b[current], a[next], b[next], a[current_hs],
                        b[current_hs], a[next_hs], b[next_hs], t, t_hs,
                        cos_omega_t, cos_omega_t_plus_dt);

      if( host_E_omega > 0 && display == 77 && frame_time >= 0.01) {
        // we need to perform averaging of v_dr, m_x and A
        av(blocks, a[next], b[next], av_data, t);
        HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        HANDLE_ERROR(cudaMemcpy(host_av_data, av_data, 6*sizeof(ffloat), cudaMemcpyDeviceToHost));
        ffloat norm = eval_norm(host_a, host_alpha, MSIZE);
        print_time_evolution_of_parameters(out, norm, host_a, host_b, MSIZE,
                                           host_mu, host_alpha, host_E_dc, host_E_omega, host_omega,
                                           host_av_data, t);
        frame_time = 0;
      }

      if( host_E_omega > 0 && display != 7 && display != 77 && display != 8 && t >= t_start ) {
        // we need to perform averaging of v_dr, m_x and A
        av(blocks, a[next], b[next], av_data, t);
      }

      // Swap the roles of the buffers on both grids.
      if( current    == 0 ) {    current = 1;    next = 0; } else { current = 0; next = 1; }
      if( current_hs == 2 ) { current_hs = 3; next_hs = 2; } else { current_hs = 2; next_hs = 3; }

      if( display == 9 && t >= t_start ) {
        // The fractional part of t/T dropping below its previous value marks
        // the start of a new period of the a/c field.
        ffloat tT = t/T;
        ffloat tT_reminder = tT-((int)tT);
        if( tT_reminder < last_tT_reminder ) {
          HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
          HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
          snprintf(file_name_buf, sizeof(file_name_buf), "strobe%08d.data", frame_number++);
          FILE *frame_file_stream = open_frame_file(file_name_buf, buf, sizeof(buf));
          if( frame_file_stream != NULL ) {
            printf("\nWriting strobe %s\n", file_name_buf);
            print_2d_strobe(frame_file_stream, MSIZE, host_a0, host_a, host_b, host_alpha, t);
            fclose(frame_file_stream);
          }
          frame_time = 0;
        }
        last_tT_reminder = tT_reminder;
      }

      if( display == 7 && frame_time >= 0.01 && t > frame_start ) { // we are making movie
        HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        snprintf(file_name_buf, sizeof(file_name_buf), "frame%08d.data", frame_number++);
        FILE *frame_file_stream = open_frame_file(file_name_buf, buf, sizeof(buf));
        if( frame_file_stream != NULL ) {
          printf("\nWriting frame %s\n", file_name_buf);
          print_2d_data(frame_file_stream, MSIZE, host_a0, host_a, host_b, host_alpha, t);
          fclose(frame_file_stream);
        }
        frame_time=0;
      }

      if( out != stdout && display != 7 ) {
        // progress indicator, refreshed every 300 steps
        step++;
        if( step == 300 ) {
          printf("\rt=%0.9f %0.2f%%", t, t/t_max*100);
          fflush(stdout);
          step = 0;
        }
      }
      frame_time += host_dt;

      if( display == 9 && t <= t_start && frame_time >= T ) {
        frame_time = 0;
      }
    }

    HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
    HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
    HANDLE_ERROR(cudaMemcpy(host_av_data, av_data, 6*sizeof(ffloat), cudaMemcpyDeviceToHost));

    // NOTE(review): both terms use the same column m; this looks like an
    // unfinished trapezoid rule (m and m+1) - confirm before changing.
    ffloat norm = 0;
    ffloat dphi_over_2 = host_dPhi/2.0;
    for( int m = 1; m < host_M+1; m++ ) {
      norm += (nm(host_a,0,m)+nm(host_a,0,m))*dphi_over_2;
    }
    norm *= 2*PI*sqrt(host_alpha);

    if( display == 3 ) {
      for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.01 ) {
        for( int m = 1; m < host_M; m++ ) {
          ffloat value = 0;
          ffloat value0 = 0;
          for( int n = 0; n < host_N+1; n++ ) {
            value  += nm(host_a,n,m)*cos(n*phi_x) + nm(host_b,n,m)*sin(n*phi_x);
            value0 += nm(host_a0,n,m)*cos(n*phi_x);
          }
          fprintf(out, "%0.5f %0.5f %0.20f %0.20f\n", phi_x, phi_y(m), value<0?0:value, value0<0?0:value0);
        }
      }
      fprintf(out, "# norm=%0.20f\n", norm);
      printf("# norm=%0.20f\n", norm);
      cuda_clean_up();
      return EXIT_SUCCESS;
    }

    if( display == 8 ) {
      // single shot image
      HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
      HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
      snprintf(file_name_buf, sizeof(file_name_buf), "frame.data");
      FILE *frame_file_stream = open_frame_file(file_name_buf, buf, sizeof(buf));
      if( frame_file_stream == NULL ) {
        cuda_clean_up();
        return EXIT_FAILURE;
      }
      printf("\nWriting frame %s\n", file_name_buf);
      print_2d_data(frame_file_stream, MSIZE, host_a0, host_a, host_b, host_alpha, t);
      fclose(frame_file_stream);
      frame_time=0;
      cuda_clean_up(); // same cleanup as the display == 3 exit path
      return EXIT_SUCCESS;
    }

    if( display == 4 ) {
      if( quiet == 0 ) { printf("\n# norm=%0.20f\n", norm); }
      ffloat v_dr_inst = 0 ;
      ffloat v_y_inst = 0;
      ffloat m_over_m_x_inst = 0;
      for( int m = 1; m < host_M; m++ ) {
        v_dr_inst += nm(host_b,1,m)*host_dPhi;
        v_y_inst  += nm(host_a,0,m)*phi_y(m)*host_dPhi;
        m_over_m_x_inst += nm(host_a,1,m)*host_dPhi;
      }

      ffloat v_dr_multiplier = 2*gsl_sf_bessel_I0(host_mu)*PI*sqrt(host_alpha)/gsl_sf_bessel_In(1, host_mu);
      ffloat v_y_multiplier  = 4*PI*gsl_sf_bessel_I0(host_mu)/gsl_sf_bessel_In(1, host_mu);
      ffloat m_over_multiplier = PI*host_alpha*sqrt(host_alpha);
      v_dr_inst       *= v_dr_multiplier;
      v_y_inst        *= v_y_multiplier;
      m_over_m_x_inst *= m_over_multiplier;

      host_av_data[1] *= v_dr_multiplier;
      host_av_data[2] *= v_y_multiplier;
      host_av_data[3] *= m_over_multiplier;
      host_av_data[4] *= v_dr_multiplier;
      host_av_data[4] /= T;
      host_av_data[5] *= v_dr_multiplier;
      host_av_data[5] /= T;

      fprintf(out, "# display=%d E_dc=%0.20f E_omega=%0.20f omega=%0.20f mu=%0.20f alpha=%0.20f n-harmonics=%d PhiYmin=%0.20f PhiYmax=%0.20f B=%0.20f t-max=%0.20f dt=%0.20f g-grid=%d\n",
                      display,   host_E_dc,  host_E_omega,  host_omega,  host_mu,  host_alpha,  host_N,        PhiYmin,       PhiYmax,       host_B,  t_start,     host_dt,  host_M);
      fprintf(out, "#E_{dc}                \\tilde{E}_{\\omega}     \\tilde{\\omega}         mu                     v_{dr}/v_{p}         A(\\omega)              NORM     v_{y}/v_{p}    m/m_{x,k}   <v_{dr}/v_{p}>   <v_{y}/v_{p}>    <m/m_{x,k}>    Asin\n");
      fprintf(out, "%0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f\n",
              host_E_dc, host_E_omega, host_omega, host_mu, v_dr_inst, host_av_data[4], norm, v_y_inst,
              m_over_m_x_inst, host_av_data[1], host_av_data[2], host_av_data[3], host_av_data[5]);
    }

    if( read_from == NULL ) { break; }

    // scan for new parameters
    timeout = scan_for_new_parameters();
    if( timeout < -900 ) { break; } // user entered 'exit'
    // Refresh the period first so that t_max is derived from the current
    // host_omega rather than from the previous run's period.
    T=host_omega>0?(2*PI/host_omega):0;
    t_start = t + timeout;
    t_max = t_start + T;
    t0 = t + host_dt;
    load_data(); // re-load data
    HANDLE_ERROR(cudaMemset((void *)av_data, 0, 6*sizeof(ffloat))); // clear averaging data
    if( quiet == 0 ) { printf("# t_max = %0.20f\n", t_max); }
  } // for(;;)

  if( out != NULL && out != stdout ) {
    fclose(out);
  }
  cuda_clean_up();
  return EXIT_SUCCESS;
} // end of main(...)
// ---- Example #6 | 0 ----
// All-purpose routine for computing the L2-projection
// of various functions onto the gradient of the Legendre basis
//     (Unstructured grid version)
//
void L2ProjectGrad_Unst(
    const dTensor2* vel_vec,
    const int istart, 
    const int iend, 
    const int QuadOrder, 
    const int BasisOrder_qin,
    const int BasisOrder_auxin,
    const int BasisOrder_fout,
    const mesh& Mesh, 
    const dTensor3* qin, 
    const dTensor3* auxin, 
    dTensor3* fout, 
    void (*Func)(const dTensor2* vel_vec,
        const dTensor2&,const dTensor2&,
        const dTensor2&,dTensor3&))
{
    // starting and ending indeces
    const int   NumElems = Mesh.get_NumElems();
    assert_ge(istart,1);
    assert_le(iend,NumElems);

    // qin variable
    assert_eq(NumElems,qin->getsize(1));
    const int     meqn = qin->getsize(2);
    const int kmax_qin = qin->getsize(3);
    assert_eq(kmax_qin,(BasisOrder_qin*(BasisOrder_qin+1))/2);

    // auxin variable
    assert_eq(NumElems,auxin->getsize(1));
    const int       maux = auxin->getsize(2);
    const int kmax_auxin = auxin->getsize(3);
    assert_eq(kmax_auxin,(BasisOrder_auxin*(BasisOrder_auxin+1))/2);

    // fout variables
    assert_eq(NumElems,fout->getsize(1));
    const int mcomps_out = fout->getsize(2);
    const int  kmax_fout = fout->getsize(3);
    assert_eq(kmax_fout,(BasisOrder_fout*(BasisOrder_fout+1))/2);

    // number of quadrature points
    assert_ge(QuadOrder,1);
    assert_le(QuadOrder,5);
    int mpoints;
    switch ( QuadOrder )
    {
        case 1:
            mpoints = 0;
            break;

        case 2:
            mpoints = 1;
            break;

        case 3:
            mpoints = 6;
            break;

        case 4:
            mpoints = 7;
            break;

        case 5:	     
            mpoints = 16;
            break;
    }

    // trivial case
    if ( QuadOrder==1 )
    {
        for (int i=istart; i<=iend; i++)
        for (int m=1; m<=mcomps_out; m++) 
        for (int k=1; k<=kmax_fout; k++) 
        {  fout->set(i,m,k, 0.0 );  }
    }
    else
    {
        const int kmax = iMax(iMax(kmax_qin,kmax_auxin),kmax_fout);
        dTensor2    spts(mpoints,2);
        dTensor1    wgts(mpoints);
        dTensor2    xpts(mpoints,2);
        dTensor2   qvals(mpoints,meqn);
        dTensor2 auxvals(mpoints,maux);
        dTensor3   fvals(mpoints,mcomps_out,2);
        dTensor2      mu(mpoints,kmax); // monomial basis (non-orthogonal)
        dTensor2     phi(mpoints,kmax); // Legendre basis (orthogonal)
        dTensor2   mu_xi(mpoints,kmax_fout);   //  xi-derivative of monomial basis (non-orthogonal)
        dTensor2  mu_eta(mpoints,kmax_fout);   // eta-derivative of monomial basis (non-orthogonal)
        dTensor2  phi_xi(mpoints,kmax_fout);   //  xi-derivative of Legendre basis (orthogonal)
        dTensor2 phi_eta(mpoints,kmax_fout);   // eta-derivative of Legendre basis (orthogonal)
        dTensor2   phi_x(mpoints,kmax_fout);   //   x-derivative of Legendre basis (orthogonal)
        dTensor2   phi_y(mpoints,kmax_fout);   //   y-derivative of Legendre basis (orthogonal)

        switch ( QuadOrder )
        {
            case 2:
                spts.set(1,1, 0.0 );
                spts.set(1,2, 0.0 );

                wgts.set(1, 0.5 );
                break;

            case 3:
                spts.set(1,1,  0.112615157582632 );
                spts.set(1,2,  0.112615157582632 );

                spts.set(2,1, -0.225230315165263 );
                spts.set(2,2,  0.112615157582632 );

                spts.set(3,1,  0.112615157582632 );
                spts.set(3,2, -0.225230315165263 );

                spts.set(4,1, -0.241757119823562 );
                spts.set(4,2, -0.241757119823562 );

                spts.set(5,1,  0.483514239647126 );
                spts.set(5,2, -0.241757119823562 );

                spts.set(6,1, -0.241757119823562 );
                spts.set(6,2,  0.483514239647126 );

                wgts.set(1, 0.1116907948390055 );
                wgts.set(2, 0.1116907948390055 );
                wgts.set(3, 0.1116907948390055 );
                wgts.set(4, 0.0549758718276610 );
                wgts.set(5, 0.0549758718276610 );
                wgts.set(6, 0.0549758718276610 );
                break;

            case 4:
                spts.set(1,1,   0.000000000000000 );
                spts.set(1,2,   0.000000000000000 );

                spts.set(2,1,   0.136808730771782 );
                spts.set(2,2,   0.136808730771782 );

                spts.set(3,1,  -0.273617461543563 );
                spts.set(3,2,   0.136808730771782 );

                spts.set(4,1,   0.136808730771782 );
                spts.set(4,2,  -0.273617461543563 );

                spts.set(5,1,  -0.232046826009877 );
                spts.set(5,2,  -0.232046826009877 );

                spts.set(6,1,   0.464093652019754 );
                spts.set(6,2,  -0.232046826009877 );

                spts.set(7,1,  -0.232046826009877 );
                spts.set(7,2,   0.464093652019754 );	 

                wgts.set(1,  0.1125000000000000 );
                wgts.set(2,  0.0661970763942530 );
                wgts.set(3,  0.0661970763942530 );
                wgts.set(4,  0.0661970763942530 );
                wgts.set(5,  0.0629695902724135 );
                wgts.set(6,  0.0629695902724135 );
                wgts.set(7,  0.0629695902724135 );
                break;

            case 5:
                spts.set(1,1,   0.000000000000000 );
                spts.set(1,2,   0.000000000000000 );

                spts.set(2,1,   0.125959254959390 );
                spts.set(2,2,   0.125959254959390 );

                spts.set(3,1,  -0.251918509918779 );
                spts.set(3,2,   0.125959254959390 );

                spts.set(4,1,   0.125959254959390 );
                spts.set(4,2,  -0.251918509918779 );

                spts.set(5,1,  -0.162764025581573 );
                spts.set(5,2,  -0.162764025581573 );

                spts.set(6,1,   0.325528051163147 );
                spts.set(6,2,  -0.162764025581573 );

                spts.set(7,1,  -0.162764025581573 );
                spts.set(7,2,   0.325528051163147 );

                spts.set(8,1,  -0.282786105016302 );
                spts.set(8,2,  -0.282786105016302 );

                spts.set(9,1,   0.565572210032605 );
                spts.set(9,2,  -0.282786105016302 );

                spts.set(10,1, -0.282786105016302 );
                spts.set(10,2,  0.565572210032605 );

                spts.set(11,1, -0.324938555923375 );
                spts.set(11,2, -0.070220503698695 );

                spts.set(12,1, -0.324938555923375 );
                spts.set(12,2,  0.395159059622071 );

                spts.set(13,1, -0.070220503698695 );
                spts.set(13,2, -0.324938555923375 );

                spts.set(14,1, -0.070220503698695 );
                spts.set(14,2,  0.395159059622071 );

                spts.set(15,1,  0.395159059622071 );
                spts.set(15,2, -0.324938555923375 );

                spts.set(16,1,  0.395159059622071 );
                spts.set(16,2, -0.070220503698695 );

                wgts.set(1,  0.0721578038388935 );
                wgts.set(2,  0.0475458171336425 );
                wgts.set(3,  0.0475458171336425 );
                wgts.set(4,  0.0475458171336425 );
                wgts.set(5,  0.0516086852673590 );
                wgts.set(6,  0.0516086852673590 );
                wgts.set(7,  0.0516086852673590 );
                wgts.set(8,  0.0162292488115990 );
                wgts.set(9,  0.0162292488115990 );
                wgts.set(10, 0.0162292488115990 );
                wgts.set(11, 0.0136151570872175 );
                wgts.set(12, 0.0136151570872175 );
                wgts.set(13, 0.0136151570872175 );
                wgts.set(14, 0.0136151570872175 );
                wgts.set(15, 0.0136151570872175 );
                wgts.set(16, 0.0136151570872175 );
                break;
        }

        // Loop over each quadrature point and construct monomial polys
        for (int m=1; m<=mpoints; m++)
        {
            // coordinates
            const double xi   = spts.get(m,1);      
            const double xi2  = xi*xi;
            const double xi3  = xi2*xi;
            const double xi4  = xi3*xi;
            const double eta  = spts.get(m,2);
            const double eta2 = eta*eta;
            const double eta3 = eta2*eta;
            const double eta4 = eta3*eta;      

            // monomial basis functions at each gaussian quadrature point
            switch( kmax )
            {
                case 15:  // fifth order		    		    
                    mu.set(m, 15, eta4     );
                    mu.set(m, 14, xi4      );
                    mu.set(m, 13, xi2*eta2 );
                    mu.set(m, 12, eta3*xi  );
                    mu.set(m, 11, xi3*eta  );

                case 10:  // fourth order
                    mu.set(m, 10, eta3     );
                    mu.set(m, 9,  xi3      );
                    mu.set(m, 8,  xi*eta2  );
                    mu.set(m, 7,  eta*xi2  );

                case 6:  // third order
                    mu.set(m, 6,  eta2     );
                    mu.set(m, 5,  xi2      );
                    mu.set(m, 4,  xi*eta   );		    

                case 3:  // second order		    
                    mu.set(m, 3, eta       );
                    mu.set(m, 2, xi        );

                case 1:  // first order
                    mu.set(m, 1, 1.0       );

                    break;		    
            }

            // Loop over each quadrature point and construct Legendre polys
            for (int i=1; i<=kmax; i++)
            {
                double tmp = 0.0;
                for (int j=1; j<=i; j++)
                {  tmp = tmp + Mmat[i-1][j-1]*mu.get(m,j);  }

                phi.set(m,i, tmp );
            }	

            // Gradient of monomial basis functions at each gaussian quadrature point
            switch( kmax_fout )
            {
                case 15:  // fifth order
                    mu_xi.set( m,15,  0.0         );
                    mu_xi.set( m,14,  4.0*xi3     );
                    mu_xi.set( m,13,  2.0*xi*eta2 );
                    mu_xi.set( m,12,  eta3        );
                    mu_xi.set( m,11,  3.0*xi2*eta );

                    mu_eta.set( m,15, 4.0*eta3    );
                    mu_eta.set( m,14, 0.0         );
                    mu_eta.set( m,13, 2.0*xi2*eta );
                    mu_eta.set( m,12, 3.0*eta2*xi );
                    mu_eta.set( m,11, xi3 );

                case 10:  // fourth order
                    mu_xi.set( m,10,  0.0        );
                    mu_xi.set( m,9,   3.0*xi2    );			
                    mu_xi.set( m,8,   eta2       );
                    mu_xi.set( m,7,   2.0*eta*xi );

                    mu_eta.set( m,10, 3.0*eta2   );
                    mu_eta.set( m,9,  0.0        );
                    mu_eta.set( m,8,  2.0*eta*xi );
                    mu_eta.set( m,7,  xi2        );

                case 6:  // third order
                    mu_xi.set( m,6,  0.0      );
                    mu_xi.set( m,5,  2.0*xi   );			
                    mu_xi.set( m,4,  eta      );

                    mu_eta.set( m,6,  2.0*eta );			
                    mu_eta.set( m,5,  0.0     );
                    mu_eta.set( m,4,  xi      );

                case 3:  // second order
                    mu_xi.set( m,3,  0.0 );
                    mu_xi.set( m,2,  1.0 );

                    mu_eta.set( m,3, 1.0 );
                    mu_eta.set( m,2, 0.0 );

                case 1:  // first order
                    mu_xi.set( m,1,  0.0 );

                    mu_eta.set( m,1, 0.0 );
                    break;
            }

            // Loop over each quadrature point and construct Legendre polys
            for (int i=1; i<=kmax_fout; i++)
            {
                double tmp1 = 0.0;
                double tmp2 = 0.0;
                for (int j=1; j<=i; j++)
                {  
                    tmp1 = tmp1 + Mmat[i-1][j-1]*mu_xi.get(m,j);  
                    tmp2 = tmp2 + Mmat[i-1][j-1]*mu_eta.get(m,j);
                }

                phi_xi.set(m,i,  tmp1 );
                phi_eta.set(m,i, tmp2 );
            }
        }

        // -------------------------------------------------------------
        // Loop over every grid cell indexed by user supplied parameters
        // described by istart...iend
        // -------------------------------------------------------------
#pragma omp parallel for
        for (int i=istart; i<=iend; i++)
        {	  
            // Find center of current cell
            const int i1 = Mesh.get_tnode(i,1);
            const int i2 = Mesh.get_tnode(i,2);
            const int i3 = Mesh.get_tnode(i,3);
            const double x1 = Mesh.get_node(i1,1);
            const double y1 = Mesh.get_node(i1,2);
            const double x2 = Mesh.get_node(i2,1);
            const double y2 = Mesh.get_node(i2,2);
            const double x3 = Mesh.get_node(i3,1);
            const double y3 = Mesh.get_node(i3,2);

            const double xc = (x1+x2+x3)/3.0;
            const double yc = (y1+y2+y3)/3.0;

            // Compute q, aux and fvals at each Gaussian Quadrature point
            // for this current cell indexed by (i,j)
            // Save results into dTensor2 qvals, auxvals and fvals.
            for (int m=1; m<=mpoints; m++)
            {
                // convert phi_xi and phi_eta derivatives
                // to phi_x and phi_y derivatives through Jacobian
                for (int k=1; k<=kmax_fout; k++)
                {
                    phi_x.set(m,k, Mesh.get_jmat(i,1,1)*phi_xi.get(m,k)
                            + Mesh.get_jmat(i,1,2)*phi_eta.get(m,k) );
                    phi_y.set(m,k, Mesh.get_jmat(i,2,1)*phi_xi.get(m,k)
                            + Mesh.get_jmat(i,2,2)*phi_eta.get(m,k) );
                }

                // point on the unit triangle
                const double s = spts.get(m,1);
                const double t = spts.get(m,2);

                // point on the physical triangle
                xpts.set(m,1, xc + (x2-x1)*s + (x3-x1)*t );
                xpts.set(m,2, yc + (y2-y1)*s + (y3-y1)*t );

                // Solution values (q) at each grid point
                for (int me=1; me<=meqn; me++)
                {
                    qvals.set(m,me, 0.0 );

                    for (int k=1; k<=kmax_qin; k++)
                    {
                        qvals.set(m,me, qvals.get(m,me) 
                                + phi.get(m,k) * qin->get(i,me,k) );
                    }
                }

                // Auxiliary values (aux) at each grid point
                for (int ma=1; ma<=maux; ma++)
                {
                    auxvals.set(m,ma, 0.0 );

                    for (int k=1; k<=kmax_auxin; k++)
                    {
                        auxvals.set(m,ma, auxvals.get(m,ma) 
                                + phi.get(m,k) * auxin->get(i,ma,k) );
                    }
                } 
            }

            // Call user-supplied function to set fvals
            Func(vel_vec, xpts, qvals, auxvals, fvals);

            // Evaluate integral on current cell (project onto Legendre basis) 
            // using Gaussian Quadrature for the integration
            for (int m1=1; m1<=mcomps_out; m1++)		
            for (int m2=1; m2<=kmax_fout; m2++)
            {
                double tmp = 0.0;
                for (int k=1; k<=mpoints; k++)
                {
                    tmp = tmp + wgts.get(k)*
                        ( fvals.get(k,m1,1)*phi_x.get(k,m2) +
                          fvals.get(k,m1,2)*phi_y.get(k,m2) );
                }
                fout->set(i, m1, m2,  2.0*tmp );
            }

        }
    }
}
예제 #7
0
// Modified version of the all purpose routine L2Project specifically written
// for projecting the "time-averaged" flux function onto the basis function.
//
// This routine also returns the coefficients of the Lax Wendroff Flux
// Function when expanded with legendre basis functions, and therefore the
// basis expansions produced by this routine can be used for all of the
// Riemann solves.
//
// ---------------------------------------------------------------------
// Inputs should have the following sizes:   
//           TODO - document the inputs here
// ---------------------------------------------------------------------
void L2ProjectLxW_Unst( const int mterms,
        const double alpha, const double beta_dt, const double charlie_dt,
        const int istart, const int iend,               // Start-stop indices
        const int QuadOrder,
        const int BasisOrder_qin,
        const int BasisOrder_auxin,
        const int BasisOrder_fout,
        const mesh& Mesh, 
        const dTensor3* qin, const dTensor3* auxin,     // state vector
        dTensor3* F, dTensor3* G,                       // time-averaged Flux function
        void FluxFunc (const dTensor2& xpts, 
            const dTensor2& Q, const dTensor2& Aux, dTensor3& flux),
        void DFluxFunc (const dTensor2& xpts, 
            const dTensor2& Q, const dTensor2& aux, dTensor4& Dflux),
        void D2FluxFunc (const dTensor2& xpts, 
            const dTensor2& Q, const dTensor2& aux, dTensor5& D2flux) )
{    

    if( fabs( alpha ) < 1e-14 && fabs( beta_dt ) < 1e-14 && fabs( charlie_dt ) < 1e-14 )
    {
        F->setall(0.);
        G->setall(0.);
        return;
    }

    // starting and ending indices 
    const int   NumElems = Mesh.get_NumElems();
    assert_ge(istart,1);
    assert_le(iend,NumElems);

    // qin variable
    assert_eq(NumElems,qin->getsize(1));
    const int     meqn = qin->getsize(2);
    const int kmax_qin = qin->getsize(3);
    assert_eq(kmax_qin,(BasisOrder_qin*(BasisOrder_qin+1))/2);

    // auxin variable
    assert_eq(NumElems,auxin->getsize(1));
    const int       maux = auxin->getsize(2);
    const int kmax_auxin = auxin->getsize(3);
    assert_eq(kmax_auxin,(BasisOrder_auxin*(BasisOrder_auxin+1))/2);

    // fout variables
    assert_eq(NumElems,    F->getsize(1));
    const int mcomps_out = F->getsize(2);
    const int  kmax_fout = F->getsize(3);
    assert_eq(kmax_fout, (BasisOrder_fout*(BasisOrder_fout+1))/2 );

    // number of quadrature points
    assert_ge(QuadOrder, 1);
    assert_le(QuadOrder, 5);

    // Number of quadrature points
    int mpoints;
    switch( QuadOrder )
    {
        case 1:
            mpoints = 1;
            break;

        case 2:
            mpoints = 3;
            break;

        case 3:
            mpoints = 6;
            break;

        case 4:
            mpoints = 12;
            break;

        case 5:	     
            mpoints = 16;
            break;
    }

    const int kmax = iMax(iMax(kmax_qin, kmax_auxin), kmax_fout);
    dTensor2  phi(mpoints, kmax); // Legendre basis (orthogonal)
    dTensor2 spts(mpoints, 2);    // List of quadrature points
    dTensor1 wgts(mpoints);       // List of quadrature weights

    setQuadPoints_Unst( QuadOrder, wgts, spts );

    // ---------------------------------------------------------------------- //
    // Evaluate the basis functions at each point
    SetLegendreAtPoints_Unst(spts, phi);
    // ---------------------------------------------------------------------- //

    // ---------------------------------------------------------------------- //
    // First-order derivatives
    dTensor2 phi_xi (mpoints, kmax );
    dTensor2 phi_eta(mpoints, kmax );
    SetLegendreGrad_Unst( spts, phi_xi, phi_eta );
    // ---------------------------------------------------------------------- //

    // ---------------------------------------------------------------------- //
    // Second-order derivatives
    dTensor2 phi_xi2  (mpoints, kmax );
    dTensor2 phi_xieta(mpoints, kmax );
    dTensor2 phi_eta2 (mpoints, kmax );
    LegendreDiff2_Unst(spts, &phi_xi2, &phi_xieta, &phi_eta2 );
    // ---------------------------------------------------------------------- //

    // ------------------------------------------------------------- //
    // Loop over every grid cell indexed by user supplied parameters //
    // described by istart...iend, jstart...jend                     // 
    // ------------------------------------------------------------- //
#pragma omp parallel for
    for (int i=istart; i<=iend; i++)
    {

        // These need to be defined locally.  Each mesh element carries its
        // own change of basis matrix, so these need to be recomputed for
        // each element.  The canonical derivatives, phi_xi, and phi_eta can
        // be computed and shared for each element.

        // First-order derivatives
        dTensor2   phi_x(mpoints, kmax_fout);   //   x-derivative of Legendre basis (orthogonal)
        dTensor2   phi_y(mpoints, kmax_fout);   //   y-derivative of Legendre basis (orthogonal)

        // Second-order derivatives
        dTensor2   phi_xx(mpoints, kmax_fout);   //   xx-derivative of Legendre basis (orthogonal)
        dTensor2   phi_xy(mpoints, kmax_fout);   //   xy-derivative of Legendre basis (orthogonal)
        dTensor2   phi_yy(mpoints, kmax_fout);   //   yy-derivative of Legendre basis (orthogonal)

        //find center of current cell
        const int    i1 = Mesh.get_tnode(i,1);
        const int    i2 = Mesh.get_tnode(i,2);
        const int    i3 = Mesh.get_tnode(i,3);

        // Corners:
        const double x1 = Mesh.get_node(i1,1);
        const double y1 = Mesh.get_node(i1,2);
        const double x2 = Mesh.get_node(i2,1);
        const double y2 = Mesh.get_node(i2,2);
        const double x3 = Mesh.get_node(i3,1);
        const double y3 = Mesh.get_node(i3,2);

        // Center of current cell:
        const double xc = (x1+x2+x3)/3.0;
        const double yc = (y1+y2+y3)/3.0;

        // Variables that need to be written to, and therefore are 
        // created for each thread
        dTensor2 xpts   (mpoints, 2);
        dTensor2 qvals  (mpoints, meqn);
        dTensor2 auxvals(mpoints, maux);

        // local storage for Flux function its Jacobian, and the Hessian:
        dTensor3    fvals(mpoints,             meqn, 2);  // flux function (vector)
        dTensor4        A(mpoints,       meqn, meqn, 2);  // Jacobian of flux
        dTensor5        H(mpoints, meqn, meqn, meqn, 2);  // Hessian of flux

        // Compute q, aux and fvals at each Gaussian Quadrature point
        // for this current cell indexed by (i,j)
        // Save results into dTensor2 qvals, auxvals and fvals.
        for (int m=1; m<= mpoints; m++)
        {

            // convert phi_xi and phi_eta derivatives
            // to phi_x and phi_y derivatives through Jacobian
            //
            // Note that: 
            //
            //     pd_x = J11 pd_xi + J12 pd_eta and
            //     pd_y = J21 pd_xi + J22 pd_eta.
            //
            // Squaring these operators yields the second derivatives.
            for (int k=1; k<=kmax_fout; k++)
            {
                phi_x.set(m,k, Mesh.get_jmat(i,1,1)*phi_xi.get(m,k)
                             + Mesh.get_jmat(i,1,2)*phi_eta.get(m,k) );
                phi_y.set(m,k, Mesh.get_jmat(i,2,1)*phi_xi.get(m,k)
                             + Mesh.get_jmat(i,2,2)*phi_eta.get(m,k) );

                phi_xx.set(m,k, Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,1,1)*phi_xi2.get(m,k)
                              + Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,1,2)*phi_xieta.get(m,k)
                              + Mesh.get_jmat(i,1,2)*Mesh.get_jmat(i,1,2)*phi_eta2.get(m,k)
                           );

                phi_xy.set(m,k, Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,2,1)*phi_xi2.get(m,k)
                             +(Mesh.get_jmat(i,1,2)*Mesh.get_jmat(i,2,1)
                             + Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,2,2))*phi_xieta.get(m,k)
                             + Mesh.get_jmat(i,1,2)*Mesh.get_jmat(i,2,2)*phi_eta2.get(m,k)
                           );

                phi_yy.set(m,k, Mesh.get_jmat(i,2,1)*Mesh.get_jmat(i,2,1)*phi_xi2.get(m,k)
                              + Mesh.get_jmat(i,2,1)*Mesh.get_jmat(i,2,2)*phi_xieta.get(m,k)
                              + Mesh.get_jmat(i,2,2)*Mesh.get_jmat(i,2,2)*phi_eta2.get(m,k)
                           );
            }

            // point on the unit triangle
            const double s = spts.get(m,1);
            const double t = spts.get(m,2);

            // point on the physical triangle
            xpts.set(m,1, xc + (x2-x1)*s + (x3-x1)*t );
            xpts.set(m,2, yc + (y2-y1)*s + (y3-y1)*t );

            // Solution values (q) at each grid point
            for (int me=1; me<=meqn; me++)
            {
                qvals.set(m,me, 0.0 );
                for (int k=1; k<=kmax_qin; k++)
                {
                    qvals.set(m,me, qvals.get(m,me) 
                            + phi.get(m,k) * qin->get(i,me,k) );
                }
            }

            // Auxiliary values (aux) at each grid point
            for (int ma=1; ma<=maux; ma++)
            {
                auxvals.set(m,ma, 0.0 );
                for (int k=1; k<=kmax_auxin; k++)
                {
                    auxvals.set(m,ma, auxvals.get(m,ma) 
                            + phi.get(m,k) * auxin->get(i,ma,k) );
                }
            } 
        }

        // ----------------------------------------------------------------- //
        //
        // Part I:
        //
        // Project the flux function onto the basis 
        // functions.  This is the term of order O( 1 ) in the
        // "time-averaged" Taylor expansion of f and g.
        //
        // ----------------------------------------------------------------- //

        // Call user-supplied function to set fvals
        FluxFunc(xpts, qvals, auxvals, fvals);

        // Evaluate integral on current cell (project onto Legendre basis) 
        // using Gaussian Quadrature for the integration
        //
        // TODO - do we want to optimize this by looking into using transposes,
        // as has been done in the 2d/cart code? (5/14/2014) -DS
        for (int me=1; me<=mcomps_out; me++)		
        for (int k=1; k<=kmax; k++)
        {
            double tmp1 = 0.0;
            double tmp2 = 0.0;
            for (int mp=1; mp <= mpoints; mp++)
            {
                tmp1 += wgts.get(mp)*fvals.get(mp, me, 1)*phi.get(mp, k);
                tmp2 += wgts.get(mp)*fvals.get(mp, me, 2)*phi.get(mp, k);
            }
            F->set(i, me, k,  2.0*tmp1 );
            G->set(i, me, k,  2.0*tmp2 );
        }

        // ----------------------------------------------------------------- //
        //
        // Part II:
        //
        // Project the derivative of the flux function onto the basis 
        // functions.  This is the term of order O( \dt ) in the
        // "time-averaged" Taylor expansion of f and g.
        //
        // ----------------------------------------------------------------- //

        // ----------------------------------------------------------------- //
        // Compute pointwise values for fx+gy:
        //
        // We can't multiply fvals of f, and g,
        // by alpha, otherwise we compute the wrong derivative here!
        //
        dTensor2 fx_plus_gy( mpoints, meqn ); fx_plus_gy.setall(0.);
        for( int mp=1; mp <= mpoints; mp++ )
        for( int me=1; me <= meqn; me++ )
        {
            double tmp = 0.;
            for( int k=2; k <= kmax; k++ )                
            {
                tmp += F->get( i, me, k ) * phi_x.get( mp, k );
                tmp += G->get( i, me, k ) * phi_y.get( mp, k );
            }
            fx_plus_gy.set( mp, me, tmp );
        }

        // Call user-supplied Jacobian to set f'(q) and g'(q):
        DFluxFunc( xpts, qvals, auxvals, A );

        // place-holders for point values of
        // f'(q)( fx + gy ) and g'(q)( fx + gy ):
        dTensor2 dt_times_fdot( mpoints, meqn );
        dTensor2 dt_times_gdot( mpoints, meqn );

        // Compute point values for f'(q) * (fx+gy) and g'(q) * (fx+gy):
        for( int mp=1; mp <= mpoints; mp++ )
        for( int m1=1; m1 <= meqn; m1++ )
        {
            double tmp1 = 0.;
            double tmp2 = 0.;
            for( int m2=1; m2 <= meqn; m2++ )
            {
                tmp1 += A.get(mp, m1, m2, 1 ) * fx_plus_gy.get(mp, m2);
                tmp2 += A.get(mp, m1, m2, 2 ) * fx_plus_gy.get(mp, m2);
            }
            dt_times_fdot.set( mp, m1, -beta_dt*tmp1 );
            dt_times_gdot.set( mp, m1, -beta_dt*tmp2 );
        }

        // ---  Third-order terms --- //
        //
        // These are the terms that are O( \dt^2 ) in the "time-averaged"
        // flux function.
        dTensor2 f_tt( mpoints, meqn );   f_tt.setall(0.);
        dTensor2 g_tt( mpoints, meqn );   g_tt.setall(0.);
        if( mterms > 2 )
        {

            // Construct the Hessian at each (quadrature) point
            D2FluxFunc( xpts, qvals, auxvals, H );

            // Second-order derivative terms
            dTensor2 qx_vals (mpoints, meqn);   qx_vals.setall(0.);
            dTensor2 qy_vals (mpoints, meqn);   qy_vals.setall(0.);

            dTensor2 fxx_vals(mpoints, meqn);   fxx_vals.setall(0.);
            dTensor2 gxx_vals(mpoints, meqn);   gxx_vals.setall(0.);

            dTensor2 fxy_vals(mpoints, meqn);   fxy_vals.setall(0.);
            dTensor2 gxy_vals(mpoints, meqn);   gxy_vals.setall(0.);

            dTensor2 fyy_vals(mpoints, meqn);   fyy_vals.setall(0.);
            dTensor2 gyy_vals(mpoints, meqn);   gyy_vals.setall(0.);

            for( int m=1; m <= mpoints; m++ )
            for( int me=1; me <= meqn; me++ )
            {
                // Can start at k=1, because derivative of a constant is
                // zero.
                double tmp_qx = 0.;
                double tmp_qy = 0.;
                for( int  k=2; k <= kmax; k++   )
                {
                    tmp_qx += phi_x.get(m,k) * qin->get(i,me,k);
                    tmp_qy += phi_y.get(m,k) * qin->get(i,me,k);
                }
                qx_vals.set(m,me, tmp_qx );
                qy_vals.set(m,me, tmp_qy );

                // First non-zero terms start at third-order.
                for( int  k=4; k <= kmax; k++   )
                {
                    fxx_vals.set(m,me, fxx_vals.get(m,me) + phi_xx.get(m,k)*F->get(i,me,k) );
                    gxx_vals.set(m,me, gxx_vals.get(m,me) + phi_xx.get(m,k)*G->get(i,me,k) );

                    fxy_vals.set(m,me, fxy_vals.get(m,me) + phi_xy.get(m,k)*F->get(i,me,k) );
                    gxy_vals.set(m,me, gxy_vals.get(m,me) + phi_xy.get(m,k)*G->get(i,me,k) );

                    fyy_vals.set(m,me, fyy_vals.get(m,me) + phi_yy.get(m,k)*F->get(i,me,k) );
                    gyy_vals.set(m,me, gyy_vals.get(m,me) + phi_yy.get(m,k)*G->get(i,me,k) );
                }

            }

            // ----------------------------------- //
            // Part I: Compute (f_x + g_y)_{,t}
            // ----------------------------------- //

            // Compute terms that get multiplied by \pd2{ f }{ q } and \pd2{ g }{ q }.
            dTensor2 fx_plus_gy_t( mpoints, meqn );
            for( int  m = 1;  m <= mpoints; m++ )
            for( int me = 1; me <= meqn; me++   )
            {

                double tmp = 0.;

                // Terms that get multiplied by the Hessian:
                for( int m1=1; m1 <= meqn; m1++ )
                for( int m2=1; m2 <= meqn; m2++ )
                {

                    tmp += H.get(m,me,m1,m2,1)*qx_vals.get(m,m1)*fx_plus_gy.get(m,m2);
                    tmp += H.get(m,me,m1,m2,2)*qy_vals.get(m,m1)*fx_plus_gy.get(m,m2);
                }

                // Terms that get multiplied by f'(q) and g'(q):
                for( int m1=1; m1 <= meqn; m1++ )
                {

                    tmp += A.get(m,me,m1,1)*( fxx_vals.get(m,m1)+gxy_vals.get(m,m1) );
                    tmp += A.get(m,me,m1,2)*( fxy_vals.get(m,m1)+gyy_vals.get(m,m1) );
                }

                fx_plus_gy_t.set( m, me, tmp );
            }

            // ----------------------------------- //
            // Part II: Compute 
            //      f'(q) * fx_plus_gy_t and 
            //      g'(q) * fx_plus_gy_t
            // ----------------------------------- //

            // Add in the third term that gets multiplied by A:
            for( int m=1; m <= mpoints; m++ )
            for( int m1=1; m1 <= meqn; m1++ )
            {
                double tmp1 = 0.;
                double tmp2 = 0.;
                for( int m2=1; m2 <= meqn; m2++ )
                {
                    tmp1 += A.get(m,m1,m2,1)*fx_plus_gy_t.get(m,m2);
                    tmp2 += A.get(m,m1,m2,2)*fx_plus_gy_t.get(m,m2);
                }
                f_tt.set( m, m1, tmp1 );
                g_tt.set( m, m1, tmp2 );
            }

            // ----------------------------------------------- //
            // Part III: Add in contributions from
            //      f''(q) * (fx_plus_gy, fx_plus_gy ) and 
            //      g''(q) * (fx_plus_gy, fx_plus_gy ).
            // ----------------------------------------------- //
            for( int m =1; m <= mpoints; m++ )
            for( int me =1; me <= meqn; me++ )
            {
                double tmp1 = 0.;
                double tmp2 = 0.;

                // Terms that get multiplied by the Hessian:
                for( int m1=1; m1 <= meqn; m1++ )
                for( int m2=1; m2 <= meqn; m2++ )
                {
                    tmp1 += H.get(m,me,m1,m2,1)*fx_plus_gy.get(m,m1)*fx_plus_gy.get(m,m2);
                    tmp2 += H.get(m,me,m1,m2,2)*fx_plus_gy.get(m,m1)*fx_plus_gy.get(m,m2);
                }

                f_tt.set( m, me, f_tt.get(m,me) + tmp1 );
                g_tt.set( m, me, g_tt.get(m,me) + tmp2 );
            }

        } // End of computing "third"-order terms

        // ---------------------------------------------------------- //
        // 
        // Construct basis coefficients (integrate_on_current_cell)
        //
        // ---------------------------------------------------------- //
        for (int me=1; me<=mcomps_out; me++)		
        for (int k=1; k<=kmax; k++)
        {

            double tmp1 = 0.0;
            double tmp2 = 0.0;
            for (int mp=1; mp<=mpoints; mp++)
            {
                tmp1 += wgts.get(mp)*phi.get(mp,k)*(
                    dt_times_fdot.get(mp, me) + charlie_dt*f_tt.get(mp, me) );
                tmp2 += wgts.get(mp)*phi.get(mp,k)*(
                    dt_times_gdot.get(mp, me) + charlie_dt*g_tt.get(mp, me) );
            }
            F->set(i,me,k,  F->get(i,me,k) + 2.0*tmp1 );
            G->set(i,me,k,  G->get(i,me,k) + 2.0*tmp2 );

        }

    }

}