// Write out distribution function to FILE *out. This used when writing data to generate animation void print_2d_strobe(FILE *out, int MSIZE, ffloat *host_a0, ffloat *host_a, ffloat *host_b, ffloat host_alpha, ffloat t) { ffloat norm = 0; ffloat dphi_over_2 = host_dPhi/2.0; for( int m = 1; m < host_M+1; m++ ) { norm += (nm(host_a,0,m)+nm(host_a,0,m))*dphi_over_2; } norm *= 2*PI*sqrt(host_alpha); int i = 0; for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.01 ) { for( int m = 1; m < host_M+2; m++ ) { ffloat value = 0; for( int n = 0; n < host_N+1; n++ ) { value += nm(host_a,n,m)*cos(n*phi_x) + nm(host_b,n,m)*sin(n*phi_x); } strobe_values[i] = strobe_values[i] + (value<0?0:value); //strobe_values[i] = value<0?0:value; fprintf(out, "%0.5f %0.5f %0.20f\n", phi_x, phi_y(m), strobe_values[i]); //fprintf(out, "%0.5f %0.5f %0.20f\n", phi_x, phi_y(m), value<0?0:value); i++; } } fprintf(out, "# norm=%0.20f\n", norm); fprintf(out, "# t=%0.20f\n", t); printf("# norm=%0.20f\n", norm); } // end of print_2d_strobe(...)
// Print one row of instantaneous and time-averaged parameters to FILE *out.
//
//   out           destination stream for the header line and the data row
//   norm          precomputed norm of the distribution (printed as given)
//   host_a/host_b host copies of the cosine/sine harmonic coefficients
//   MSIZE         not referenced directly here.
//                 NOTE(review): may be consumed by the nm() macro - confirm.
//   host_mu, host_alpha, host_E_dc, host_E_omega, host_omega
//                 model parameters; host_mu and host_alpha enter the
//                 multipliers, the rest are echoed into the row
//   host_av_data  averaging accumulators, at least 6 elements; slots 1..5 are
//                 rescaled IN PLACE by this call (slots 4 and 5 are also
//                 divided by t)
//   t             current time; must be non-zero for slots 4/5 to be finite
//
// The integration loop stays inside one grid row. main() sets up rows of
// host_M+3 entries, and the former bound of 2*host_M+2 walked past the end
// of the row. The bound now mirrors the display==4 branch of main(), which
// computes the same three sums.
//
// The last column ("Asin") prints host_av_data[5]. It used to print
// host_av_data[4] a second time, leaving the rescaled slot 5 unused.
void print_time_evolution_of_parameters(FILE *out, ffloat norm, ffloat *host_a, ffloat *host_b, int MSIZE,
                                        ffloat host_mu, ffloat host_alpha, ffloat host_E_dc, ffloat host_E_omega,
                                        ffloat host_omega, ffloat *host_av_data, ffloat t)
{
  printf("\n# t=%0.20f norm=%0.20f\n", t, norm);

  ffloat v_dr_inst = 0;
  ffloat v_y_inst = 0;
  ffloat m_over_m_x_inst = 0;
  for( int m = 1; m < host_M; m++ ) {
    v_dr_inst       += nm(host_b,1,m)*host_dPhi;
    v_y_inst        += nm(host_a,0,m)*phi_y(m)*host_dPhi;
    m_over_m_x_inst += nm(host_a,1,m)*host_dPhi;
  }

  ffloat v_dr_multiplier   = 2*gsl_sf_bessel_I0(host_mu)*PI*sqrt(host_alpha)/gsl_sf_bessel_In(1, host_mu);
  ffloat v_y_multiplier    = 4*PI*gsl_sf_bessel_I0(host_mu)/gsl_sf_bessel_In(1, host_mu);
  ffloat m_over_multiplier = PI*host_alpha*sqrt(host_alpha);

  v_dr_inst       *= v_dr_multiplier;
  v_y_inst        *= v_y_multiplier;
  m_over_m_x_inst *= m_over_multiplier;

  host_av_data[1] *= v_dr_multiplier;
  host_av_data[2] *= v_y_multiplier;
  host_av_data[3] *= m_over_multiplier;
  host_av_data[4] *= v_dr_multiplier;
  host_av_data[4] /= t;
  host_av_data[5] *= v_dr_multiplier;
  host_av_data[5] /= t;

  fprintf(out, "#E_{dc} \\tilde{E}_{\\omega} \\tilde{\\omega} mu v_{dr}/v_{p} A(\\omega) NORM v_{y}/v_{p} m/m_{x,k} <v_{dr}/v_{p}> <v_{y}/v_{p}> <m/m_{x,k}> A_{inst} t Asin\n");
  fprintf(out, "%0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f\n",
          host_E_dc,
          host_E_omega,
          host_omega,
          host_mu,
          v_dr_inst,
          host_av_data[4],
          norm,
          v_y_inst,
          m_over_m_x_inst,
          host_av_data[1],
          host_av_data[2],
          host_av_data[3],
          cos(host_omega*t)*v_dr_inst,
          t,
          host_av_data[5]);
} // end of print_time_evolution_of_parameters(...)
// Write out distribution function to FILE *out. This used when writing data to generate animation void print_2d_data(FILE *out, int MSIZE, ffloat *host_a0, ffloat *host_a, ffloat *host_b, ffloat host_alpha, ffloat t) { fprintf(out, "# t=%0.20f\n", t); ffloat norm = 0; for( int m = 1; m < 2*host_M+2; m++ ) { norm += nm(host_a,0,m)*host_dPhi; } norm *= 2*PI*sqrt(host_alpha); for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.01 ) { for( int m = 1; m < host_M+2; m++ ) { ffloat value = 0; //ffloat value0 = 0; for( int n = 0; n < host_N+1; n++ ) { value += nm(host_a,n,m)*cos(n*phi_x) + nm(host_b,n,m)*sin(n*phi_x); } fprintf(out, "%0.5f %0.5f %0.20f\n", phi_x, phi_y(m), value<0?0:value); } } fprintf(out, "# norm=%0.20f\n", norm); printf("# norm=%0.20f\n", norm); } // end of print_2d_data(...)
int main(int argc, char *argv[]) { int display = atoi(argv[1]); ffloat host_E_dc = strtod(argv[2], NULL); ffloat host_E_omega = strtod(argv[3], NULL); ffloat host_omega = strtod(argv[4], NULL); T = strtod(argv[5], NULL); N = atoi(argv[6]); PhiYmax = strtod(argv[7], NULL); ffloat B = strtod(argv[8], NULL); t_max = strtod(argv[9], NULL); dPhi = PhiYmax/M; printf("# B=%0.20f\n", B); printf("# dt=%0.20f dPhiY=%0.20f\n", dt, dPhi); ffloat mu = Delta_nu/(2*Kb*T); ffloat gamma2 = hbar*hbar/(2*Me*Kb*T*d*d); // create a0 and populate it with f0 ffloat a0[N+1][2*M+3]; ffloat A = d/(PI*hbar*sqrt(2*PI*Me*Kb*T)*gsl_sf_bessel_I0(mu)); for( int n=0; n<N+1; n++ ) { ffloat a = A*gsl_sf_bessel_In(n, mu)*(n==0?0.5:1); for( int m = 0; m < 2*M+3; m++ ) { a0[n][m] = a*exp(-gamma2*pow(phi_y(m),2)); } } if( display == 0 ) { for( int n=0; n<N; n++ ) { printf("%d %0.20f\n", n, a0[n][M]); } return 0; } if( display == 1 ) { for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.025 ) { ffloat value = 0; for( int n=0; n<N; n++ ) { value += a0[n][M+1]*cos(n*phi_x); } printf("%0.20f %0.20f %0.20f\n", phi_x, value, (d/(2*PI*hbar*gsl_sf_bessel_I0(mu)*sqrt(2*PI*Me*Kb*T)))*exp(mu*cos(phi_x))); } ffloat norm = 0; for( int m = 1; m < 2*M+2; m++ ) { norm += a0[0][m]*dPhi; } printf("# norm=%0.20f\n", norm*hbar/d*20*PI); return 0; } ffloat a[2][N+1][2*M+3]; ffloat b[2][N+1][2*M+3]; for( int c = 0; c < 2; c++ ) { for( int n = 0; n < N+1; n++ ) { for( int m = 0; m < 2*M+3; m++ ) { b[c][n][m] = 0; a[c][n][m] = a0[n][m];//*exp(-gamma2*pow(phi_y(m),2)); } } } int current = 0; int next = 1; const ffloat alpha = Delta_nu*d*d*Me/(2*hbar*hbar); const ffloat nu = (1+dt/2); const ffloat abdt = alpha*B*dt/(4*dPhi); for( ffloat t = 0; t < t_max; t += dt ) { #pragma omp parallel for for( int m = 1; m < 2*M+2; m++ ) { // #pragma omp parallel for( int n = 0; n < N; n++ ) { /* ffloat nu = 1 + dt/2; // good ffloat nu2 = nu * nu; ffloat mu_t_plus_1 = (host_E_dc + host_E_omega*cos(host_omega*(t+dt)))*n*dt/2; ffloat 
g=dt*a0[n]+a[n]*(1-dt/2)-eE(t)*n*b[n]*dt/2; ffloat h=b[n]*(1-dt/2)+eE(t)*n*a[n]*dt/2; a[n] = (g*nu-h*mu_t_plus_1)/(nu*nu + mu_t_plus_1*mu_t_plus_1); b[n] = (h*nu+g*mu_t_plus_1)/(nu*nu + mu_t_plus_1*mu_t_plus_1); */ ////////// ffloat beta_t_plus_1 = host_E_dc + host_E_omega*cos(host_omega*(t+dt))+B*phi_y(m); ffloat beta_t = host_E_dc + host_E_omega*cos(host_omega*(t))+B*phi_y(m); ffloat mu_t_plus_1 = n*beta_t_plus_1*dt/2; ffloat mu_t = n*beta_t*dt/2; ffloat g = dt*a0[n][m] + a[current][n][m]*(1-dt/2) - b[current][n][m]*mu_t + abdt*(b[current][n+1][m+1] - b[current][n+1][m-1] - ( n < 2 ? 0 : ( b[current][n-1][m+1] - b[current][n-1][m-1]))); ffloat h = b[current][n][m]*(1-dt/2) + a[current][n][m]*mu_t + abdt*((n==1?2:1)*(n==0?0:(a[current][n-1][m+1]-a[current][n-1][m-1])) - (a[current][n+1][m+1]-a[current][n+1][m-1])); a[next][n][m] = (g*nu-h*mu_t_plus_1)/(nu*nu+mu_t_plus_1*mu_t_plus_1); if( n > 0 ) { b[next][n][m] = (g*mu_t_plus_1+h*nu)/(nu*nu+mu_t_plus_1*mu_t_plus_1); } ////////////////////////////////////// /* ffloat g = a[current][n][m] + dt*a0[n][m] + abdt*(b[current][n+1][m+1] - b[current][n+1][m-1] - ( n < 2 ? 0 : ( b[current][n-1][m+1] - b[current][n-1][m-1]))); ffloat h = b[current][n][m] + abdt*((n==1?2:1)*(n==0?0:(a[current][n-1][m+1]-a[current][n-1][m-1])) - (a[current][n+1][m+1]-a[current][n+1][m-1])); ffloat beta_t_plus_1 = host_E_dc + host_E_omega*cos(host_omega*(t+dt))+B*phi_y(m); ffloat mu = n*beta_t_plus_1*dt; a[next][n][m] = (g*nu-h*mu)/(nu*nu+mu*mu); if( n > 0 ) { b[next][n][m] = (g*mu+h*nu)/(nu*nu+mu*mu); } */ /////////////////////////////////////// /* a[next][n][m] = dt*a0[n][m] + (a[current][n][m-1]+a[current][n][m+1])*(1-dt)/2 - (b[current][n][m-1]+b[current][n][m+1])*n*beta*dt/2 + alpha*B*dt/(4*dPhi)*(b[current][n+1][m+1] - b[current][n+1][m-1] - ( n < 2 ? 
0 : ( b[current][n-1][m+1] - b[current][n-1][m-1]) ) ); if( n == 0 ) { continue; } // n here is always 1 or greater b[next][n][m] = (b[current][n][m-1]+b[current][n][m+1])*(1-dt)/2 + (a[current][n][m-1]+a[current][n][m+1])*n*beta*dt/2 + alpha*B*dt/(4*dPhi)*((n==1?2:1)*(a[current][n-1][m+1]-a[current][n-1][m-1]) - (a[current][n+1][m+1]-a[current][n+1][m-1])); */ } } #pragma end parallel for //printf("%d\n", current); if( current == 0 ) { current = 1; next = 0; } else { current = 0; next = 1; } } if( display == 2 ) { for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.025 ) { ffloat value = 0; ffloat value0 = 0; for( int n=0; n<N; n++ ) { value += a[current][n][962]*cos(n*phi_x);// + b[current][n][962]*sin(n*phi_x); // value += a0[n][M+1]*cos(n*phi_x); value0 += a0[n][962]*cos(n*phi_x); } printf("%0.20f %0.20f %0.20f\n", phi_x, value, value0); // (d/(2*PI*hbar*gsl_sf_bessel_I0(mu)*sqrt(2*PI*Me*Kb*T)))*exp(mu*cos(phi_x))); } return 0; } if( display == 3 ) { ffloat value_min = 100; int m_min = -1; for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.04 ) { for( int m = 1; m < 2*M+2; m++ ) { ffloat value = 0; ffloat value0 = 0; for( int n = 0; n < N+1; n++ ) { value += a[current][n][m]*cos(n*phi_x) + b[current][n][m]*sin(n*phi_x); value0 += a0[n][m]*cos(n*phi_x); } printf("%0.20f %0.20f %0.20f %0.20f\n", phi_x, phi_y(m), value<0?0:value, value0); if( value < value_min ) { value_min = value; m_min = m; } } printf("# v_min = %0.20f @ m=%d\n", value_min, m_min); } return 0; } if( display == 4 ) { ffloat norm = 0; for( int m = 1; m < 2*M+2; m++ ) { norm += a[current][0][m]*dPhi; } norm *= hbar*2*PI/(d*d); ffloat v_dr_av = 0; ffloat v_dr_final = 0; for( int m = 1; m < 2*M+2; m++ ) { v_dr_final += b[current][1][m]*dPhi; } v_dr_av = hbar * PI * v_dr_final / ( d * d ); // this is really v_{dr}/v_0 printf("#E_{dc} \\tilde{E}_{\\omega} \\tilde{\\omega} T <v_{dr}/v_{0}> A(\\omega) NORM\n"); printf("%0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f\n", host_E_dc, host_E_omega, host_omega, T, 
v_dr_av, 0.0, norm); } }
// Open 'path' for writing and install 'io_buf' as its full-buffering buffer.
// Returns NULL, after reporting the failure with perror(), when the file
// cannot be opened.
static FILE *open_frame_stream(const char *path, char *io_buf, size_t io_buf_size) {
  FILE *stream = fopen(path, "w");
  if( stream == NULL ) {
    perror(path);
    return NULL;
  }
  setvbuf(stream, io_buf, _IOFBF, io_buf_size);
  return stream;
}

// CUDA solver entry point.
//
// Parses the command line, sizes and initialises the grids, then advances
// the solution in time on the main and half-step grids. What is written, and
// when, is selected by 'display':
//   3   full 2D distribution to 'out' after the run
//   4   one summary row to 'out' after the run
//   7   a numbered frame file during the run (movie)
//   8   a single frame file after the run
//   9   accumulated strobe files during the run
//   77  time evolution rows to 'out' during the run
// When 'read_from' is set, new parameters are read after each run and the
// integration continues from the current time.
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE if a host allocation or an output
// file cannot be obtained.
int main(int argc, char **argv) {
  parse_cmd(argc, argv);
  cudaSetDevice(device);

  ffloat T = host_omega>0?(2*PI/host_omega):0; // period of external a/c emf
  if( display == 9 ) {
    t_max = t_start + 101*T;
    init_strobe_array();
  } else {
    t_max = t_start + T;
  }
  if( quiet == 0 ) {
    printf("# t_max = %0.20f kernel=%d\n", t_max, BLTZM_KERNEL);
  }

  host_dPhi = (PhiYmax-PhiYmin)/host_M;

  NSIZE = host_N+1;
  MSIZE = host_M+3;
  // round each row up to a whole number of 128-byte units
  PADDED_MSIZE = (MSIZE*sizeof(ffloat))%128==0?MSIZE:((((MSIZE*sizeof(ffloat))/128)*128+128)/sizeof(ffloat));
  printf("PADDED MEMORY FROM %d ELEMENTS PER ROW TO %d\n", MSIZE, (int)PADDED_MSIZE);

  MP1 = host_M+1;
  SIZE_2D = NSIZE*PADDED_MSIZE;
  const int SIZE_2Df = SIZE_2D*sizeof(ffloat);

  host_TMSIZE   = host_M+1;
  host_nu       = 1+host_dt/2;
  host_nu2      = host_nu * host_nu;
  host_nu_tilde = 1-host_dt/2;
  host_bdt      = host_B*host_dt/(4*host_dPhi);

  load_data();

  // create a0 and populate it with f0
  ffloat *host_a0 = (ffloat *)calloc(SIZE_2D, sizeof(ffloat));
  if( host_a0 == NULL ) {
    fprintf(stderr, "out of host memory\n");
    return EXIT_FAILURE;
  }
  for( int n=0; n<host_N+1; n++ ) {
    ffloat a = gsl_sf_bessel_In(n, host_mu)*(n==0?0.5:1)/(PI*gsl_sf_bessel_In(0, host_mu))*sqrt(host_mu/(2*PI*host_alpha));
    for( int m = 0; m < host_M+3; m++ ) {
      nm(host_a0, n, m) = a*expl(-host_mu*pow(phi_y(m),2)/2);
    }
  }

  // create device_a0 and transfer data from host_a0 to device_a0
  ffloat *a0;
  HANDLE_ERROR(cudaMalloc((void **)&a0, SIZE_2Df));
  HANDLE_ERROR(cudaMemcpy(a0, host_a0, SIZE_2Df, cudaMemcpyHostToDevice));

  // create a and b 2D vectors, four of each. one for current,
  // another for next pointer on main and shifted grids
  ffloat *host_a = (ffloat *)calloc(SIZE_2D, sizeof(ffloat));
  ffloat *host_b = (ffloat *)calloc(SIZE_2D, sizeof(ffloat));
  // The device keeps 6 averaging slots and all 6 are copied back and read
  // (index 5 included), so the host mirror must hold 6 elements as well.
  ffloat *host_av_data = (ffloat *)calloc(6, sizeof(ffloat));
  if( host_a == NULL || host_b == NULL || host_av_data == NULL ) {
    fprintf(stderr, "out of host memory\n");
    return EXIT_FAILURE;
  }

  ffloat *a[4];
  ffloat *b[4];
  for( int i = 0; i < 4; i++ ) {
    HANDLE_ERROR(cudaMalloc((void **)&a[i], SIZE_2Df));
    HANDLE_ERROR(cudaMalloc((void **)&b[i], SIZE_2Df));
    // zero vectors a[i] and b[i]
    HANDLE_ERROR(cudaMemset((void *)a[i], 0, SIZE_2Df));
    HANDLE_ERROR(cudaMemset((void *)b[i], 0, SIZE_2Df));
  }

  int current = 0;    int next = 1;
  int current_hs = 2; int next_hs = 3; // 'hs' - half step

  // init vector a[current] with the equilibrium distribution
  HANDLE_ERROR(cudaMemcpy(a[current], host_a0, SIZE_2Df, cudaMemcpyHostToDevice));

  int blocks = (host_M+3)/TH_PER_BLOCK;

  // tiptoe to the first half step
  ffloat cos_omega_t = 1; // cos(host_omega*t); for t = 0
  ffloat cos_omega_t_plus_dt = cos(host_omega*(host_dt));
  step_on_grid(blocks, a0, a[current], b[current], a[current_hs], b[current_hs],
               a[current], b[current], 0, 0, cos_omega_t, cos_omega_t_plus_dt);

  char file_name_buf[128]; // file names for frame / strobe output
  char buf[16384];         // stdio buffer handed to setvbuf for frame files

  int step = 0;
  ffloat frame_time = 0;
  int frame_number = 1;

  ffloat *av_data;
  HANDLE_ERROR(cudaMalloc((void **)&av_data, 6*sizeof(ffloat)));
  HANDLE_ERROR(cudaMemset((void *)av_data, 0, 6*sizeof(ffloat)));

  // Same type as t: a plain float would truncate t + host_dt/2 whenever
  // ffloat is wider than float.
  ffloat t_hs = 0;

  ffloat t0 = 0;
  ffloat t = t0;
  ffloat timeout = -999;
  ffloat last_tT_reminder = 0;

  for(;;) {
    for( t = t0; t < t_max; t += host_dt ) {
      t_hs = t + host_dt/2;

      cos_omega_t = cos(host_omega*t);
      cos_omega_t_plus_dt = cos(host_omega*(t+host_dt));
      step_on_grid(blocks, a0, a[current], b[current], a[next], b[next],
                   a[current_hs], b[current_hs], t, t_hs, cos_omega_t, cos_omega_t_plus_dt);
      cudaThreadSynchronize();

      cos_omega_t = cos(host_omega*t_hs);
      cos_omega_t_plus_dt = cos(host_omega*(t_hs+host_dt));
      step_on_half_grid(blocks, a0, a[current], b[current], a[next], b[next],
                        a[current_hs], b[current_hs], a[next_hs], b[next_hs],
                        t, t_hs, cos_omega_t, cos_omega_t_plus_dt);

      if( host_E_omega > 0 && display == 77 && frame_time >= 0.01) {
        // we need to perform averaging of v_dr, m_x and A
        av(blocks, a[next], b[next], av_data, t);
        HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        HANDLE_ERROR(cudaMemcpy(host_av_data, av_data, 6*sizeof(ffloat), cudaMemcpyDeviceToHost));
        ffloat norm = eval_norm(host_a, host_alpha, MSIZE);
        print_time_evolution_of_parameters(out, norm, host_a, host_b, MSIZE,
                                           host_mu, host_alpha, host_E_dc, host_E_omega, host_omega,
                                           host_av_data, t);
        frame_time = 0;
      }

      if( host_E_omega > 0 && display != 7 && display != 77 && display != 8 && t >= t_start ) {
        // we need to perform averaging of v_dr, m_x and A
        av(blocks, a[next], b[next], av_data, t);
      }

      // exchange current/next on the main grid and on the half-step grid
      if( current == 0 ) { current = 1; next = 0; } else { current = 0; next = 1; }
      if( current_hs == 2 ) { current_hs = 3; next_hs = 2; } else { current_hs = 2; next_hs = 3; }

      if( display == 9 && t >= t_start ) {
        // A strobe is written whenever the fractional part of t/T wraps.
        // NOTE(review): T is 0 when host_omega <= 0, which makes t/T
        // non-finite - confirm display 9 is only used with host_omega > 0.
        ffloat tT = t/T;
        ffloat tT_reminder = tT-((int)tT);
        if( tT_reminder < last_tT_reminder ) {
          HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
          HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
          sprintf(file_name_buf, "strobe%08d.data", frame_number++);
          FILE *frame_file_stream = open_frame_stream(file_name_buf, buf, sizeof(buf));
          if( frame_file_stream == NULL ) {
            cuda_clean_up();
            return EXIT_FAILURE;
          }
          printf("\nWriting strobe %s\n", file_name_buf);
          print_2d_strobe(frame_file_stream, MSIZE, host_a0, host_a, host_b, host_alpha, t);
          fclose(frame_file_stream);
          frame_time = 0;
        }
        last_tT_reminder = tT_reminder;
      }

      if( display == 7 && frame_time >= 0.01 && t > frame_start ) {
        // we are making movie
        HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
        sprintf(file_name_buf, "frame%08d.data", frame_number++);
        FILE *frame_file_stream = open_frame_stream(file_name_buf, buf, sizeof(buf));
        if( frame_file_stream == NULL ) {
          cuda_clean_up();
          return EXIT_FAILURE;
        }
        printf("\nWriting frame %s\n", file_name_buf);
        print_2d_data(frame_file_stream, MSIZE, host_a0, host_a, host_b, host_alpha, t);
        fclose(frame_file_stream);
        frame_time=0;
      }

      if( out != stdout && display != 7 ) {
        // progress indicator, refreshed every 300 steps
        step++;
        if( step == 300 ) {
          printf("\rt=%0.9f %0.2f%%", t, t/t_max*100);
          fflush(stdout);
          step = 0;
        }
      }

      frame_time += host_dt;
      if( display == 9 && t <= t_start && frame_time >= T ) {
        frame_time = 0; // was 'frame_time == 0;', a comparison with no effect
      }
    }

    HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
    HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
    HANDLE_ERROR(cudaMemcpy(host_av_data, av_data, 6*sizeof(ffloat), cudaMemcpyDeviceToHost));

    ffloat norm = 0;
    ffloat dphi_over_2 = host_dPhi/2.0;
    for( int m = 1; m < host_M+1; m++ ) {
      norm += (nm(host_a,0,m)+nm(host_a,0,m))*dphi_over_2;
    }
    norm *= 2*PI*sqrt(host_alpha);

    if( display == 3 ) {
      for( ffloat phi_x = -PI; phi_x < PI; phi_x += 0.01 ) {
        for( int m = 1; m < host_M; m++ ) {
          ffloat value = 0;
          ffloat value0 = 0;
          for( int n = 0; n < host_N+1; n++ ) {
            value  += nm(host_a,n,m)*cos(n*phi_x) + nm(host_b,n,m)*sin(n*phi_x);
            value0 += nm(host_a0,n,m)*cos(n*phi_x);
          }
          fprintf(out, "%0.5f %0.5f %0.20f %0.20f\n", phi_x, phi_y(m), value<0?0:value, value0<0?0:value0);
        }
      }
      fprintf(out, "# norm=%0.20f\n", norm);
      printf("# norm=%0.20f\n", norm);
      cuda_clean_up();
      return EXIT_SUCCESS;
    }

    if( display == 8 ) {
      // single shot image
      HANDLE_ERROR(cudaMemcpy(host_a, a[current], SIZE_2Df, cudaMemcpyDeviceToHost));
      HANDLE_ERROR(cudaMemcpy(host_b, b[current], SIZE_2Df, cudaMemcpyDeviceToHost));
      sprintf(file_name_buf, "frame.data");
      FILE *frame_file_stream = open_frame_stream(file_name_buf, buf, sizeof(buf));
      if( frame_file_stream == NULL ) {
        cuda_clean_up();
        return EXIT_FAILURE;
      }
      printf("\nWriting frame %s\n", file_name_buf);
      print_2d_data(frame_file_stream, MSIZE, host_a0, host_a, host_b, host_alpha, t);
      fclose(frame_file_stream);
      frame_time=0;
      cuda_clean_up(); // release device resources like the other exit paths
      return EXIT_SUCCESS;
    }

    if( display == 4 ) {
      if( quiet == 0 ) { printf("\n# norm=%0.20f\n", norm); }

      ffloat v_dr_inst = 0;
      ffloat v_y_inst = 0;
      ffloat m_over_m_x_inst = 0;
      for( int m = 1; m < host_M; m++ ) {
        v_dr_inst       += nm(host_b,1,m)*host_dPhi;
        v_y_inst        += nm(host_a,0,m)*phi_y(m)*host_dPhi;
        m_over_m_x_inst += nm(host_a,1,m)*host_dPhi;
      }

      ffloat v_dr_multiplier   = 2*gsl_sf_bessel_I0(host_mu)*PI*sqrt(host_alpha)/gsl_sf_bessel_In(1, host_mu);
      ffloat v_y_multiplier    = 4*PI*gsl_sf_bessel_I0(host_mu)/gsl_sf_bessel_In(1, host_mu);
      ffloat m_over_multiplier = PI*host_alpha*sqrt(host_alpha);
      v_dr_inst       *= v_dr_multiplier;
      v_y_inst        *= v_y_multiplier;
      m_over_m_x_inst *= m_over_multiplier;

      host_av_data[1] *= v_dr_multiplier;
      host_av_data[2] *= v_y_multiplier;
      host_av_data[3] *= m_over_multiplier;
      host_av_data[4] *= v_dr_multiplier;
      host_av_data[4] /= T;
      host_av_data[5] *= v_dr_multiplier;
      host_av_data[5] /= T;

      fprintf(out, "# display=%d E_dc=%0.20f E_omega=%0.20f omega=%0.20f mu=%0.20f alpha=%0.20f n-harmonics=%d PhiYmin=%0.20f PhiYmax=%0.20f B=%0.20f t-max=%0.20f dt=%0.20f g-grid=%d\n",
              display, host_E_dc, host_E_omega, host_omega, host_mu, host_alpha, host_N,
              PhiYmin, PhiYmax, host_B, t_start, host_dt, host_M);
      fprintf(out, "#E_{dc} \\tilde{E}_{\\omega} \\tilde{\\omega} mu v_{dr}/v_{p} A(\\omega) NORM v_{y}/v_{p} m/m_{x,k} <v_{dr}/v_{p}> <v_{y}/v_{p}> <m/m_{x,k}> Asin\n");
      fprintf(out, "%0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f %0.20f\n",
              host_E_dc,
              host_E_omega,
              host_omega,
              host_mu,
              v_dr_inst,
              host_av_data[4],
              norm,
              v_y_inst,
              m_over_m_x_inst,
              host_av_data[1],
              host_av_data[2],
              host_av_data[3],
              host_av_data[5]);
    }

    if( read_from == NULL ) { break; }

    // scan for new parameters
    timeout = scan_for_new_parameters();
    if( timeout < -900 ) { break; } // user entered 'exit'
    t_start = t + timeout;
    // NOTE(review): t_max is computed from the previous T; T is refreshed
    // only on the line after next - confirm this ordering is intended.
    t_max = t_start + T;
    t0 = t + host_dt;
    T = host_omega>0?(2*PI/host_omega):0;
    load_data(); // re-load data

    HANDLE_ERROR(cudaMemset((void *)av_data, 0, 6*sizeof(ffloat))); // clear averaging data
    if( quiet == 0 ) { printf("# t_max = %0.20f\n", t_max); }
  } // for(;;)

  if( out != NULL && out != stdout ) {
    fclose(out);
  }
  cuda_clean_up();
  return EXIT_SUCCESS;
} // end of main(...)
// All-purpose routine for computing the L2-projection // of various functions onto the gradient of the Legendre basis // (Unstructured grid version) // void L2ProjectGrad_Unst( const dTensor2* vel_vec, const int istart, const int iend, const int QuadOrder, const int BasisOrder_qin, const int BasisOrder_auxin, const int BasisOrder_fout, const mesh& Mesh, const dTensor3* qin, const dTensor3* auxin, dTensor3* fout, void (*Func)(const dTensor2* vel_vec, const dTensor2&,const dTensor2&, const dTensor2&,dTensor3&)) { // starting and ending indeces const int NumElems = Mesh.get_NumElems(); assert_ge(istart,1); assert_le(iend,NumElems); // qin variable assert_eq(NumElems,qin->getsize(1)); const int meqn = qin->getsize(2); const int kmax_qin = qin->getsize(3); assert_eq(kmax_qin,(BasisOrder_qin*(BasisOrder_qin+1))/2); // auxin variable assert_eq(NumElems,auxin->getsize(1)); const int maux = auxin->getsize(2); const int kmax_auxin = auxin->getsize(3); assert_eq(kmax_auxin,(BasisOrder_auxin*(BasisOrder_auxin+1))/2); // fout variables assert_eq(NumElems,fout->getsize(1)); const int mcomps_out = fout->getsize(2); const int kmax_fout = fout->getsize(3); assert_eq(kmax_fout,(BasisOrder_fout*(BasisOrder_fout+1))/2); // number of quadrature points assert_ge(QuadOrder,1); assert_le(QuadOrder,5); int mpoints; switch ( QuadOrder ) { case 1: mpoints = 0; break; case 2: mpoints = 1; break; case 3: mpoints = 6; break; case 4: mpoints = 7; break; case 5: mpoints = 16; break; } // trivial case if ( QuadOrder==1 ) { for (int i=istart; i<=iend; i++) for (int m=1; m<=mcomps_out; m++) for (int k=1; k<=kmax_fout; k++) { fout->set(i,m,k, 0.0 ); } } else { const int kmax = iMax(iMax(kmax_qin,kmax_auxin),kmax_fout); dTensor2 spts(mpoints,2); dTensor1 wgts(mpoints); dTensor2 xpts(mpoints,2); dTensor2 qvals(mpoints,meqn); dTensor2 auxvals(mpoints,maux); dTensor3 fvals(mpoints,mcomps_out,2); dTensor2 mu(mpoints,kmax); // monomial basis (non-orthogonal) dTensor2 phi(mpoints,kmax); // Legendre basis 
(orthogonal) dTensor2 mu_xi(mpoints,kmax_fout); // xi-derivative of monomial basis (non-orthogonal) dTensor2 mu_eta(mpoints,kmax_fout); // eta-derivative of monomial basis (non-orthogonal) dTensor2 phi_xi(mpoints,kmax_fout); // xi-derivative of Legendre basis (orthogonal) dTensor2 phi_eta(mpoints,kmax_fout); // eta-derivative of Legendre basis (orthogonal) dTensor2 phi_x(mpoints,kmax_fout); // x-derivative of Legendre basis (orthogonal) dTensor2 phi_y(mpoints,kmax_fout); // y-derivative of Legendre basis (orthogonal) switch ( QuadOrder ) { case 2: spts.set(1,1, 0.0 ); spts.set(1,2, 0.0 ); wgts.set(1, 0.5 ); break; case 3: spts.set(1,1, 0.112615157582632 ); spts.set(1,2, 0.112615157582632 ); spts.set(2,1, -0.225230315165263 ); spts.set(2,2, 0.112615157582632 ); spts.set(3,1, 0.112615157582632 ); spts.set(3,2, -0.225230315165263 ); spts.set(4,1, -0.241757119823562 ); spts.set(4,2, -0.241757119823562 ); spts.set(5,1, 0.483514239647126 ); spts.set(5,2, -0.241757119823562 ); spts.set(6,1, -0.241757119823562 ); spts.set(6,2, 0.483514239647126 ); wgts.set(1, 0.1116907948390055 ); wgts.set(2, 0.1116907948390055 ); wgts.set(3, 0.1116907948390055 ); wgts.set(4, 0.0549758718276610 ); wgts.set(5, 0.0549758718276610 ); wgts.set(6, 0.0549758718276610 ); break; case 4: spts.set(1,1, 0.000000000000000 ); spts.set(1,2, 0.000000000000000 ); spts.set(2,1, 0.136808730771782 ); spts.set(2,2, 0.136808730771782 ); spts.set(3,1, -0.273617461543563 ); spts.set(3,2, 0.136808730771782 ); spts.set(4,1, 0.136808730771782 ); spts.set(4,2, -0.273617461543563 ); spts.set(5,1, -0.232046826009877 ); spts.set(5,2, -0.232046826009877 ); spts.set(6,1, 0.464093652019754 ); spts.set(6,2, -0.232046826009877 ); spts.set(7,1, -0.232046826009877 ); spts.set(7,2, 0.464093652019754 ); wgts.set(1, 0.1125000000000000 ); wgts.set(2, 0.0661970763942530 ); wgts.set(3, 0.0661970763942530 ); wgts.set(4, 0.0661970763942530 ); wgts.set(5, 0.0629695902724135 ); wgts.set(6, 0.0629695902724135 ); wgts.set(7, 
0.0629695902724135 ); break; case 5: spts.set(1,1, 0.000000000000000 ); spts.set(1,2, 0.000000000000000 ); spts.set(2,1, 0.125959254959390 ); spts.set(2,2, 0.125959254959390 ); spts.set(3,1, -0.251918509918779 ); spts.set(3,2, 0.125959254959390 ); spts.set(4,1, 0.125959254959390 ); spts.set(4,2, -0.251918509918779 ); spts.set(5,1, -0.162764025581573 ); spts.set(5,2, -0.162764025581573 ); spts.set(6,1, 0.325528051163147 ); spts.set(6,2, -0.162764025581573 ); spts.set(7,1, -0.162764025581573 ); spts.set(7,2, 0.325528051163147 ); spts.set(8,1, -0.282786105016302 ); spts.set(8,2, -0.282786105016302 ); spts.set(9,1, 0.565572210032605 ); spts.set(9,2, -0.282786105016302 ); spts.set(10,1, -0.282786105016302 ); spts.set(10,2, 0.565572210032605 ); spts.set(11,1, -0.324938555923375 ); spts.set(11,2, -0.070220503698695 ); spts.set(12,1, -0.324938555923375 ); spts.set(12,2, 0.395159059622071 ); spts.set(13,1, -0.070220503698695 ); spts.set(13,2, -0.324938555923375 ); spts.set(14,1, -0.070220503698695 ); spts.set(14,2, 0.395159059622071 ); spts.set(15,1, 0.395159059622071 ); spts.set(15,2, -0.324938555923375 ); spts.set(16,1, 0.395159059622071 ); spts.set(16,2, -0.070220503698695 ); wgts.set(1, 0.0721578038388935 ); wgts.set(2, 0.0475458171336425 ); wgts.set(3, 0.0475458171336425 ); wgts.set(4, 0.0475458171336425 ); wgts.set(5, 0.0516086852673590 ); wgts.set(6, 0.0516086852673590 ); wgts.set(7, 0.0516086852673590 ); wgts.set(8, 0.0162292488115990 ); wgts.set(9, 0.0162292488115990 ); wgts.set(10, 0.0162292488115990 ); wgts.set(11, 0.0136151570872175 ); wgts.set(12, 0.0136151570872175 ); wgts.set(13, 0.0136151570872175 ); wgts.set(14, 0.0136151570872175 ); wgts.set(15, 0.0136151570872175 ); wgts.set(16, 0.0136151570872175 ); break; } // Loop over each quadrature point and construct monomial polys for (int m=1; m<=mpoints; m++) { // coordinates const double xi = spts.get(m,1); const double xi2 = xi*xi; const double xi3 = xi2*xi; const double xi4 = xi3*xi; const double eta = 
spts.get(m,2); const double eta2 = eta*eta; const double eta3 = eta2*eta; const double eta4 = eta3*eta; // monomial basis functions at each gaussian quadrature point switch( kmax ) { case 15: // fifth order mu.set(m, 15, eta4 ); mu.set(m, 14, xi4 ); mu.set(m, 13, xi2*eta2 ); mu.set(m, 12, eta3*xi ); mu.set(m, 11, xi3*eta ); case 10: // fourth order mu.set(m, 10, eta3 ); mu.set(m, 9, xi3 ); mu.set(m, 8, xi*eta2 ); mu.set(m, 7, eta*xi2 ); case 6: // third order mu.set(m, 6, eta2 ); mu.set(m, 5, xi2 ); mu.set(m, 4, xi*eta ); case 3: // second order mu.set(m, 3, eta ); mu.set(m, 2, xi ); case 1: // first order mu.set(m, 1, 1.0 ); break; } // Loop over each quadrature point and construct Legendre polys for (int i=1; i<=kmax; i++) { double tmp = 0.0; for (int j=1; j<=i; j++) { tmp = tmp + Mmat[i-1][j-1]*mu.get(m,j); } phi.set(m,i, tmp ); } // Gradient of monomial basis functions at each gaussian quadrature point switch( kmax_fout ) { case 15: // fifth order mu_xi.set( m,15, 0.0 ); mu_xi.set( m,14, 4.0*xi3 ); mu_xi.set( m,13, 2.0*xi*eta2 ); mu_xi.set( m,12, eta3 ); mu_xi.set( m,11, 3.0*xi2*eta ); mu_eta.set( m,15, 4.0*eta3 ); mu_eta.set( m,14, 0.0 ); mu_eta.set( m,13, 2.0*xi2*eta ); mu_eta.set( m,12, 3.0*eta2*xi ); mu_eta.set( m,11, xi3 ); case 10: // fourth order mu_xi.set( m,10, 0.0 ); mu_xi.set( m,9, 3.0*xi2 ); mu_xi.set( m,8, eta2 ); mu_xi.set( m,7, 2.0*eta*xi ); mu_eta.set( m,10, 3.0*eta2 ); mu_eta.set( m,9, 0.0 ); mu_eta.set( m,8, 2.0*eta*xi ); mu_eta.set( m,7, xi2 ); case 6: // third order mu_xi.set( m,6, 0.0 ); mu_xi.set( m,5, 2.0*xi ); mu_xi.set( m,4, eta ); mu_eta.set( m,6, 2.0*eta ); mu_eta.set( m,5, 0.0 ); mu_eta.set( m,4, xi ); case 3: // second order mu_xi.set( m,3, 0.0 ); mu_xi.set( m,2, 1.0 ); mu_eta.set( m,3, 1.0 ); mu_eta.set( m,2, 0.0 ); case 1: // first order mu_xi.set( m,1, 0.0 ); mu_eta.set( m,1, 0.0 ); break; } // Loop over each quadrature point and construct Legendre polys for (int i=1; i<=kmax_fout; i++) { double tmp1 = 0.0; double tmp2 = 0.0; for 
(int j=1; j<=i; j++) { tmp1 = tmp1 + Mmat[i-1][j-1]*mu_xi.get(m,j); tmp2 = tmp2 + Mmat[i-1][j-1]*mu_eta.get(m,j); } phi_xi.set(m,i, tmp1 ); phi_eta.set(m,i, tmp2 ); } } // ------------------------------------------------------------- // Loop over every grid cell indexed by user supplied parameters // described by istart...iend // ------------------------------------------------------------- #pragma omp parallel for for (int i=istart; i<=iend; i++) { // Find center of current cell const int i1 = Mesh.get_tnode(i,1); const int i2 = Mesh.get_tnode(i,2); const int i3 = Mesh.get_tnode(i,3); const double x1 = Mesh.get_node(i1,1); const double y1 = Mesh.get_node(i1,2); const double x2 = Mesh.get_node(i2,1); const double y2 = Mesh.get_node(i2,2); const double x3 = Mesh.get_node(i3,1); const double y3 = Mesh.get_node(i3,2); const double xc = (x1+x2+x3)/3.0; const double yc = (y1+y2+y3)/3.0; // Compute q, aux and fvals at each Gaussian Quadrature point // for this current cell indexed by (i,j) // Save results into dTensor2 qvals, auxvals and fvals. 
for (int m=1; m<=mpoints; m++) { // convert phi_xi and phi_eta derivatives // to phi_x and phi_y derivatives through Jacobian for (int k=1; k<=kmax_fout; k++) { phi_x.set(m,k, Mesh.get_jmat(i,1,1)*phi_xi.get(m,k) + Mesh.get_jmat(i,1,2)*phi_eta.get(m,k) ); phi_y.set(m,k, Mesh.get_jmat(i,2,1)*phi_xi.get(m,k) + Mesh.get_jmat(i,2,2)*phi_eta.get(m,k) ); } // point on the unit triangle const double s = spts.get(m,1); const double t = spts.get(m,2); // point on the physical triangle xpts.set(m,1, xc + (x2-x1)*s + (x3-x1)*t ); xpts.set(m,2, yc + (y2-y1)*s + (y3-y1)*t ); // Solution values (q) at each grid point for (int me=1; me<=meqn; me++) { qvals.set(m,me, 0.0 ); for (int k=1; k<=kmax_qin; k++) { qvals.set(m,me, qvals.get(m,me) + phi.get(m,k) * qin->get(i,me,k) ); } } // Auxiliary values (aux) at each grid point for (int ma=1; ma<=maux; ma++) { auxvals.set(m,ma, 0.0 ); for (int k=1; k<=kmax_auxin; k++) { auxvals.set(m,ma, auxvals.get(m,ma) + phi.get(m,k) * auxin->get(i,ma,k) ); } } } // Call user-supplied function to set fvals Func(vel_vec, xpts, qvals, auxvals, fvals); // Evaluate integral on current cell (project onto Legendre basis) // using Gaussian Quadrature for the integration for (int m1=1; m1<=mcomps_out; m1++) for (int m2=1; m2<=kmax_fout; m2++) { double tmp = 0.0; for (int k=1; k<=mpoints; k++) { tmp = tmp + wgts.get(k)* ( fvals.get(k,m1,1)*phi_x.get(k,m2) + fvals.get(k,m1,2)*phi_y.get(k,m2) ); } fout->set(i, m1, m2, 2.0*tmp ); } } } }
// Modified version of the all purpose routine L2Project specifically written // for projecting the "time-averaged" flux function onto the basis function. // // This routine also returns the coefficients of the Lax Wendroff Flux // Function when expanded with legendre basis functions, and therefore the // basis expansions produced by this routine can be used for all of the // Riemann solves. // // --------------------------------------------------------------------- // Inputs should have the following sizes: // TODO - document the inputs here // --------------------------------------------------------------------- void L2ProjectLxW_Unst( const int mterms, const double alpha, const double beta_dt, const double charlie_dt, const int istart, const int iend, // Start-stop indices const int QuadOrder, const int BasisOrder_qin, const int BasisOrder_auxin, const int BasisOrder_fout, const mesh& Mesh, const dTensor3* qin, const dTensor3* auxin, // state vector dTensor3* F, dTensor3* G, // time-averaged Flux function void FluxFunc (const dTensor2& xpts, const dTensor2& Q, const dTensor2& Aux, dTensor3& flux), void DFluxFunc (const dTensor2& xpts, const dTensor2& Q, const dTensor2& aux, dTensor4& Dflux), void D2FluxFunc (const dTensor2& xpts, const dTensor2& Q, const dTensor2& aux, dTensor5& D2flux) ) { if( fabs( alpha ) < 1e-14 && fabs( beta_dt ) < 1e-14 && fabs( charlie_dt ) < 1e-14 ) { F->setall(0.); G->setall(0.); return; } // starting and ending indices const int NumElems = Mesh.get_NumElems(); assert_ge(istart,1); assert_le(iend,NumElems); // qin variable assert_eq(NumElems,qin->getsize(1)); const int meqn = qin->getsize(2); const int kmax_qin = qin->getsize(3); assert_eq(kmax_qin,(BasisOrder_qin*(BasisOrder_qin+1))/2); // auxin variable assert_eq(NumElems,auxin->getsize(1)); const int maux = auxin->getsize(2); const int kmax_auxin = auxin->getsize(3); assert_eq(kmax_auxin,(BasisOrder_auxin*(BasisOrder_auxin+1))/2); // fout variables assert_eq(NumElems, F->getsize(1)); 
const int mcomps_out = F->getsize(2); const int kmax_fout = F->getsize(3); assert_eq(kmax_fout, (BasisOrder_fout*(BasisOrder_fout+1))/2 ); // number of quadrature points assert_ge(QuadOrder, 1); assert_le(QuadOrder, 5); // Number of quadrature points int mpoints; switch( QuadOrder ) { case 1: mpoints = 1; break; case 2: mpoints = 3; break; case 3: mpoints = 6; break; case 4: mpoints = 12; break; case 5: mpoints = 16; break; } const int kmax = iMax(iMax(kmax_qin, kmax_auxin), kmax_fout); dTensor2 phi(mpoints, kmax); // Legendre basis (orthogonal) dTensor2 spts(mpoints, 2); // List of quadrature points dTensor1 wgts(mpoints); // List of quadrature weights setQuadPoints_Unst( QuadOrder, wgts, spts ); // ---------------------------------------------------------------------- // // Evaluate the basis functions at each point SetLegendreAtPoints_Unst(spts, phi); // ---------------------------------------------------------------------- // // ---------------------------------------------------------------------- // // First-order derivatives dTensor2 phi_xi (mpoints, kmax ); dTensor2 phi_eta(mpoints, kmax ); SetLegendreGrad_Unst( spts, phi_xi, phi_eta ); // ---------------------------------------------------------------------- // // ---------------------------------------------------------------------- // // Second-order derivatives dTensor2 phi_xi2 (mpoints, kmax ); dTensor2 phi_xieta(mpoints, kmax ); dTensor2 phi_eta2 (mpoints, kmax ); LegendreDiff2_Unst(spts, &phi_xi2, &phi_xieta, &phi_eta2 ); // ---------------------------------------------------------------------- // // ------------------------------------------------------------- // // Loop over every grid cell indexed by user supplied parameters // // described by istart...iend, jstart...jend // // ------------------------------------------------------------- // #pragma omp parallel for for (int i=istart; i<=iend; i++) { // These need to be defined locally. 
Each mesh element carries its // own change of basis matrix, so these need to be recomputed for // each element. The canonical derivatives, phi_xi, and phi_eta can // be computed and shared for each element. // First-order derivatives dTensor2 phi_x(mpoints, kmax_fout); // x-derivative of Legendre basis (orthogonal) dTensor2 phi_y(mpoints, kmax_fout); // y-derivative of Legendre basis (orthogonal) // Second-order derivatives dTensor2 phi_xx(mpoints, kmax_fout); // xx-derivative of Legendre basis (orthogonal) dTensor2 phi_xy(mpoints, kmax_fout); // xy-derivative of Legendre basis (orthogonal) dTensor2 phi_yy(mpoints, kmax_fout); // yy-derivative of Legendre basis (orthogonal) //find center of current cell const int i1 = Mesh.get_tnode(i,1); const int i2 = Mesh.get_tnode(i,2); const int i3 = Mesh.get_tnode(i,3); // Corners: const double x1 = Mesh.get_node(i1,1); const double y1 = Mesh.get_node(i1,2); const double x2 = Mesh.get_node(i2,1); const double y2 = Mesh.get_node(i2,2); const double x3 = Mesh.get_node(i3,1); const double y3 = Mesh.get_node(i3,2); // Center of current cell: const double xc = (x1+x2+x3)/3.0; const double yc = (y1+y2+y3)/3.0; // Variables that need to be written to, and therefore are // created for each thread dTensor2 xpts (mpoints, 2); dTensor2 qvals (mpoints, meqn); dTensor2 auxvals(mpoints, maux); // local storage for Flux function its Jacobian, and the Hessian: dTensor3 fvals(mpoints, meqn, 2); // flux function (vector) dTensor4 A(mpoints, meqn, meqn, 2); // Jacobian of flux dTensor5 H(mpoints, meqn, meqn, meqn, 2); // Hessian of flux // Compute q, aux and fvals at each Gaussian Quadrature point // for this current cell indexed by (i,j) // Save results into dTensor2 qvals, auxvals and fvals. for (int m=1; m<= mpoints; m++) { // convert phi_xi and phi_eta derivatives // to phi_x and phi_y derivatives through Jacobian // // Note that: // // pd_x = J11 pd_xi + J12 pd_eta and // pd_y = J21 pd_xi + J22 pd_eta. 
// // Squaring these operators yields the second derivatives. for (int k=1; k<=kmax_fout; k++) { phi_x.set(m,k, Mesh.get_jmat(i,1,1)*phi_xi.get(m,k) + Mesh.get_jmat(i,1,2)*phi_eta.get(m,k) ); phi_y.set(m,k, Mesh.get_jmat(i,2,1)*phi_xi.get(m,k) + Mesh.get_jmat(i,2,2)*phi_eta.get(m,k) ); phi_xx.set(m,k, Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,1,1)*phi_xi2.get(m,k) + Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,1,2)*phi_xieta.get(m,k) + Mesh.get_jmat(i,1,2)*Mesh.get_jmat(i,1,2)*phi_eta2.get(m,k) ); phi_xy.set(m,k, Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,2,1)*phi_xi2.get(m,k) +(Mesh.get_jmat(i,1,2)*Mesh.get_jmat(i,2,1) + Mesh.get_jmat(i,1,1)*Mesh.get_jmat(i,2,2))*phi_xieta.get(m,k) + Mesh.get_jmat(i,1,2)*Mesh.get_jmat(i,2,2)*phi_eta2.get(m,k) ); phi_yy.set(m,k, Mesh.get_jmat(i,2,1)*Mesh.get_jmat(i,2,1)*phi_xi2.get(m,k) + Mesh.get_jmat(i,2,1)*Mesh.get_jmat(i,2,2)*phi_xieta.get(m,k) + Mesh.get_jmat(i,2,2)*Mesh.get_jmat(i,2,2)*phi_eta2.get(m,k) ); } // point on the unit triangle const double s = spts.get(m,1); const double t = spts.get(m,2); // point on the physical triangle xpts.set(m,1, xc + (x2-x1)*s + (x3-x1)*t ); xpts.set(m,2, yc + (y2-y1)*s + (y3-y1)*t ); // Solution values (q) at each grid point for (int me=1; me<=meqn; me++) { qvals.set(m,me, 0.0 ); for (int k=1; k<=kmax_qin; k++) { qvals.set(m,me, qvals.get(m,me) + phi.get(m,k) * qin->get(i,me,k) ); } } // Auxiliary values (aux) at each grid point for (int ma=1; ma<=maux; ma++) { auxvals.set(m,ma, 0.0 ); for (int k=1; k<=kmax_auxin; k++) { auxvals.set(m,ma, auxvals.get(m,ma) + phi.get(m,k) * auxin->get(i,ma,k) ); } } } // ----------------------------------------------------------------- // // // Part I: // // Project the flux function onto the basis // functions. This is the term of order O( 1 ) in the // "time-averaged" Taylor expansion of f and g. 
// // ----------------------------------------------------------------- // // Call user-supplied function to set fvals FluxFunc(xpts, qvals, auxvals, fvals); // Evaluate integral on current cell (project onto Legendre basis) // using Gaussian Quadrature for the integration // // TODO - do we want to optimize this by looking into using transposes, // as has been done in the 2d/cart code? (5/14/2014) -DS for (int me=1; me<=mcomps_out; me++) for (int k=1; k<=kmax; k++) { double tmp1 = 0.0; double tmp2 = 0.0; for (int mp=1; mp <= mpoints; mp++) { tmp1 += wgts.get(mp)*fvals.get(mp, me, 1)*phi.get(mp, k); tmp2 += wgts.get(mp)*fvals.get(mp, me, 2)*phi.get(mp, k); } F->set(i, me, k, 2.0*tmp1 ); G->set(i, me, k, 2.0*tmp2 ); } // ----------------------------------------------------------------- // // // Part II: // // Project the derivative of the flux function onto the basis // functions. This is the term of order O( \dt ) in the // "time-averaged" Taylor expansion of f and g. // // ----------------------------------------------------------------- // // ----------------------------------------------------------------- // // Compute pointwise values for fx+gy: // // We can't multiply fvals of f, and g, // by alpha, otherwise we compute the wrong derivative here! 
// dTensor2 fx_plus_gy( mpoints, meqn ); fx_plus_gy.setall(0.); for( int mp=1; mp <= mpoints; mp++ ) for( int me=1; me <= meqn; me++ ) { double tmp = 0.; for( int k=2; k <= kmax; k++ ) { tmp += F->get( i, me, k ) * phi_x.get( mp, k ); tmp += G->get( i, me, k ) * phi_y.get( mp, k ); } fx_plus_gy.set( mp, me, tmp ); } // Call user-supplied Jacobian to set f'(q) and g'(q): DFluxFunc( xpts, qvals, auxvals, A ); // place-holders for point values of // f'(q)( fx + gy ) and g'(q)( fx + gy ): dTensor2 dt_times_fdot( mpoints, meqn ); dTensor2 dt_times_gdot( mpoints, meqn ); // Compute point values for f'(q) * (fx+gy) and g'(q) * (fx+gy): for( int mp=1; mp <= mpoints; mp++ ) for( int m1=1; m1 <= meqn; m1++ ) { double tmp1 = 0.; double tmp2 = 0.; for( int m2=1; m2 <= meqn; m2++ ) { tmp1 += A.get(mp, m1, m2, 1 ) * fx_plus_gy.get(mp, m2); tmp2 += A.get(mp, m1, m2, 2 ) * fx_plus_gy.get(mp, m2); } dt_times_fdot.set( mp, m1, -beta_dt*tmp1 ); dt_times_gdot.set( mp, m1, -beta_dt*tmp2 ); } // --- Third-order terms --- // // // These are the terms that are O( \dt^2 ) in the "time-averaged" // flux function. dTensor2 f_tt( mpoints, meqn ); f_tt.setall(0.); dTensor2 g_tt( mpoints, meqn ); g_tt.setall(0.); if( mterms > 2 ) { // Construct the Hessian at each (quadrature) point D2FluxFunc( xpts, qvals, auxvals, H ); // Second-order derivative terms dTensor2 qx_vals (mpoints, meqn); qx_vals.setall(0.); dTensor2 qy_vals (mpoints, meqn); qy_vals.setall(0.); dTensor2 fxx_vals(mpoints, meqn); fxx_vals.setall(0.); dTensor2 gxx_vals(mpoints, meqn); gxx_vals.setall(0.); dTensor2 fxy_vals(mpoints, meqn); fxy_vals.setall(0.); dTensor2 gxy_vals(mpoints, meqn); gxy_vals.setall(0.); dTensor2 fyy_vals(mpoints, meqn); fyy_vals.setall(0.); dTensor2 gyy_vals(mpoints, meqn); gyy_vals.setall(0.); for( int m=1; m <= mpoints; m++ ) for( int me=1; me <= meqn; me++ ) { // Can start at k=1, because derivative of a constant is // zero. 
double tmp_qx = 0.; double tmp_qy = 0.; for( int k=2; k <= kmax; k++ ) { tmp_qx += phi_x.get(m,k) * qin->get(i,me,k); tmp_qy += phi_y.get(m,k) * qin->get(i,me,k); } qx_vals.set(m,me, tmp_qx ); qy_vals.set(m,me, tmp_qy ); // First non-zero terms start at third-order. for( int k=4; k <= kmax; k++ ) { fxx_vals.set(m,me, fxx_vals.get(m,me) + phi_xx.get(m,k)*F->get(i,me,k) ); gxx_vals.set(m,me, gxx_vals.get(m,me) + phi_xx.get(m,k)*G->get(i,me,k) ); fxy_vals.set(m,me, fxy_vals.get(m,me) + phi_xy.get(m,k)*F->get(i,me,k) ); gxy_vals.set(m,me, gxy_vals.get(m,me) + phi_xy.get(m,k)*G->get(i,me,k) ); fyy_vals.set(m,me, fyy_vals.get(m,me) + phi_yy.get(m,k)*F->get(i,me,k) ); gyy_vals.set(m,me, gyy_vals.get(m,me) + phi_yy.get(m,k)*G->get(i,me,k) ); } } // ----------------------------------- // // Part I: Compute (f_x + g_y)_{,t} // ----------------------------------- // // Compute terms that get multiplied by \pd2{ f }{ q } and \pd2{ g }{ q }. dTensor2 fx_plus_gy_t( mpoints, meqn ); for( int m = 1; m <= mpoints; m++ ) for( int me = 1; me <= meqn; me++ ) { double tmp = 0.; // Terms that get multiplied by the Hessian: for( int m1=1; m1 <= meqn; m1++ ) for( int m2=1; m2 <= meqn; m2++ ) { tmp += H.get(m,me,m1,m2,1)*qx_vals.get(m,m1)*fx_plus_gy.get(m,m2); tmp += H.get(m,me,m1,m2,2)*qy_vals.get(m,m1)*fx_plus_gy.get(m,m2); } // Terms that get multiplied by f'(q) and g'(q): for( int m1=1; m1 <= meqn; m1++ ) { tmp += A.get(m,me,m1,1)*( fxx_vals.get(m,m1)+gxy_vals.get(m,m1) ); tmp += A.get(m,me,m1,2)*( fxy_vals.get(m,m1)+gyy_vals.get(m,m1) ); } fx_plus_gy_t.set( m, me, tmp ); } // ----------------------------------- // // Part II: Compute // f'(q) * fx_plus_gy_t and // g'(q) * fx_plus_gy_t // ----------------------------------- // // Add in the third term that gets multiplied by A: for( int m=1; m <= mpoints; m++ ) for( int m1=1; m1 <= meqn; m1++ ) { double tmp1 = 0.; double tmp2 = 0.; for( int m2=1; m2 <= meqn; m2++ ) { tmp1 += A.get(m,m1,m2,1)*fx_plus_gy_t.get(m,m2); tmp2 += 
A.get(m,m1,m2,2)*fx_plus_gy_t.get(m,m2); } f_tt.set( m, m1, tmp1 ); g_tt.set( m, m1, tmp2 ); } // ----------------------------------------------- // // Part III: Add in contributions from // f''(q) * (fx_plus_gy, fx_plus_gy ) and // g''(q) * (fx_plus_gy, fx_plus_gy ). // ----------------------------------------------- // for( int m =1; m <= mpoints; m++ ) for( int me =1; me <= meqn; me++ ) { double tmp1 = 0.; double tmp2 = 0.; // Terms that get multiplied by the Hessian: for( int m1=1; m1 <= meqn; m1++ ) for( int m2=1; m2 <= meqn; m2++ ) { tmp1 += H.get(m,me,m1,m2,1)*fx_plus_gy.get(m,m1)*fx_plus_gy.get(m,m2); tmp2 += H.get(m,me,m1,m2,2)*fx_plus_gy.get(m,m1)*fx_plus_gy.get(m,m2); } f_tt.set( m, me, f_tt.get(m,me) + tmp1 ); g_tt.set( m, me, g_tt.get(m,me) + tmp2 ); } } // End of computing "third"-order terms // ---------------------------------------------------------- // // // Construct basis coefficients (integrate_on_current_cell) // // ---------------------------------------------------------- // for (int me=1; me<=mcomps_out; me++) for (int k=1; k<=kmax; k++) { double tmp1 = 0.0; double tmp2 = 0.0; for (int mp=1; mp<=mpoints; mp++) { tmp1 += wgts.get(mp)*phi.get(mp,k)*( dt_times_fdot.get(mp, me) + charlie_dt*f_tt.get(mp, me) ); tmp2 += wgts.get(mp)*phi.get(mp,k)*( dt_times_gdot.get(mp, me) + charlie_dt*g_tt.get(mp, me) ); } F->set(i,me,k, F->get(i,me,k) + 2.0*tmp1 ); G->set(i,me,k, G->get(i,me,k) + 2.0*tmp2 ); } } }