int FMMGetEpsilon_PolBasisNV(const Simulation *S, const Layer *L, const int n, std::complex<double> *Epsilon2, std::complex<double> *Epsilon_inv){ double mp1 = 0; int pwr = S->options.lanczos_smoothing_power; if(S->options.use_Lanczos_smoothing){ mp1 = GetLanczosSmoothingOrder(S); S4_TRACE("I Lanczos smoothing order = %f\n", mp1); mp1 *= S->options.lanczos_smoothing_width; } if(Epsilon_inv){} // prevent unused parameter warning const int n2 = 2*n; const int nn = n*n; const double unit_cell_size = Simulation_GetUnitCellSize(S); const int *G = S->solution->G; const int ndim = (0 == S->Lr[2] && 0 == S->Lr[3]) ? 1 : 2; double *ivalues = (double*)S4_malloc(sizeof(double)*(2+10)*(L->pattern.nshapes+1)); double *values = ivalues + 2*(L->pattern.nshapes+1); // Get all the dielectric tensors //bool have_tensor = false; for(int i = -1; i < L->pattern.nshapes; ++i){ const Material *M; if(-1 == i){ M = Simulation_GetMaterialByName(S, L->material, NULL); }else{ M = Simulation_GetMaterialByIndex(S, L->pattern.shapes[i].tag); } if(0 == M->type){ std::complex<double> eps_temp(M->eps.s[0], M->eps.s[1]); //eps_temp = Simulation_GetEpsilonByIndex(S, L->pattern.shapes[i].tag); values[2*(i+1)+0] = eps_temp.real(); values[2*(i+1)+1] = eps_temp.imag(); eps_temp = 1./eps_temp; ivalues[2*(i+1)+0] = eps_temp.real(); ivalues[2*(i+1)+1] = eps_temp.imag(); }else{ //have_tensor = true; } } // Epsilon2 is // [ Epsilon - Delta*Pxx -Delta*Pxy ] // [ -Delta*Pyx Epsilon - Delta*Pyy ] // Pxy = Fourier transform of par_x^* par_y // // Need temp storage for Delta and P__ std::complex<double> *P = Simulation_GetCachedField(S, L); std::complex<double> *work = NULL; std::complex<double> *mDelta = NULL; std::complex<double> *Eta = NULL; if(NULL == P){ // We need to compute the vector field // Make vector fields // Determine size of the vector field grid int ngrid[2] = {1,1}; for(int i = 0; i < 2; ++i){ // choose grid size for(int j = 0; j < n; ++j){ if(abs(G[2*j+i]) > ngrid[i]){ ngrid[i] = abs(G[2*j+i]); } } if(ngrid[i] < 1){ ngrid[i] = 1; } ngrid[i] *= S->options.resolution; ngrid[i] = fft_next_fast_size(ngrid[i]); } const int ng2 = ngrid[0]*ngrid[1]; work = (std::complex<double>*)S4_malloc(sizeof(std::complex<double>)*(6*nn + 4*ng2)); mDelta = work; Eta = mDelta + nn; P = Eta + nn; std::complex<double> *Ffrom = P + 4*nn; // Fourier source std::complex<double> *Fto = Ffrom + ng2; // Fourier dest std::complex<double> *par = Fto + ng2; // real space parallel vector // Generate the vector field const double ing2 = 1./(double)ng2; int ii[2]; double *vfield = (double*)S4_malloc(sizeof(double)*2*ng2); if(0 == S->Lr[2] && 0 == S->Lr[3]){ // 1D, generate the trivial field double nv[2] = {-S->Lr[1], S->Lr[0]}; double nva = hypot(nv[0],nv[1]); nv[0] /= nva; nv[1] /= nva; for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ vfield[2*(ii[0]+ii[1]*ngrid[0])+0] = nv[0]; vfield[2*(ii[0]+ii[1]*ngrid[0])+1] = nv[1]; } } }else{ int error = 0; S4_VERB(1, "Generating polarization vector field of size %d x %d\n", ngrid[0], ngrid[1]); error = Pattern_GenerateFlowField(&L->pattern, 0, S->Lr, ngrid[0], ngrid[1], vfield); // Normalize the field for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ double a = hypot( vfield[2*(ii[0]+ii[1]*ngrid[0])+0], vfield[2*(ii[0]+ii[1]*ngrid[0])+1]); if(a > 0){ vfield[2*(ii[0]+ii[1]*ngrid[0])+0] /= a; vfield[2*(ii[0]+ii[1]*ngrid[0])+1] /= a; }else{ vfield[2*(ii[0]+ii[1]*ngrid[0])+0] = 1; vfield[2*(ii[0]+ii[1]*ngrid[0])+1] = 0; } } } if(0 != error){ S4_TRACE("< Simulation_ComputeLayerBands (failed; Pattern_GenerateFlowField returned %d) [omega=%f]\n", error, S->omega[0]); if(NULL != vfield){ S4_free(vfield); } if(NULL != work){ S4_free(work); } if(NULL != ivalues){ S4_free(ivalues); } return error; } } if(NULL != S->options.vector_field_dump_filename_prefix){ const char *layer_name = NULL != L->name ? L->name : ""; const size_t prefix_len = strlen(S->options.vector_field_dump_filename_prefix); char *filename = (char*)malloc(sizeof(char) * (prefix_len + strlen(layer_name) + 1)); strcpy(filename, S->options.vector_field_dump_filename_prefix); strcpy(filename+prefix_len, layer_name); FILE *fp = fopen(filename, "wb"); if(NULL != fp){ for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ fprintf(fp, "%d\t%d\t%f\t%f\n", ii[0], ii[1], vfield[2*(ii[0]+ii[1]*ngrid[0])+0], vfield[2*(ii[0]+ii[1]*ngrid[0])+1]); } fprintf(fp, "\n"); } fclose(fp); } free(filename); } for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ par[2*(ii[0]+ii[1]*ngrid[0])+0] = vfield[2*(ii[0]+ii[1]*ngrid[0])+0]; par[2*(ii[0]+ii[1]*ngrid[0])+1] = vfield[2*(ii[0]+ii[1]*ngrid[0])+1]; } } fft_plan plan = fft_plan_dft_2d(ngrid, Ffrom, Fto, 1); // We fill in the quarter blocks of F in Fortran order for(int w = 0; w < 4; ++w){ int Erow = (w&1 ? n : 0); int Ecol = (w&2 ? n : 0); int _1 = (w&1); int _2 = ((w&2)>>1); for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ const int si1 = ii[1] >= ngrid[1]/2 ? ii[1]-ngrid[1]/2 : ii[1]+ngrid[1]/2; for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ const int si0 = ii[0] >= ngrid[0]/2 ? ii[0]-ngrid[0]/2 : ii[0]+ngrid[0]/2; Ffrom[si1+si0*ngrid[1]] = par[2*(ii[0]+ii[1]*ngrid[0])+_1]*par[2*(ii[0]+ii[1]*ngrid[0])+_2]; } } fft_plan_exec(plan); for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int f[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; if(f[0] < 0){ f[0] += ngrid[0]; } if(f[1] < 0){ f[1] += ngrid[1]; } P[Erow+i+(Ecol+j)*n2] = ing2 * Fto[f[1]+f[0]*ngrid[1]]; } } } fft_plan_destroy(plan); //free(fftcfg); if(NULL != vfield){ S4_free(vfield); } // Add to cache Simulation_AddFieldToCache((Simulation*)S, L, S->n_G, P, 4*nn); }else{
int FMMGetEpsilon_Experimental(const S4_Simulation *S, const S4_Layer *L, const int n, std::complex<double> *Epsilon2, std::complex<double> *Epsilon_inv){ const int n2 = 2*n; const int *G = S->G; const int ndim = (0 == S->Lr[2] && 0 == S->Lr[3]) ? 1 : 2; double *ivalues = (double*)S4_malloc(sizeof(double)*(2+10)*(L->pattern.nshapes+1)); double *values = ivalues + 2*(L->pattern.nshapes+1); S4_TRACE("I Experimental epsilon\n"); // Get all the dielectric tensors bool have_tensor = false; for(int i = -1; i < L->pattern.nshapes; ++i){ const S4_Material *M; if(-1 == i){ M = &S->material[L->material]; }else{ M = &S->material[L->pattern.shapes[i].tag]; } if(0 == M->type){ std::complex<double> eps_temp(M->eps.s[0], M->eps.s[1]); //eps_temp = Simulation_GetEpsilonByIndex(S, L->pattern.shapes[i].tag); values[2*(i+1)+0] = eps_temp.real(); values[2*(i+1)+1] = eps_temp.imag(); eps_temp = 1./eps_temp; ivalues[2*(i+1)+0] = eps_temp.real(); ivalues[2*(i+1)+1] = eps_temp.imag(); }else{ have_tensor = true; } } const double unit_cell_size = Simulation_GetUnitCellSize(S); if(!have_tensor){ // Make Epsilon for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int dG[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; double f[2] = { dG[0] * S->Lk[0] + dG[1] * S->Lk[2], dG[0] * S->Lk[1] + dG[1] * S->Lk[3] }; double ft[2]; Pattern_GetFourierTransform(&L->pattern, values, f, ndim, unit_cell_size, ft); Epsilon2[i+j*n2] = std::complex<double>(ft[0],ft[1]); } } // Make Epsilon_inv for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int dG[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; double f[2] = { dG[0] * S->Lk[0] + dG[1] * S->Lk[2], dG[0] * S->Lk[1] + dG[1] * S->Lk[3] }; double ft[2]; Pattern_GetFourierTransform(&L->pattern, ivalues, f, ndim, unit_cell_size, ft); Epsilon_inv[i+j*n] = std::complex<double>(ft[0],ft[1]); } } S4_TRACE("I Epsilon(0,0) = %f,%f [omega=%f]\n", Epsilon2[0].real(), Epsilon2[0].imag(), S->omega[0]); // Upper block of diagonal of Epsilon2 is already Epsilon RNP::TBLAS::CopyMatrix<'A'>(n,n,&Epsilon2[0+0*n2],n2, &Epsilon2[n+n*n2],n2); RNP::TBLAS::SetMatrix<'A'>(n,n, 0.,0., &Epsilon2[n+0*n2],n2); RNP::TBLAS::SetMatrix<'A'>(n,n, 0.,0., &Epsilon2[0+n*n2],n2); // Epsilon2 has Epsilon's on its diagonal }else{ // have tensor dielectric const int ldv = 2*(1+L->pattern.nshapes); for(int i = -1; i < L->pattern.nshapes; ++i){ const S4_Material *M; if(-1 == i){ M = &S->material[L->material]; }else{ M = &S->material[L->pattern.shapes[i].tag]; } if(0 == M->type){ const std::complex<double> eps_temp(M->eps.s[0], M->eps.s[1]); const std::complex<double> inveps_temp = 1./eps_temp; values[0*ldv+2*(i+1)+0] = eps_temp.real(); values[0*ldv+2*(i+1)+1] = eps_temp.imag(); values[1*ldv+2*(i+1)+0] = 0; values[1*ldv+2*(i+1)+1] = 0; values[2*ldv+2*(i+1)+0] = 0; values[2*ldv+2*(i+1)+1] = 0; values[3*ldv+2*(i+1)+0] = eps_temp.real(); values[3*ldv+2*(i+1)+1] = eps_temp.imag(); values[4*ldv+2*(i+1)+0] = eps_temp.real(); values[4*ldv+2*(i+1)+1] = eps_temp.imag(); ivalues[0*ldv+2*(i+1)+0] = inveps_temp.real(); ivalues[0*ldv+2*(i+1)+1] = inveps_temp.imag(); }else{ std::complex<double> eps_temp(M->eps.abcde[8], M->eps.abcde[9]); const std::complex<double> inveps_temp = 1./eps_temp; // We must transpose the values array here, as well as transpose the tensor values[0*ldv+2*(i+1)+0] = M->eps.abcde[0]; values[0*ldv+2*(i+1)+1] = M->eps.abcde[1]; values[1*ldv+2*(i+1)+0] = M->eps.abcde[4]; values[1*ldv+2*(i+1)+1] = M->eps.abcde[5]; values[2*ldv+2*(i+1)+0] = M->eps.abcde[2]; values[2*ldv+2*(i+1)+1] = M->eps.abcde[3]; values[3*ldv+2*(i+1)+0] = M->eps.abcde[6]; values[3*ldv+2*(i+1)+1] = M->eps.abcde[7]; values[4*ldv+2*(i+1)+0] = M->eps.abcde[8]; values[4*ldv+2*(i+1)+1] = M->eps.abcde[9]; ivalues[0*ldv+2*(i+1)+0] = inveps_temp.real(); ivalues[0*ldv+2*(i+1)+1] = inveps_temp.imag(); } } for(int k = -1; k < 4; ++k){ if(-1 == k){ for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int dG[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; double f[2] = { dG[0] * S->Lk[0] + dG[1] * S->Lk[2], dG[0] * S->Lk[1] + dG[1] * S->Lk[3] }; double ft[2]; Pattern_GetFourierTransform(&L->pattern, ivalues, f, ndim, unit_cell_size, ft); Epsilon_inv[i+j*n] = std::complex<double>(ft[0],ft[1]); } } }else{ const int ib = k&1 ? n : 0; const int jb = k&2 ? n : 0; for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int dG[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; double f[2] = { dG[0] * S->Lk[0] + dG[1] * S->Lk[2], dG[0] * S->Lk[1] + dG[1] * S->Lk[3] }; double ft[2]; Pattern_GetFourierTransform(&L->pattern, &values[k*ldv], f, ndim, unit_cell_size, ft); Epsilon2[ib+i+(jb+j)*n2] = std::complex<double>(ft[0],ft[1]); } } } } } S4_free(ivalues); return 0; }