int FMMGetEpsilon_PolBasisNV(const Simulation *S, const Layer *L, const int n, std::complex<double> *Epsilon2, std::complex<double> *Epsilon_inv){ double mp1 = 0; int pwr = S->options.lanczos_smoothing_power; if(S->options.use_Lanczos_smoothing){ mp1 = GetLanczosSmoothingOrder(S); S4_TRACE("I Lanczos smoothing order = %f\n", mp1); mp1 *= S->options.lanczos_smoothing_width; } if(Epsilon_inv){} // prevent unused parameter warning const int n2 = 2*n; const int nn = n*n; const double unit_cell_size = Simulation_GetUnitCellSize(S); const int *G = S->solution->G; const int ndim = (0 == S->Lr[2] && 0 == S->Lr[3]) ? 1 : 2; double *ivalues = (double*)S4_malloc(sizeof(double)*(2+10)*(L->pattern.nshapes+1)); double *values = ivalues + 2*(L->pattern.nshapes+1); // Get all the dielectric tensors //bool have_tensor = false; for(int i = -1; i < L->pattern.nshapes; ++i){ const Material *M; if(-1 == i){ M = Simulation_GetMaterialByName(S, L->material, NULL); }else{ M = Simulation_GetMaterialByIndex(S, L->pattern.shapes[i].tag); } if(0 == M->type){ std::complex<double> eps_temp(M->eps.s[0], M->eps.s[1]); //eps_temp = Simulation_GetEpsilonByIndex(S, L->pattern.shapes[i].tag); values[2*(i+1)+0] = eps_temp.real(); values[2*(i+1)+1] = eps_temp.imag(); eps_temp = 1./eps_temp; ivalues[2*(i+1)+0] = eps_temp.real(); ivalues[2*(i+1)+1] = eps_temp.imag(); }else{ //have_tensor = true; } } // Epsilon2 is // [ Epsilon - Delta*Pxx -Delta*Pxy ] // [ -Delta*Pyx Epsilon - Delta*Pyy ] // Pxy = Fourier transform of par_x^* par_y // // Need temp storage for Delta and P__ std::complex<double> *P = Simulation_GetCachedField(S, L); std::complex<double> *work = NULL; std::complex<double> *mDelta = NULL; std::complex<double> *Eta = NULL; if(NULL == P){ // We need to compute the vector field // Make vector fields // Determine size of the vector field grid int ngrid[2] = {1,1}; for(int i = 0; i < 2; ++i){ // choose grid size for(int j = 0; j < n; ++j){ if(abs(G[2*j+i]) > ngrid[i]){ ngrid[i] = abs(G[2*j+i]); } } if(ngrid[i] < 1){ ngrid[i] = 1; } ngrid[i] *= S->options.resolution; ngrid[i] = fft_next_fast_size(ngrid[i]); } const int ng2 = ngrid[0]*ngrid[1]; work = (std::complex<double>*)S4_malloc(sizeof(std::complex<double>)*(6*nn + 4*ng2)); mDelta = work; Eta = mDelta + nn; P = Eta + nn; std::complex<double> *Ffrom = P + 4*nn; // Fourier source std::complex<double> *Fto = Ffrom + ng2; // Fourier dest std::complex<double> *par = Fto + ng2; // real space parallel vector // Generate the vector field const double ing2 = 1./(double)ng2; int ii[2]; double *vfield = (double*)S4_malloc(sizeof(double)*2*ng2); if(0 == S->Lr[2] && 0 == S->Lr[3]){ // 1D, generate the trivial field double nv[2] = {-S->Lr[1], S->Lr[0]}; double nva = hypot(nv[0],nv[1]); nv[0] /= nva; nv[1] /= nva; for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ vfield[2*(ii[0]+ii[1]*ngrid[0])+0] = nv[0]; vfield[2*(ii[0]+ii[1]*ngrid[0])+1] = nv[1]; } } }else{ int error = 0; S4_VERB(1, "Generating polarization vector field of size %d x %d\n", ngrid[0], ngrid[1]); error = Pattern_GenerateFlowField(&L->pattern, 0, S->Lr, ngrid[0], ngrid[1], vfield); // Normalize the field for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ double a = hypot( vfield[2*(ii[0]+ii[1]*ngrid[0])+0], vfield[2*(ii[0]+ii[1]*ngrid[0])+1]); if(a > 0){ vfield[2*(ii[0]+ii[1]*ngrid[0])+0] /= a; vfield[2*(ii[0]+ii[1]*ngrid[0])+1] /= a; }else{ vfield[2*(ii[0]+ii[1]*ngrid[0])+0] = 1; vfield[2*(ii[0]+ii[1]*ngrid[0])+1] = 0; } } } if(0 != error){ S4_TRACE("< Simulation_ComputeLayerBands (failed; Pattern_GenerateFlowField returned %d) [omega=%f]\n", error, S->omega[0]); if(NULL != vfield){ S4_free(vfield); } if(NULL != work){ S4_free(work); } if(NULL != ivalues){ S4_free(ivalues); } return error; } } if(NULL != S->options.vector_field_dump_filename_prefix){ const char *layer_name = NULL != L->name ? L->name : ""; const size_t prefix_len = strlen(S->options.vector_field_dump_filename_prefix); char *filename = (char*)malloc(sizeof(char) * (prefix_len + strlen(layer_name) + 1)); strcpy(filename, S->options.vector_field_dump_filename_prefix); strcpy(filename+prefix_len, layer_name); FILE *fp = fopen(filename, "wb"); if(NULL != fp){ for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ fprintf(fp, "%d\t%d\t%f\t%f\n", ii[0], ii[1], vfield[2*(ii[0]+ii[1]*ngrid[0])+0], vfield[2*(ii[0]+ii[1]*ngrid[0])+1]); } fprintf(fp, "\n"); } fclose(fp); } free(filename); } for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ par[2*(ii[0]+ii[1]*ngrid[0])+0] = vfield[2*(ii[0]+ii[1]*ngrid[0])+0]; par[2*(ii[0]+ii[1]*ngrid[0])+1] = vfield[2*(ii[0]+ii[1]*ngrid[0])+1]; } } fft_plan plan = fft_plan_dft_2d(ngrid, Ffrom, Fto, 1); // We fill in the quarter blocks of F in Fortran order for(int w = 0; w < 4; ++w){ int Erow = (w&1 ? n : 0); int Ecol = (w&2 ? n : 0); int _1 = (w&1); int _2 = ((w&2)>>1); for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ const int si1 = ii[1] >= ngrid[1]/2 ? ii[1]-ngrid[1]/2 : ii[1]+ngrid[1]/2; for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ const int si0 = ii[0] >= ngrid[0]/2 ? ii[0]-ngrid[0]/2 : ii[0]+ngrid[0]/2; Ffrom[si1+si0*ngrid[1]] = par[2*(ii[0]+ii[1]*ngrid[0])+_1]*par[2*(ii[0]+ii[1]*ngrid[0])+_2]; } } fft_plan_exec(plan); for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int f[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; if(f[0] < 0){ f[0] += ngrid[0]; } if(f[1] < 0){ f[1] += ngrid[1]; } P[Erow+i+(Ecol+j)*n2] = ing2 * Fto[f[1]+f[0]*ngrid[1]]; } } } fft_plan_destroy(plan); //free(fftcfg); if(NULL != vfield){ S4_free(vfield); } // Add to cache Simulation_AddFieldToCache((Simulation*)S, L, S->n_G, P, 4*nn); }else{
int FMMGetEpsilon_Kottke(const S4_Simulation *S, const S4_Layer *L, const int n, std::complex<double> *Epsilon2, std::complex<double> *Epsilon_inv){ const int n2 = 2*n; const int *G = S->G; // Make grid // Determine size of the grid int ngrid[2] = {1,1}; for(int i = 0; i < 2; ++i){ // choose grid size for(int j = 0; j < n; ++j){ if(abs(G[2*j+i]) > ngrid[i]){ ngrid[i] = abs(G[2*j+i]); } } if(ngrid[i] < 1){ ngrid[i] = 1; } ngrid[i] *= S->options.resolution; ngrid[i] = fft_next_fast_size(ngrid[i]); } S4_TRACE("I Subpixel smoothing on %d x %d grid\n", ngrid[0], ngrid[1]); const int ng2 = ngrid[0]*ngrid[1]; const double ing2 = 1./(double)ng2; // The grid needs to hold 5 matrix elements: xx,xy,yx,yy,zz // We actually make 5 different grids to facilitate the fft routines //std::complex<double> *work = (std::complex<double>*)S4_malloc(sizeof(std::complex<double>)*(6*ng2)); std::complex<double> *work = fft_alloc_complex(6*ng2); std::complex<double>*fxx = work; std::complex<double>*fxy = fxx + ng2; std::complex<double>*fyx = fxy + ng2; std::complex<double>*fyy = fyx + ng2; std::complex<double>*fzz = fyy + ng2; std::complex<double>*Fto = fzz + ng2; //memset(work, 0, sizeof(std::complex<double>) * 6*ng2); double *discval = (double*)S4_malloc(sizeof(double)*(L->pattern.nshapes+1)); fft_plan plans[5]; for(int i = 0; i <= 4; ++i){ plans[i] = fft_plan_dft_2d(ngrid, fxx+i*ng2, Fto, 1); } int ii[2]; for(ii[0] = 0; ii[0] < ngrid[0]; ++ii[0]){ const int si0 = ii[0] >= ngrid[0]/2 ? ii[0]-ngrid[0]/2 : ii[0]+ngrid[0]/2; for(ii[1] = 0; ii[1] < ngrid[1]; ++ii[1]){ const int si1 = ii[1] >= ngrid[1]/2 ? ii[1]-ngrid[1]/2 : ii[1]+ngrid[1]/2; Pattern_DiscretizeCell(&L->pattern, S->Lr, ngrid[0], ngrid[1], ii[0], ii[1], discval); int nnz = 0; int imat[2] = {-1,-1}; for(int i = 0; i <= L->pattern.nshapes; ++i){ if(fabs(discval[i]) > 2*std::numeric_limits<double>::epsilon()){ if(0 == nnz){ imat[0] = i; } else if(1 == nnz){ imat[1] = i; } ++nnz; } } //S4_TRACE("I %d,%d nnz = %d\n", ii[0], ii[1], nnz); if(nnz < 2){ // just one material //fprintf(stderr, "%d\t%d\t0\t0\n", ii[0], ii[1]); const S4_Material *M; if(0 == imat[0]){ M = &S->material[L->material]; }else{ M = &S->material[L->pattern.shapes[imat[0]-1].tag]; } if(0 == M->type){ std::complex<double> eps_scalar(M->eps.s[0], M->eps.s[1]); fxx[si1+si0*ngrid[1]] = eps_scalar; fxy[si1+si0*ngrid[1]] = 0; fyx[si1+si0*ngrid[1]] = 0; fyy[si1+si0*ngrid[1]] = eps_scalar; fzz[si1+si0*ngrid[1]] = eps_scalar; }else{ fxx[si1+si0*ngrid[1]] = std::complex<double>(M->eps.abcde[0],M->eps.abcde[1]); fyy[si1+si0*ngrid[1]] = std::complex<double>(M->eps.abcde[6],M->eps.abcde[7]); fxy[si1+si0*ngrid[1]] = std::complex<double>(M->eps.abcde[2],M->eps.abcde[3]); fyx[si1+si0*ngrid[1]] = std::complex<double>(M->eps.abcde[4],M->eps.abcde[5]); fzz[si1+si0*ngrid[1]] = std::complex<double>(M->eps.abcde[8],M->eps.abcde[9]); } }else{ if(imat[1] > imat[0]){ std::swap(imat[0],imat[1]); } // imat[0] is the more-contained shape double nvec[2]; const double nxvec[2] = { ((double)ii[0]+0.5)/(double)ngrid[0]-0.5, ((double)ii[1]+0.5)/(double)ngrid[1]-0.5 }; const double xvec[2] = { // center of current parallelogramic pixel S->Lr[0]*nxvec[0] + S->Lr[2]*nxvec[1], S->Lr[1]*nxvec[0] + S->Lr[3]*nxvec[1] }; shape_get_normal(&(L->pattern.shapes[imat[0]-1]), xvec, nvec); if(2 == nnz && imat[1] != imat[0] && !(0 == nvec[0] && 0 == nvec[1])){ // use Kottke averaging //fprintf(stderr, "%d\t%d\t%f\t%f\n", ii[0], ii[1], nvec[0], nvec[1]); const double fill = discval[imat[0]]; // Get the two tensors std::complex<double> abcde[2][5]; for(int i = 0; i < 2; ++i){ const S4_Material *M; if(0 == imat[i]){ M = &S->material[L->material]; }else{ M = &S->material[L->pattern.shapes[imat[i]-1].tag]; } if(0 == M->type){ std::complex<double> eps_scalar(M->eps.s[0], M->eps.s[1]); abcde[i][0] = eps_scalar; abcde[i][1] = 0; abcde[i][2] = 0; abcde[i][3] = eps_scalar; abcde[i][4] = eps_scalar; }else{ for(int j = 0; j < 4; ++j){ abcde[i][j] = std::complex<double>(M->eps.abcde[2*j+0],M->eps.abcde[2*j+1]); } abcde[i][4] = std::complex<double>(M->eps.abcde[8],M->eps.abcde[9]); } } // The zz component is just directly obtained by averaging fzz[si1+si0*ngrid[1]] = discval[imat[0]] * abcde[0][4] + discval[imat[1]] * abcde[1][4]; // Compute rotated tensors // abcd1 = Rot^T abcd1 Rot // Rot = [ nvec[0] -nvec[1] ] // [ nvec[1] nvec[0] ] std::complex<double> abcd[2][4]; //fprintf(stderr, " nvec = {%f,%f}, fill = %f\n", nvec[0], nvec[1], fill); //fprintf(stderr, " abcde[0] = {%f,%f,%f,%f}\n", abcde[0][0].real(), abcde[0][1].real(), abcde[0][2].real(), abcde[0][3].real()); //fprintf(stderr, " abcde[1] = {%f,%f,%f,%f}\n", abcde[1][0].real(), abcde[1][1].real(), abcde[1][2].real(), abcde[1][3].real()); sym2x2rot(abcde[0], nvec, abcd[0]); sym2x2rot(abcde[1], nvec, abcd[1]); // Compute the average tau tensor into abcde[0][0-3] // tau(e__) = [ -1/e11 e12/e11 ] // [ e21/e11 e22 - e21 e12/e11 ] abcde[0][0] = fill * (-1./abcd[0][0]) + (1.-fill) * (-1./abcd[1][0]); abcde[0][1] = fill * (abcd[0][1]/abcd[0][0]) + (1.-fill) * (abcd[1][1]/abcd[1][0]); abcde[0][2] = fill * (abcd[0][2]/abcd[0][0]) + (1.-fill) * (abcd[1][2]/abcd[1][0]); abcde[0][3] = fill * (abcd[0][3]-abcd[0][1]*abcd[0][2]/abcd[0][0]) + (1.-fill) * (abcd[1][3]-abcd[1][1]*abcd[1][2]/abcd[1][0]); // Invert the tau transform into abcd[1] // invtau(t__) = [ -1/t11 -t12/t11 ] // [ -t21/t11 t22 - t21 t12/t11 ] abcd[1][0] = -1./abcde[0][0]; abcd[1][1] = -abcde[0][1]/abcde[0][0]; abcd[1][2] = -abcde[0][2]/abcde[0][0]; abcd[1][3] = abcde[0][3] - abcde[0][1]*abcde[0][2]/abcde[0][0]; //fprintf(stderr, " abcd[1] = {%f,%f,%f,%f}\n", abcd[1][0].real(), abcd[1][1].real(), abcd[1][2].real(), abcd[1][3].real()); // Unrotate abcd[1] into abcd[0] nvec[1] = -nvec[1]; sym2x2rot(abcd[1], nvec, abcd[0]); //fprintf(stderr, " abcd[0] = {%f,%f,%f,%f}\n\n", abcd[0][0].real(), abcd[0][1].real(), abcd[0][2].real(), abcd[0][3].real()); fxx[si1+si0*ngrid[1]] = abcd[0][0]; fxy[si1+si0*ngrid[1]] = abcd[0][1]; fyx[si1+si0*ngrid[1]] = abcd[0][2]; fyy[si1+si0*ngrid[1]] = abcd[0][3]; }else{ // too many, just use the area weighting //fprintf(stderr, "%d\t%d\t3\n", ii[0], ii[1]); fxx[si1+si0*ngrid[1]] = 0; fxy[si1+si0*ngrid[1]] = 0; fyx[si1+si0*ngrid[1]] = 0; fyy[si1+si0*ngrid[1]] = 0; fzz[si1+si0*ngrid[1]] = 0; for(int i = 0; i <= L->pattern.nshapes; ++i){ if(0 == discval[i]){ continue; } int j = i-1; const S4_Material *M; if(-1 == j){ M = &S->material[L->material]; }else{ M = &S->material[L->pattern.shapes[j].tag]; } if(0 == M->type){ std::complex<double> eps_scalar(M->eps.s[0], M->eps.s[1]); fxx[si1+si0*ngrid[0]] += discval[i]*eps_scalar; fyy[si1+si0*ngrid[0]] += discval[i]*eps_scalar; fzz[si1+si0*ngrid[0]] += discval[i]*eps_scalar; }else{ std::complex<double> ea(M->eps.abcde[0],M->eps.abcde[1]); std::complex<double> eb(M->eps.abcde[2],M->eps.abcde[3]); std::complex<double> ec(M->eps.abcde[4],M->eps.abcde[5]); std::complex<double> ed(M->eps.abcde[6],M->eps.abcde[7]); fxx[si1+si0*ngrid[1]] += discval[i]*ea; fxy[si1+si0*ngrid[1]] += discval[i]*eb; fyx[si1+si0*ngrid[1]] += discval[i]*ec; fyy[si1+si0*ngrid[1]] += discval[i]*ed; fzz[si1+si0*ngrid[1]] += discval[i]*std::complex<double>(M->eps.abcde[8],M->eps.abcde[9]); } } } } /* fprintf(stderr, "%d\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", ii[0], ii[1], fxx[si1+si0*ngrid[1]].real(), fxx[si1+si0*ngrid[1]].imag(), fxy[si1+si0*ngrid[1]].real(), fxy[si1+si0*ngrid[1]].imag(), fyx[si1+si0*ngrid[1]].real(), fyx[si1+si0*ngrid[1]].imag(), fyy[si1+si0*ngrid[1]].real(), fyy[si1+si0*ngrid[1]].imag(), fzz[si1+si0*ngrid[1]].real(), fzz[si1+si0*ngrid[1]].imag() );//*/ } //fprintf(stderr, "\n"); } // Make Epsilon_inv first { //fprintf(stderr, "fzz1[0] = %f+I %f\n", fzz[0].real(), fzz[0].imag()); //memset(Fto, 0, sizeof(std::complex<double>)*ng2); fft_plan_exec(plans[4]); //fprintf(stderr, "fzz2[0] = %f+I %f\n", fzz[0].real(), fzz[0].imag()); //fprintf(stderr, "Fto[0] = %f+I %f\n", Fto[0].real(), Fto[0].imag()); for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int f[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; if(f[0] < 0){ f[0] += ngrid[0]; } if(f[1] < 0){ f[1] += ngrid[1]; } Epsilon2[i+j*n] = ing2 * Fto[f[1]+f[0]*ngrid[1]]; } } } //fprintf(stderr, "Epsilon2[0] = %f+I %f\n", Epsilon2[0].real(), Epsilon2[0].imag()); // Epsilon_inv needs inverting RNP::TBLAS::SetMatrix<'A'>(n,n, 0.,1., Epsilon_inv,n); int solve_info; RNP::LinearSolve<'N'>(n,n, Epsilon2,n, Epsilon_inv,n, &solve_info, NULL); //fprintf(stderr, "Epsilon_inv[0] = %f+I %f\n", Epsilon_inv[0].real(), Epsilon_inv[0].imag()); // We fill in the quarter blocks of F in Fortran order for(int w = 0; w < 4; ++w){ int Ecol = (w&1 ? n : 0); int Erow = (w&2 ? n : 0); //memset(Fto, 0, sizeof(std::complex<double>)*ng2); fft_plan_exec(plans[w]); //fprintf(stderr, "Fto(%d)[0] = %f+I %f\n", w, Fto[0].real(), Fto[0].imag()); for(int j = 0; j < n; ++j){ for(int i = 0; i < n; ++i){ int f[2] = {G[2*i+0]-G[2*j+0],G[2*i+1]-G[2*j+1]}; if(f[0] < 0){ f[0] += ngrid[0]; } if(f[1] < 0){ f[1] += ngrid[1]; } Epsilon2[Erow+i+(Ecol+j)*n2] = ing2 * Fto[f[1]+f[0]*ngrid[1]]; } } } //fprintf(stderr, "Epsilon2[0] = %f+I %f\n", Epsilon2[0].real(), Epsilon2[0].imag()); for(int i = 0; i <= 4; ++i){ fft_plan_destroy(plans[i]); } S4_free(discval); //S4_free(work); fft_free(work); return 0; }