// ----------------------------------------------------------------- // Given matrix in = P.u, calculate the unitary matrix u = [1 / P].in // and the positive P = sqrt[in.in^dag] // We diagonalize PSq = in.in^dag using LAPACK, // then project out its inverse square root void polar(matrix *in, matrix *u, matrix *P) { char V = 'V'; // Ask LAPACK for both eigenvalues and eigenvectors char U = 'U'; // Have LAPACK store upper triangle of U.Ubar int row, col, Npt = NCOL, stat = 0, Nwork = 2 * NCOL; matrix PSq, Pinv, tmat; // Convert PSq to column-major double array used by LAPACK mult_na(in, in, &PSq); for (row = 0; row < NCOL; row++) { for (col = 0; col < NCOL; col++) { store[2 * (col * NCOL + row)] = PSq.e[row][col].real; store[2 * (col * NCOL + row) + 1] = PSq.e[row][col].imag; } } // Compute eigenvalues and eigenvectors of PSq zheev_(&V, &U, &Npt, store, &Npt, eigs, work, &Nwork, Rwork, &stat); // Check for degenerate eigenvalues (broke previous Jacobi algorithm) for (row = 0; row < NCOL; row++) { for (col = row + 1; col < NCOL; col++) { if (fabs(eigs[row] - eigs[col]) < IMAG_TOL) printf("WARNING: w[%d] = w[%d] = %.8g\n", row, col, eigs[row]); } } // Move the results back into matrix structures // Overwrite PSq to hold the eigenvectors for projection for (row = 0; row < NCOL; row++) { for (col = 0; col < NCOL; col++) { PSq.e[row][col].real = store[2 * (col * NCOL + row)]; PSq.e[row][col].imag = store[2 * (col * NCOL + row) + 1]; P->e[row][col] = cmplx(0.0, 0.0); Pinv.e[row][col] = cmplx(0.0, 0.0); } P->e[row][row].real = sqrt(eigs[row]); Pinv.e[row][row].real = 1.0 / sqrt(eigs[row]); } mult_na(P, &PSq, &tmat); mult_nn(&PSq, &tmat, P); // Now project out 1 / sqrt[in.in^dag] to find u = [1 / P].in mult_na(&Pinv, &PSq, &tmat); mult_nn(&PSq, &tmat, &Pinv); mult_nn(&Pinv, in, u); #ifdef DEBUG_CHECK // Check unitarity of u mult_na(u, u, &PSq); c_scalar_add_diag(&PSq, &minus1); for (row = 0; row < NCOL; row++) { for (col = 0; col < NCOL; col++) { if (cabs_sq(&(PSq.e[row][col])) > SQ_TOL) { printf("Error getting unitary piece: "); printf("%.4g > %.4g for [%d, %d]\n", cabs(&(PSq.e[row][col])), IMAG_TOL, row, col); dumpmat(in); dumpmat(u); dumpmat(P); return; } } } #endif #ifdef DEBUG_CHECK // Check hermiticity of P adjoint(P, &tmat); sub_matrix(P, &tmat, &PSq); for (row = 0; row < NCOL; row++) { for (col = 0; col < NCOL; col++) { if (cabs_sq(&(PSq.e[row][col])) > SQ_TOL) { printf("Error getting hermitian piece: "); printf("%.4g > %.4g for [%d, %d]\n", cabs(&(PSq.e[row][col])), IMAG_TOL, row, col); dumpmat(in); dumpmat(u); dumpmat(P); return; } } } #endif #ifdef DEBUG_CHECK // Check that in = P.u mult_nn(P, u, &tmat); sub_matrix(in, &tmat, &PSq); for (row = 0; row < NCOL; row++) { for (col = 0; col < NCOL; col++) { if (cabs_sq(&(PSq.e[row][col])) > SQ_TOL) { printf("Error reconstructing initial matrix: "); printf("%.4g > %.4g for [%d, %d]\n", cabs(&(PSq.e[row][col])), IMAG_TOL, row, col); dumpmat(in); dumpmat(u); dumpmat(P); return; } } } #endif }
void setup_lambda() { int i, j, k, l, count; complex inv_sqrt = cmplx(1.0 / sqrt(2.0), 0.0); complex i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(2.0)); #ifdef DEBUG_CHECK int a; complex trace, tt; node0_printf("Computing generators for U(N)\n"); #endif // Make sure Lambda matrices are initialized for (i = 0; i < DIMF; i++) clear_mat(&(Lambda[i])); // N * (N - 1) off-diagonal SU(N) generators // (T^{ij, +})_{kl} = i * (de_{ki} de_{lj} + de_{kj} de_{li}) / sqrt(2) // (T^{ij, -})_{kl} = (de_{ki} de_{lj} - de_{kj} de_{ki}) / sqrt(2) // Sign in second chosen to match previous values count = 0; for (i = 0; i < NCOL; i++) { for (j = i + 1; j < NCOL; j++) { for (k = 0; k < NCOL; k++) { for (l = 0; l < NCOL; l++) { if (k == i && l == j) { CSUM(Lambda[count].e[k][l], i_inv_sqrt); CSUM(Lambda[count + 1].e[k][l], inv_sqrt); } else if (k == j && l == i) { CSUM(Lambda[count].e[k][l], i_inv_sqrt); CDIF(Lambda[count + 1].e[k][l], inv_sqrt); } } } count += 2; } } if (count != NCOL * (NCOL - 1)) { node0_printf("ERROR: Wrong number of off-diagonal generators, "); node0_printf("%d vs. %d\n", count, NCOL * (NCOL - 1)); terminate(1); } // N - 1 diagonal SU(N) generators // T^k = i * diag(1, 1, ..., -k, 0, ..., 0) / sqrt(k * (k + 1)) for (i = 0; i < NCOL - 1; i++) { j = NCOL * (NCOL - 1) + i; // Index after +/- above k = i + 1; i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(k * (k + 1.0))); for (l = 0; l <= k; l++) Lambda[j].e[l][l] = i_inv_sqrt; CMULREAL(Lambda[j].e[k][k], -1.0 * k, Lambda[j].e[k][k]); } // U(1) generator i * I_N / sqrt(N) if (DIMF == NCOL * NCOL) { // Allow SU(N) compilation for now i_inv_sqrt = cmplx(0.0, sqrt(one_ov_N)); clear_mat(&(Lambda[DIMF - 1])); for (i = 0; i < NCOL; i++) Lambda[DIMF - 1].e[i][i] = i_inv_sqrt; } #ifdef DEBUG_CHECK // Print Lambdas for (i = 0; i < DIMF; i++){ node0_printf("Lambda[%d]\n",i); if (this_node == 0) dumpmat(&(Lambda[i])); } // Test group theory node0_printf("Check group theory "); node0_printf("Sum_a Lambda^a_{kl} Lambda^a_{ij} = -delta_kj delta_il\n"); for (i = 0; i < NCOL; i++) { for (j = 0; j < NCOL; j++) { for (k = 0; k < NCOL; k++) { for (l = 0; l < NCOL; l++) { trace = cmplx(0, 0); for (a = 0; a < DIMF; a++) { CMUL(Lambda[a].e[k][l], Lambda[a].e[i][j], tt); CSUM(trace, tt); } if (cabs_sq(&trace) > IMAG_TOL) node0_printf("Sum_a La^a_{%d%d} La^a_{%d%d} = (%.4g, %.4g)\n", k, j, i, l, trace.real, trace.imag); } } } } #endif // Test orthogonality and compute products of Lambdas for fermion forces #ifdef DEBUG_CHECK for (i = 0; i < DIMF; i++) { for (j = 0; j < DIMF; j++) { mult_nn(&(Lambda[i]), &(Lambda[j]), &tmat); trace = trace(&tmat); if (trace.real * trace.real > IMAG_TOL) node0_printf("Tr[T_%d T_%d] = (%.4g, %.4g)\n", i, j, trace.real, trace.imag); } } #endif }
int congrad_xxx( field_offset src, /* type wilson_vector (where source is to be created)*/ Real cgmass, /* unused here*/ int source_chirality /* chirality sector for inversion (NOT USED) */ ) { register int i; register site *s; int j,k, avs_iters, avm_iters,status,flag; int MaxCG; int ksource, spin,color,my_chirality,chb,che,chbo,cheo,ii,jj; Real *RsdCG; Real size_r,one_minus_m,r02inv; wilson_vector **psim; void setup_multi(); w_prop_file *fp_out_w[MAX_MASSES]; /* For propagator files */ w_prop_file *fp_in_w[MAX_MASSES]; /* For propagator files */ w_prop_file *h0_out_w[MAX_MASSES]; /* For intermediate propagator files */ #ifdef EIGO wilson_vector wproj; complex ctmp,cd,*cproj; int l; int icount, ivec; int *chiral_check; Real cdp, cdm; Real *ca, *cb; Real eps, mu, denom; #endif double source_norm; RsdCG=resid; MaxCG=niter; avs_iters=0; r02inv= -0.5/R0; #ifdef MINN do_minn=1; #endif setup_multi(); #ifdef EIGO if(Nvecs_hov != 0)cproj = (complex *)malloc(Nvecs_hov*sizeof(complex)); /* check chirality of your modes (to identify zero modes) */ if(Nvecs_hov != 0)chiral_check= (int *)malloc(Nvecs_hov*sizeof(int)); for(j=0;j<Nvecs_hov;j++){ cdp=0.0; cdm=0.0; FORALLSITES(i,s){ for(l=0;l<2;l++)for(k=0;k<3;k++){ cdp += cabs_sq(&(eigVec[j][i].d[l].c[k])); } for(l=2;l<4;l++)for(k=0;k<3;k++){ cdm += cabs_sq(&(eigVec[j][i].d[l].c[k])); } } g_floatsum(&cdp); g_floatsum(&cdm); if(cdm< 1.e-6 && cdp >1.e-6) chiral_check[j] =1; else if (cdm >1.e-6 && cdp < 1.e-6) chiral_check[j] = -1; else if (cdm >1.e-6 && cdp > 1.e-6) chiral_check[j] =0; else{ node0_printf("eigVec0[%d] is a null vector!\n",j); exit(1); } } /* the mode propagator matrix */ /* I am stupid--how to do this in a 2-d array?? */ if(Nvecs_hov != 0){ ca= (Real *)malloc(num_masses*Nvecs_hov*sizeof(Real)); cb= (Real *)malloc(num_masses*Nvecs_hov*sizeof(Real)); } /* initialize the coefficients of the propagator matrix for modes */ for(k=0;k<num_masses;k++)for(ivec=0;ivec<Nvecs_hov;ivec++){ icount=Nvecs_hov*k + ivec; if(chiral_check[ivec]==0){ mu=mass[k]/(2.0*R0); eps= sqrt(eigVal[ivec])/(2.0*R0); denom= (mu*mu+eps*eps*(1.0-mu*mu))*2.0*R0; ca[icount]= mu*(1.0-eps*eps)/denom; cb[icount]= eps*sqrt(1.0-eps*eps)/denom; } else{ ca[icount]= 1.0/mass[k]; cb[icount]= 0.0; } node0_printf("mass %e mode %d %d %e %e\n",mass[k],ivec, chiral_check[ivec],ca[icount],cb[icount]); } #endif /* open the prop files */ for(k=0;k<num_masses;k++){ fp_in_w[k] = r_open_wprop(startflag_w[k], startfile_w[k]); fp_out_w[k] = w_open_wprop(saveflag_w[k], savefile_w[k], wqs.type); #ifdef H0INV h0_out_w[k] = w_open_wprop(saveflag_w3[k], savefile_w3[k], wqs.type); #endif } for(ksource = 0; ksource < wqs.nsource; ksource++){ spin = convert_ksource_to_spin(ksource); color = convert_ksource_to_color(ksource); // /* Loop over source spins */ // for(spin=0;spin<4;spin++){ // /* Loop over source colors */ // for(color=0;color<3;color++){ node0_printf("Propagator color %d spin %d\n",color,spin); if(startflag_w[0] == FRESH){flag=0;} else{ /* check if there's a propagator already there--Do for all masses */ flag=1; for(k=0;k<num_masses && flag==1 ;k++){ #ifdef IOTIME status = reload_wprop_sc_to_site( startflag_w[k], fp_in_w[k], &wqs, spin, color, F_OFFSET(psi),1); #else status = reload_wprop_sc_to_site( startflag_w[k], fp_in_w[k], &wqs, spin, color, F_OFFSET(psi),0); #endif if(status != 0){ node0_printf("congrad_outer_p: computing prop\n"); /* reload_wprop_sc_to_site( FRESH, fp_in_w[k], &wqs, spin, color, F_OFFSET(psi),0); */ flag = 0; } else{ /* status = 1--put the propagator in the new output file so all the elements are in one place. This will fail if the propagator generation did not write the same number of elements for each mass value propagator */ #ifdef IOTIME save_wprop_sc_from_site( saveflag_w[k],fp_out_w[k], &wqs, spin,color,F_OFFSET(psi),1); #else save_wprop_sc_from_site( saveflag_w[k],fp_out_w[k], &wqs, spin,color,F_OFFSET(psi),0); #endif } } /* k loop */ } /*startflag_w != FRESH */ if(flag==0){ /* proceed to inversion */ if(spin<2){my_chirality=1;chb=0;che=2;chbo=2;cheo=4;} else {my_chirality= -1;chb=2,che=4;chbo=0;cheo=2;} chirality_flag=my_chirality; /* Make source */ /* Complete the source structure */ /* NEEDS FIXING!! */ // wqs.color = color; // wqs.spin = spin; /* For wilson_info */ wqstmp = wqs; // status = w_source_site(src,&wqs); status = wv_source_site(src,&wqs); /* check original source size... */ source_norm=0.0; FORALLSITES(i,s){ source_norm += (double)magsq_wvec(((wilson_vector *)F_PT(s,src)) ); } g_doublesum( &source_norm ); if(this_node==0){ printf("Original: source_norm = %e\n",source_norm); fflush(stdout); } FORALLSITES(i,s) copy_wvec((wilson_vector *)F_PT(s,src),&(s->chi0)); #ifdef EIGO /* project out the eigenvectors from the source */ node0_printf("removing %d modes from source\n",Nvecs_hov); for(j=0;j<Nvecs_hov;j++){ cd=cmplx(0.0,0.0); FORALLSITES(i,s){ /* wproj will hold the chiral projections-- recall we have ``packed'' two chiralities into eigVec */ clear_wvec(&wproj); for(ii=chb;ii<che;ii++)for(jj=0;jj<3;jj++){ wproj.d[ii].c[jj]=eigVec[j][i].d[ii].c[jj]; } ctmp = wvec_dot( &(wproj),(wilson_vector *)F_PT(s,src)); CSUM(cd,ctmp); } g_complexsum(&cd); cproj[j]=cd; node0_printf("projector %d %e %e\n",j,cproj[j].real,cproj[j].imag); CMULREAL(cd,-1.0,cd); FORALLSITES(i,s){ clear_wvec(&wproj); for(ii=chb;ii<che;ii++)for(jj=0;jj<3;jj++){ wproj.d[ii].c[jj]=eigVec[j][i].d[ii].c[jj]; } c_scalar_mult_add_wvec(&(s->chi0), &(wproj), &cd, &(s->chi0) ); } }