void computeLongLinkCPU(void** longlink, su3_matrix** sitelinkEx, Float* act_path_coeff) { int E[4]; for(int dir=0; dir<4; ++dir) E[dir] = Z[dir]+4; const int extended_volume = E[3]*E[2]*E[1]*E[0]; su3_matrix temp; for(int t=0; t<Z[3]; ++t) { for(int z=0; z<Z[2]; ++z) { for(int y=0; y<Z[1]; ++y) { for(int x=0; x<Z[0]; ++x) { const int oddBit = (x+y+z+t)&1; int little_index = ((((t*Z[2] + z)*Z[1] + y)*Z[0] + x)/2) + oddBit*Vh; int large_index = (((((t+2)*E[2] + (z+2))*E[1] + (y+2))*E[0] + x+2)/2) + oddBit*(extended_volume/2); for(int dir=XUP; dir<=TUP; ++dir) { int dx[4] = {0,0,0,0}; su3_matrix* llink = ((su3_matrix*)longlink[dir]) + little_index; llfat_scalar_mult_su3_matrix(sitelinkEx[dir]+large_index, act_path_coeff[1], llink); dx[dir] = 1; int nbr_index = neighborIndexFullLattice(E, large_index, dx); llfat_mult_su3_nn(llink, sitelinkEx[dir]+nbr_index, &temp); dx[dir] = 2; nbr_index = neighborIndexFullLattice(E, large_index, dx); llfat_mult_su3_nn(&temp, sitelinkEx[dir]+nbr_index, llink); } } // x } // y } // z } // t return; }
void computeLongLinkCPU(void** longlink, su3_matrix** sitelink, Float* act_path_coeff) { su3_matrix temp; for(int dir=XUP; dir<=TUP; ++dir) { int dx[4] = {0,0,0,0}; for(int i=0; i<V; ++i) { // Initialize the longlinks su3_matrix* llink = ((su3_matrix*)longlink[dir]) + i; llfat_scalar_mult_su3_matrix(sitelink[dir]+i, act_path_coeff[1], llink); dx[dir] = 1; int nbr_idx = neighborIndexFullLattice(Z, i, dx); llfat_mult_su3_nn(llink, sitelink[dir]+nbr_idx, &temp); dx[dir] = 2; nbr_idx = neighborIndexFullLattice(Z, i, dx); llfat_mult_su3_nn(&temp, sitelink[dir]+nbr_idx, llink); } } return; }
void llfat_compute_gen_staple_field_mg(su3_matrix *staple, int mu, int nu, su3_matrix* mulink, su3_matrix** ghost_mulink, su3_matrix** sitelink, su3_matrix** ghost_sitelink, su3_matrix** ghost_sitelink_diag, void** fatlink, Real coef, int use_staple) { su3_matrix tmat1,tmat2; int i ; su3_matrix *fat1; int X1 = Z[0]; int X2 = Z[1]; int X3 = Z[2]; //int X4 = Z[3]; int X1h =X1/2; int X2X1 = X1*X2; int X3X2 = X3*X2; int X3X1 = X3*X1; /* Upper staple */ /* Computes the staple : * mu (B) * +-------+ * nu | | * (A) | |(C) * X X * * Where the mu link can be any su3_matrix. The result is saved in staple. * if staple==NULL then the result is not saved. * It also adds the computed staple to the fatlink[mu] with weight coef. */ int dx[4]; /* upper staple */ for(i=0; i < V; i++) { int half_index = i; int oddBit =0; if (i >= Vh) { oddBit = 1; half_index = i -Vh; } //int x4 = x4_from_full_index(i); int sid =half_index; int za = sid/X1h; int x1h = sid - za*X1h; int zb = za/X2; int x2 = za - zb*X2; int x4 = zb/X3; int x3 = zb - x4*X3; int x1odd = (x2 + x3 + x4 + oddBit) & 1; int x1 = 2*x1h + x1odd; int x[4] = {x1,x2,x3,x4}; int space_con[4]= { (x4*X3X2+x3*X2+x2)/2, (x4*X3X1+x3*X1+x1)/2, (x4*X2X1+x2*X1+x1)/2, (x3*X2X1+x2*X1+x1)/2 }; fat1 = ((su3_matrix*)fatlink[mu]) + i; su3_matrix* A = sitelink[nu] + i; memset(dx, 0, sizeof(dx)); dx[nu] =1; int nbr_idx; su3_matrix* B; if (use_staple) { if (x[nu] + dx[nu] >= Z[nu]) { B = ghost_mulink[nu] + Vs[nu] + (1-oddBit)*Vsh[nu] + space_con[nu]; } else { nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); B = mulink + nbr_idx; } } else { if(x[nu]+dx[nu] >= Z[nu]) { //out of boundary, use ghost data B = ghost_sitelink[nu] + 4*Vs[nu] + mu*Vs[nu] + (1-oddBit)*Vsh[nu] + space_con[nu]; } else { nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); B = sitelink[mu] + nbr_idx; } } //we could be in the ghost link area if mu is T and we are at high T boundary su3_matrix* C; memset(dx, 0, sizeof(dx)); dx[mu] =1; if(x[mu] + dx[mu] >= Z[mu]) { //out of boundary, use ghost data C = ghost_sitelink[mu] + 4*Vs[mu] + nu*Vs[mu] + (1-oddBit)*Vsh[mu] + space_con[mu]; } else { nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2],dx[1],dx[0]); C = sitelink[nu] + nbr_idx; } llfat_mult_su3_nn( A, B,&tmat1); if(staple!=NULL) { /* Save the staple */ llfat_mult_su3_na( &tmat1, C, &staple[i]); } else { /* No need to save the staple. Add it to the fatlinks */ llfat_mult_su3_na( &tmat1, C, &tmat2); llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1); } } /***************lower staple**************** * * X X * nu | | * (A) | |(C) * +-------+ * mu (B) * *********************************************/ for(i=0; i < V; i++) { int half_index = i; int oddBit =0; if (i >= Vh) { oddBit = 1; half_index = i -Vh; } int sid =half_index; int za = sid/X1h; int x1h = sid - za*X1h; int zb = za/X2; int x2 = za - zb*X2; int x4 = zb/X3; int x3 = zb - x4*X3; int x1odd = (x2 + x3 + x4 + oddBit) & 1; int x1 = 2*x1h + x1odd; int x[4] = {x1,x2,x3,x4}; int space_con[4]= { (x4*X3X2+x3*X2+x2)/2, (x4*X3X1+x3*X1+x1)/2, (x4*X2X1+x2*X1+x1)/2, (x3*X2X1+x2*X1+x1)/2 }; //int x4 = x4_from_full_index(i); fat1 = ((su3_matrix*)fatlink[mu]) + i; //we could be in the ghost link area if nu is T and we are at low T boundary su3_matrix* A; memset(dx, 0, sizeof(dx)); dx[nu] = -1; int nbr_idx; if(x[nu] + dx[nu] < 0) { //out of boundary, use ghost data A = ghost_sitelink[nu] + nu*Vs[nu] + (1-oddBit)*Vsh[nu] + space_con[nu]; } else { nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); A = sitelink[nu] + nbr_idx; } su3_matrix* B; if (use_staple) { nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); if (x[nu] + dx[nu] < 0) { B = ghost_mulink[nu] + (1-oddBit)*Vsh[nu] + space_con[nu]; } else { B = mulink + nbr_idx; } } else { if(x[nu] + dx[nu] < 0) { //out of boundary, use ghost data B = ghost_sitelink[nu] + mu*Vs[nu] + (1-oddBit)*Vsh[nu] + space_con[nu]; } else { nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); B = sitelink[mu] + nbr_idx; } } //we could be in the ghost link area if nu is T and we are at low T boundary // or mu is T and we are on high T boundary su3_matrix* C; memset(dx, 0, sizeof(dx)); dx[nu] = -1; dx[mu] = 1; nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2],dx[1],dx[0]); //space con must be recomputed because we have coodinates change in 2 directions int new_x1, new_x2, new_x3, new_x4; new_x1 = (x[0] + dx[0] + Z[0])%Z[0]; new_x2 = (x[1] + dx[1] + Z[1])%Z[1]; new_x3 = (x[2] + dx[2] + Z[2])%Z[2]; new_x4 = (x[3] + dx[3] + Z[3])%Z[3]; int new_x[4] = {new_x1, new_x2, new_x3, new_x4}; space_con[0] = (new_x4*X3X2 + new_x3*X2 + new_x2)/2; space_con[1] = (new_x4*X3X1 + new_x3*X1 + new_x1)/2; space_con[2] = (new_x4*X2X1 + new_x2*X1 + new_x1)/2; space_con[3] = (new_x3*X2X1 + new_x2*X1 + new_x1)/2; if( (x[nu] + dx[nu]) < 0 && (x[mu] + dx[mu] >= Z[mu])) { //find the other 2 directions, dir1, dir2 //with dir2 the slowest changing direction int dir1, dir2; //other two dimensions for(dir1=0; dir1 < 4; dir1 ++) { if(dir1 != nu && dir1 != mu) { break; } } for(dir2=0; dir2 < 4; dir2 ++) { if(dir2 != nu && dir2 != mu && dir2 != dir1) { break; } } C = ghost_sitelink_diag[nu*4+mu] + oddBit*Z[dir1]*Z[dir2]/2 + (new_x[dir2]*Z[dir1]+new_x[dir1])/2; } else if (x[nu] + dx[nu] < 0) { C = ghost_sitelink[nu] + nu*Vs[nu] + oddBit*Vsh[nu]+ space_con[nu]; } else if (x[mu] + dx[mu] >= Z[mu]) { C = ghost_sitelink[mu] + 4*Vs[mu] + nu*Vs[mu] + oddBit*Vsh[mu]+space_con[mu]; } else { C = sitelink[nu] + nbr_idx; } llfat_mult_su3_an( A, B,&tmat1); llfat_mult_su3_nn( &tmat1, C,&tmat2); if(staple!=NULL) { /* Save the staple */ llfat_add_su3_matrix(&staple[i], &tmat2, &staple[i]); llfat_scalar_mult_add_su3_matrix(fat1, &staple[i], coef, fat1); } else { /* No need to save the staple. Add it to the fatlinks */ llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1); } } } /* compute_gen_staple_site */
void llfat_compute_gen_staple_field(su3_matrix *staple, int mu, int nu, su3_matrix* mulink, su3_matrix** sitelink, void** fatlink, Real coef, int use_staple) { su3_matrix tmat1,tmat2; int i ; su3_matrix *fat1; /* Upper staple */ /* Computes the staple : * mu (B) * +-------+ * nu | | * (A) | |(C) * X X * * Where the mu link can be any su3_matrix. The result is saved in staple. * if staple==NULL then the result is not saved. * It also adds the computed staple to the fatlink[mu] with weight coef. */ int dx[4]; /* upper staple */ for(i=0; i < V; i++) { fat1 = ((su3_matrix*)fatlink[mu]) + i; su3_matrix* A = sitelink[nu] + i; memset(dx, 0, sizeof(dx)); dx[nu] =1; int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); su3_matrix* B; if (use_staple) { B = mulink + nbr_idx; } else { B = mulink + nbr_idx; } memset(dx, 0, sizeof(dx)); dx[mu] =1; nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2],dx[1],dx[0]); su3_matrix* C = sitelink[nu] + nbr_idx; llfat_mult_su3_nn( A, B,&tmat1); if(staple!=NULL) { /* Save the staple */ llfat_mult_su3_na( &tmat1, C, &staple[i]); } else { /* No need to save the staple. Add it to the fatlinks */ llfat_mult_su3_na( &tmat1, C, &tmat2); llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1); } } /***************lower staple**************** * * X X * nu | | * (A) | |(C) * +-------+ * mu (B) * *********************************************/ for(i=0; i < V; i++) { fat1 = ((su3_matrix*)fatlink[mu]) + i; memset(dx, 0, sizeof(dx)); dx[nu] = -1; int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); if (nbr_idx >= V || nbr_idx <0) { fprintf(stderr, "ERROR: invliad nbr_idx(%d), line=%d\n", nbr_idx, __LINE__); exit(1); } su3_matrix* A = sitelink[nu] + nbr_idx; su3_matrix* B; if (use_staple) { B = mulink + nbr_idx; } else { B = mulink + nbr_idx; } memset(dx, 0, sizeof(dx)); dx[mu] = 1; nbr_idx = neighborIndexFullLattice(nbr_idx, dx[3], dx[2],dx[1],dx[0]); su3_matrix* C = sitelink[nu] + nbr_idx; llfat_mult_su3_an( A, B,&tmat1); llfat_mult_su3_nn( &tmat1, C,&tmat2); if(staple!=NULL) { /* Save the staple */ llfat_add_su3_matrix(&staple[i], &tmat2, &staple[i]); llfat_scalar_mult_add_su3_matrix(fat1, &staple[i], coef, fat1); } else { /* No need to save the staple. Add it to the fatlinks */ llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1); } } } /* compute_gen_staple_site */
void llfat_compute_gen_staple_field_mg(su3_matrix *staple, int mu, int nu, su3_matrix* mulink, su3_matrix* ghost_mulink, su3_matrix** sitelink, su3_matrix* ghost_sitelink, void** fatlink, Real coef, int use_staple) { su3_matrix tmat1,tmat2; int i ; su3_matrix *fat1; /* Upper staple */ /* Computes the staple : * mu (B) * +-------+ * nu | | * (A) | |(C) * X X * * Where the mu link can be any su3_matrix. The result is saved in staple. * if staple==NULL then the result is not saved. * It also adds the computed staple to the fatlink[mu] with weight coef. */ int dx[4]; /* upper staple */ for(i=0;i < V;i++){ int half_index = i; int oddBit =0; if (i >= Vh){ oddBit = 1; half_index = i -Vh; } int x4 = x4_from_full_index(i); fat1 = ((su3_matrix*)fatlink[mu]) + i; su3_matrix* A = sitelink[nu] + i; memset(dx, 0, sizeof(dx)); dx[nu] =1; int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); su3_matrix* B; if (use_staple){ nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); if (x4 + dx[3] >= Z[3]){ B = ghost_mulink + Vs + (1-oddBit)*Vsh + nbr_idx; }else{ B = mulink + nbr_idx; } }else{ nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); if (x4 + dx[3] >= Z[3]){ B = ghost_sitelink + 4*Vs + mu*Vs + (1-oddBit)*Vsh+nbr_idx; }else{ B = mulink + nbr_idx; } } //we could be in the ghost link area if mu is T and we are at high T boundary su3_matrix* C; memset(dx, 0, sizeof(dx)); dx[mu] =1; nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2],dx[1],dx[0]); if (x4 + dx[3] >= Z[3]){ C = ghost_sitelink + 4*Vs + nu*Vs + (1 - oddBit)*Vsh + nbr_idx; }else{ C = sitelink[nu] + nbr_idx; } llfat_mult_su3_nn( A, B,&tmat1); if(staple!=NULL){/* Save the staple */ llfat_mult_su3_na( &tmat1, C, &staple[i]); } else{ /* No need to save the staple. Add it to the fatlinks */ llfat_mult_su3_na( &tmat1, C, &tmat2); llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1); } } /***************lower staple**************** * * X X * nu | | * (A) | |(C) * +-------+ * mu (B) * *********************************************/ for(i=0;i < V;i++){ int half_index = i; int oddBit =0; if (i >= Vh){ oddBit = 1; half_index = i -Vh; } int x4 = x4_from_full_index(i); fat1 = ((su3_matrix*)fatlink[mu]) + i; //we could be in the ghost link area if nu is T and we are at low T boundary su3_matrix* A; memset(dx, 0, sizeof(dx)); dx[nu] = -1; int nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); if (nbr_idx >= V || nbr_idx <0){ fprintf(stderr, "ERROR: invliad nbr_idx(%d), line=%d\n", nbr_idx, __LINE__); exit(1); } if (x4 + dx[3] < 0){ A = ghost_sitelink + nu*Vs + ( 1 -oddBit)*Vsh + nbr_idx; }else{ A = sitelink[nu] + nbr_idx; } su3_matrix* B; nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); if (use_staple){ nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); if (x4 + dx[3] < 0){ B = ghost_mulink + (1-oddBit)*Vsh + nbr_idx; }else{ B = mulink + nbr_idx; } }else{ nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]); if (x4 + dx[3] < 0){ B = ghost_sitelink + mu*Vs + (1-oddBit)*Vsh+nbr_idx; }else{ B = mulink + nbr_idx; } } //we could be in the ghost link area if nu is T and we are at low T boundary // or mu is T and we are on high T boundary su3_matrix* C; memset(dx, 0, sizeof(dx)); dx[nu] = -1; dx[mu] = 1; nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2],dx[1],dx[0]); if (x4 + dx[3] < 0){ //nu is T, we are at low T boundary and we are at the same oddBit // with the starting site C = ghost_sitelink + nu*Vs + oddBit*Vsh+nbr_idx; }else if (x4 + dx[3] >= Z[3]){ //mu is T, we are at high T boundaryand we are at the same oddBit // with the starting site C = ghost_sitelink + 4*Vs + nu*Vs + oddBit*Vsh+nbr_idx; }else{ C = sitelink[nu] + nbr_idx; } llfat_mult_su3_an( A, B,&tmat1); llfat_mult_su3_nn( &tmat1, C,&tmat2); if(staple!=NULL){/* Save the staple */ llfat_add_su3_matrix(&staple[i], &tmat2, &staple[i]); llfat_scalar_mult_add_su3_matrix(fat1, &staple[i], coef, fat1); } else{ /* No need to save the staple. Add it to the fatlinks */ llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1); } } } /* compute_gen_staple_site */