static void generate_voxels() {
    Noise2D n2d(0);
    for (int z = 0; z < 65; z++) {
    for (int y = 0; y < 65; y++) {
    for (int x = 0; x < 65; x++) {
        const float fy = (float)y / 65.0f;
        const int offset = offset_3d({x, y, z}, Vec3i(65));
        const float v = n2d.get(x / 16.0f, z / 16.0f) * 0.25f;
        voxels[offset] = fy - 0.25f - v;
    }}}
}
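/*
 * The voxel routines in this file (generate_voxels above and the two meshing
 * routines further down) index the 65^3 scalar field and a rolling two-slab
 * index buffer through offset_3d() and offset_3d_slab(), which are defined
 * elsewhere. A minimal sketch of the assumed behaviour follows; the names
 * carry a _sketch suffix because the real implementations may differ.
 */
static inline int offset_3d_sketch(const Vec3i &p, const Vec3i &size) {
    // linear index into a size.x * size.y * size.z volume
    return (p.z * size.y + p.y) * size.x + p.x;
}

static inline int offset_3d_slab_sketch(const Vec3i &p, const Vec3i &size) {
    // same layout, but only the two most recent z-slabs are kept, so vertex
    // indices written while meshing depth z can be read back at depth z+1
    return ((p.z % 2) * size.y + p.y) * size.x + p.x;
}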
void qcdoc_su3_recon( char *name)
{
  /**** This section defines all the registers and offsets I need ****/

  /*
   * This marks the argument registers as defined by ABI as off limits
   * to us until they are freed by "getarg()";
   */
  int dum = defargcount(4);

  /*Handle for the loop entry point*/
  int branchsite;
  int branchmu;
  int retno ;

  /*------------------------------------------------------------------
   * Floating point registers
   *------------------------------------------------------------------
   */

  // Reconstruct 8 registers for 4 spinor
  // reg_array_2d(PSI,Fregs,4,2);
  reg_array_3d(PSI,Fregs,3,4,2);
  offset_3d(PSI_IMM,FourSpinType,4,3,2);   /*Offsets within 4 spinor*/

  // Reconstruct 2 spinor registers
#define NEO 2
  reg_array_3d(Atmp,Fregs,1,2,2); /*CHIplus regs */
  reg_array_3d(Btmp,Fregs,1,2,2); /*CHIminus regs */
  int A[NEO][2][2] = {
    Atmp[0][0][0], Atmp[0][0][1],
    Atmp[0][1][0], Atmp[0][1][1],
    -1,-1,-1,-1
  };
  int B[NEO][2][2] = {
    Btmp[0][0][0], Btmp[0][0][1],
    Btmp[0][1][0], Btmp[0][1][1],
    -1,-1,-1,-1
  };

  /*Regs for SU3 two spinor multiply ... overlap with the reconstruct*/
  /* registers */
  int CHIR[3][2][2] = {
    A[0][0][0],A[0][0][1],
    A[0][1][0],A[0][1][1],
    B[0][0][0],B[0][0][1],
    B[0][1][0],B[0][1][1],
    PSI[0][0][0],PSI[0][0][1],
    PSI[0][1][0],PSI[0][1][1]
  };
  offset_3d(CHI_IMM,TwoSpinType,3,2,2);

  /*Registers for the gauge link (2 rows)*/
  int UA[3][2] = {
    {PSI[0][2][0],PSI[0][2][1]},
    {PSI[2][1][0],PSI[2][1][1]},
    {PSI[1][0][0],PSI[1][0][1]}
  };
  int UB[3][2] = {
    {PSI[1][1][0],PSI[1][1][1]},
    {PSI[2][0][0],PSI[2][0][1]},
    {PSI[1][2][0],PSI[1][2][1]},
  };
  offset_3d(GIMM , GaugeType, 3, 3 ,2 );

  // Other 8 registers used for reduction variables in SU3.
  // Could use these in reconstruct??
  int E[2] = { PSI[2][2][0],PSI[2][2][1]};

  /*
   * FCD used for drain of Chi
   * Overlap with PSI[*][3][*]
   */
  int F[2] = {PSI[0][3][0],PSI[0][3][1]};
  int C[2] = {PSI[1][3][0],PSI[1][3][1]};
  int D[2] = {PSI[2][3][0],PSI[2][3][1]};

  /*
   * Integer registers
   */
  alreg(psi,Iregs);
  alreg(Umu,Iregs);
  alreg(Ufetch,Iregs);
  alreg(Chiin,Iregs);
  alreg(Chiout,Iregs);
  alreg(Chifetch,Iregs);

  reg_array_1d(Chiplus,Iregs,4);  /*Pointers to the 8 2-spinors for recombination*/
  reg_array_1d(Chiminus,Iregs,4);

  alreg(mu,Iregs);
  alreg(Chidrain,Iregs);
  alreg(pref,Iregs);
  alreg(mem,Iregs);
  alreg(length,Iregs);

  int Isize = PROC->I_size;
  int Fsize = PROC->FP_size;

  def_off( ZERO_IMM, Byte,0);
  def_off( PSI_ATOM, FourSpinType, 24);
  def_off( CHI_ATOM, TwoSpinType, 12);
  def_off( PAD_CHI_ATOM, TwoSpinType, 16);
  def_off( MAT_IMM, GaugeType, 18);
  int Ndim   = def_offset(4,Byte,"Ndim");
  int Ndimm1 = def_offset(3,Byte,"Ndimm1");

  int hbias,bias;

  /*Offsets handles to stack*/
  int hbitbucket = def_offset(16*Isize,Byte,"hbitbucket");
  int Tsize;
  if ( TwoSpinType == Double ) Tsize = PROC->FP_size;
  else                         Tsize = PROC->FSP_size;
  int hstk0 = def_offset(16*Isize+12*Tsize  ,Byte,"hstk0");
  int hstk1 = def_offset(16*Isize+2*12*Tsize,Byte,"hstk1");
  int hstk2 = def_offset(16*Isize+3*12*Tsize,Byte,"hstk2");
  int hstk3 = def_offset(16*Isize+4*12*Tsize,Byte,"hstk3");

  int hIsize = def_offset(Isize,Byte,"Isize");

  int i,co,j,k,nxt,ri,sp,nxtco,eop,eo_a,eo_b;

  /***********************************************************************/

  /*
   * PROLOGUE
   */
  make_inst(DIRECTIVE,Enter_Routine,name);

  /*Allocate stack save any callee save registers we need etc...*/
  int stack_buf_size;
  stack_buf_size = 16*Isize + 12*Fsize * 5 ;
  hbias = grab_stack(stack_buf_size);
  bias  = get_offset(hbias);
  save_regs();
  queue_iadd_imm(mem,PROC->StackPointer,hbias); /*Pointer to buf on stack*/

  /*Define our arguments - all pointers ala fortran*/
  getarg(psi);
  getarg(Umu);
  getarg(Chiin);
  getarg(length);

  /*{... Process arguments ...*/
  queue_iload(length,ZERO_IMM,length); /*Load in sx counter*/
  retno = get_target_label();          /*Branch to exit if yzt <1*/
  check_iterations(length,retno);

  need_cache_line(0); need_cache_line(1); need_cache_line(2); need_cache_line(3); need_cache_line(4);
  pragma(DCBT_SPACE,5);
  pragma(DCBT_POST,1);

#define LOAD_U(comin,comax)\
  /*Load two link rows*/\
  for( i = comin;i<=comax;i++ ){\
    for( ri=0;ri<2;ri++){ \
      queue_fload(UA[i][ri],GIMM[i][0][ri],Umu,GaugeType);\
      queue_fload(UB[i][ri],GIMM[i][1][ri],Umu,GaugeType);\
    } \
  }

#define PRELOAD_U  LOAD_U(0,1)
#define POSTLOAD_U LOAD_U(2,2)

  PRELOAD_U

#define LOAD_CHI(comin,comax) \
  /*Load Chi column*/\
  for( i = comin;i<=comax;i++ ){\
    for( ri=0;ri<2;ri++){\
      queue_fload(CHIR[i][0][ri],CHI_IMM[i][0][ri],Chiin,TwoSpinType);\
    } \
    for( ri=0;ri<2;ri++){\
      queue_fload(CHIR[i][1][ri],CHI_IMM[i][1][ri],Chiin,TwoSpinType);\
    } \
  }

#define PRELOAD_CHI  LOAD_CHI(0,1)
#define POSTLOAD_CHI LOAD_CHI(2,2)

#define POSTLOAD \
  POSTLOAD_CHI \
  POSTLOAD_U

  do_prefetch(Chiin,0);
  do_prefetch(Chiin,1);
  if ( SizeofDatum(TwoSpinType) == 8 ) do_prefetch(Chiin,2);

  PRELOAD_CHI

  /*
   * Start site loop
   */
  queue_iadd_imm(Chidrain,mem,hbitbucket);
  branchsite = start_loop(length);
  queue_iadd_imm(Chiout,mem,hstk0);

  /*
   * Loop over mu in asm
   */
  queue_iload_imm(mu,Ndimm1);

#define CHIDRAIN \
  queue_fstore(F[0],CHI_IMM[1][1][0],Chidrain,TwoSpinType);\
  queue_fstore(F[1],CHI_IMM[1][1][1],Chidrain,TwoSpinType);\
  queue_fstore(C[0],CHI_IMM[2][0][0],Chidrain,TwoSpinType);\
  queue_fstore(C[1],CHI_IMM[2][0][1],Chidrain,TwoSpinType);\
  queue_fstore(D[0],CHI_IMM[2][1][0],Chidrain,TwoSpinType);\
  queue_fstore(D[1],CHI_IMM[2][1][1],Chidrain,TwoSpinType);

#define PREFETCH_CHI \
  queue_iadd_imm(Chifetch,Chiin,PAD_CHI_ATOM);\
  do_prefetch(Chifetch,0);\
  do_prefetch(Chifetch,1);\
  if ( SizeofDatum(TwoSpinType) == 8 ) do_prefetch(Chifetch,2);

#define PREFETCH_CHIF \
  queue_iadd_imm(Chifetch,Chifetch,PAD_CHI_ATOM);\
  do_prefetch(Chifetch,0);\
  do_prefetch(Chifetch,1);\
  if ( SizeofDatum(TwoSpinType) == 8 ) do_prefetch(Chifetch,2);

  for ( int unroll=0; unroll<2; unroll++ ) {

    if ( unroll==0 ) {
      branchmu = start_loop(mu);
      pragma(DCBT_SPACE,5);
      pragma(STORE_LIM,1);
      pragma(LOAD_LIM,2);
    } else {
      pragma(STORE_LIM,2);
      pragma(DCBT_SPACE,5);
      pragma(DCBT_POST,1);
      pragma(DCBT_PRE,0);
      pragma(LOAD_LIM,2);
    }

    CHIDRAIN
    POSTLOAD

    if ( unroll == 0 ) {
      PREFETCH_CHI
      queue_iadd_imm(Ufetch,Umu,MAT_IMM);
      do_prefetch(Ufetch,0);
      do_prefetch(Ufetch,1);
      do_prefetch(Ufetch,2);
      if ( GaugeType == Double ) {
        do_prefetch(Ufetch,3);
        do_prefetch(Ufetch,4);
      }
    } else {
      pragma(DCBT_SPACE,3);
      PREFETCH_CHI
      PREFETCH_CHIF
      PREFETCH_CHIF
      PREFETCH_CHIF
    }

    j=0;
    queue_three_cmuls(C[0],C[1],UA[j][0],UA[j][1],CHIR[j][0][0],CHIR[j][0][1],
                      D[0],D[1],UA[j][0],UA[j][1],CHIR[j][1][0],CHIR[j][1][1],
                      E[0],E[1],UB[j][0],UB[j][1],CHIR[j][0][0],CHIR[j][0][1]);
    j=1;
    queue_three_cmadds(C[0],C[1],UA[j][0],UA[j][1],CHIR[j][0][0],CHIR[j][0][1],
                       D[0],D[1],UA[j][0],UA[j][1],CHIR[j][1][0],CHIR[j][1][1],
                       E[0],E[1],UB[j][0],UB[j][1],CHIR[j][0][0],CHIR[j][0][1]);
    j=2;
    queue_three_cmadds(C[0],C[1],UA[j][0],UA[j][1],CHIR[j][0][0],CHIR[j][0][1],
                       D[0],D[1],UA[j][0],UA[j][1],CHIR[j][1][0],CHIR[j][1][1],
                       E[0],E[1],UB[j][0],UB[j][1],CHIR[j][0][0],CHIR[j][0][1]);

    /*Store the first three results*/
    queue_fstore(C[0],CHI_IMM[0][0][0],Chiout,TwoSpinType);
    queue_fstore(C[1],CHI_IMM[0][0][1],Chiout,TwoSpinType);
    queue_fstore(D[0],CHI_IMM[0][1][0],Chiout,TwoSpinType);
    queue_fstore(D[1],CHI_IMM[0][1][1],Chiout,TwoSpinType);
    queue_fstore(E[0],CHI_IMM[1][0][0],Chiout,TwoSpinType);
    queue_fstore(E[1],CHI_IMM[1][0][1],Chiout,TwoSpinType);

    /*Load the third row*/
    for(j=0; j<3; j++) {
      for(ri=0; ri<2; ri++) {
        queue_fload(UA[j][ri],GIMM[j][2][ri],Umu,GaugeType);
      }
    }

    /*Gauge layout is linear, mu faster than site*/
    queue_iadd_imm(Umu,Umu,MAT_IMM);

    /*Now the second set of three cdots*/
    j=0;
    queue_three_cmuls(F[0],F[1],UB[j][0],UB[j][1],CHIR[j][1][0],CHIR[j][1][1],
                      C[0],C[1],UA[j][0],UA[j][1],CHIR[j][0][0],CHIR[j][0][1],
                      D[0],D[1],UA[j][0],UA[j][1],CHIR[j][1][0],CHIR[j][1][1]);
    j=1;
    queue_three_cmadds(F[0],F[1],UB[j][0],UB[j][1],CHIR[j][1][0],CHIR[j][1][1],
                       C[0],C[1],UA[j][0],UA[j][1],CHIR[j][0][0],CHIR[j][0][1],
                       D[0],D[1],UA[j][0],UA[j][1],CHIR[j][1][0],CHIR[j][1][1]);
    j=2;
    queue_three_cmadds(F[0],F[1],UB[j][0],UB[j][1],CHIR[j][1][0],CHIR[j][1][1],
                       C[0],C[1],UA[j][0],UA[j][1],CHIR[j][0][0],CHIR[j][0][1],
                       D[0],D[1],UA[j][0],UA[j][1],CHIR[j][1][0],CHIR[j][1][1]);

    /**************END SU3 CODE *************/

    queue_iadd_imm(Chiin,Chiin,PAD_CHI_ATOM);
    queue_iadd_imm(Chidrain,Chiout,ZERO_IMM);
    queue_iadd_imm(Chiout,Chiout,CHI_ATOM);

    if ( unroll == 0 ) {
      PRELOAD_U
      PRELOAD_CHI
    }

    /*********************************************************/
    /****************** END OF SU3 MULTIPLY ******************/
    /*********************************************************/

    if ( unroll== 0 ) {
      stop_loop(branchmu,mu);                           /* End loop over mu*/
      make_inst(DIRECTIVE,Target,get_target_label() );  /*delineate the sections*/
    }
  }

  /*********************************************************/
  /****************** START OF RECONSTRUCT *****************/
  /*********************************************************/

  //Address calculation...
  // Chiminus -> Stack and ChiPlus -> Chiin
  pragma(STORE_INORDER,1);

  queue_iadd_imm(Chiminus[0],mem,hstk0);

  /*For register use reasons loop over colour outermost*/
#define LOAD_CHI_MU0(eo,co) \
  for( sp = 0; sp<2;sp++ ){\
    for( ri = 0; ri<2;ri++ ){\
      queue_fload(A[eo][sp][ri],CHI_IMM[co][sp][ri],Chiminus[0],TwoSpinType);\
      if ( co == 0 ) {\
        queue_fload(B[eo][sp][ri],CHI_IMM[co][sp][ri],Chiin,TwoSpinType);\
        queue_iadd_imm(Chiplus[0],Chiin,ZERO_IMM);\
      } else {\
        queue_fload(B[eo][sp][ri],CHI_IMM[co][sp][ri],Chiplus [0],TwoSpinType);\
      }\
    }}

  pragma(LOAD_LIM,2);
  LOAD_CHI_MU0(0,0)
  pragma(DCBT_POST,1);
  CHIDRAIN

  int neo_a = NEO;
  int neo_b = NEO;
  eo_a = 0;
  eo_b = 0;

  for ( co = 0; co <3 ; co ++ ) {

    pragma(LOAD_LIM,1);
    if ( co == 0 ) {
      // Use the third colour for unrolling the loads
      A[1][0][0] = PSI[2][0][0];
      A[1][0][1] = PSI[2][0][1];
      A[1][1][0] = PSI[2][1][0];
      A[1][1][1] = PSI[2][1][1];
      B[1][0][0] = PSI[2][2][0];
      B[1][0][1] = PSI[2][2][1];
      B[1][1][0] = PSI[2][3][0];
      B[1][1][1] = PSI[2][3][1];
      queue_iadd_imm(Chiminus[1],mem,hstk1);
      // This is invariant of loop
      // Take out
      queue_iadd_imm(Chiplus[1],Chiin ,PAD_CHI_ATOM);
    }

    /***************************************************************
     * MU = 0 reconstruct                                          *
     ***************************************************************/

    if ( co == 2 ) {
      // Flip to not unrolled due to register pressure
      neo_b = 1;
      neo_a = 2;
      A[1][0][0] = PSI[0][0][0];
      A[1][0][1] = PSI[0][0][1];
      A[1][1][0] = PSI[1][0][0];
      A[1][1][1] = PSI[1][0][1];
      pragma(DCBT_POST,0);
      pragma(DCBT_SPACE,1);
      queue_iadd_imm(Ufetch,Umu,ZERO_IMM);
      // do_prefetch(Ufetch,0);
      do_prefetch(Ufetch,1);
      do_prefetch(Ufetch,2);
      if ( GaugeType == Double ) {
        do_prefetch(Ufetch,3);
        do_prefetch(Ufetch,4);
      }
    }

    /* psi_0 = Chiplus[0] + Chiminus[0] */
    /* psi_1 = Chiplus[1] + Chiminus[1] */
    queue_fadd(PSI[co][0][0],B[eo_b][0][0],A[eo_a][0][0]);
    queue_fadd(PSI[co][0][1],B[eo_b][0][1],A[eo_a][0][1]);
    queue_fadd(PSI[co][1][0],B[eo_b][1][0],A[eo_a][1][0]);
    queue_fadd(PSI[co][1][1],B[eo_b][1][1],A[eo_a][1][1]);

    // Dagger = 0:
    /* psi_2 =-iChiplus[1] +iChiminus[1] */
    /* psi_3 =-iChiplus[0] +iChiminus[0] */
    // Dagger = 1:
    /* psi_2 = iChiplus[1] -iChiminus[1] */
    /* psi_3 = iChiplus[0] -iChiminus[0] */
    if ( dagger == 0 ) {
      queue_fsub(PSI[co][2][0],B[eo_b][1][1],A[eo_a][1][1]);
      queue_fsub(PSI[co][2][1],A[eo_a][1][0],B[eo_b][1][0]);
      queue_fsub(PSI[co][3][0],B[eo_b][0][1],A[eo_a][0][1]);
      queue_fsub(PSI[co][3][1],A[eo_a][0][0],B[eo_b][0][0]);
    } else {
      queue_fsub(PSI[co][2][0],A[eo_a][1][1],B[eo_b][1][1]);
      queue_fsub(PSI[co][2][1],B[eo_b][1][0],A[eo_a][1][0]);
      queue_fsub(PSI[co][3][0],A[eo_a][0][1],B[eo_b][0][1]);
      queue_fsub(PSI[co][3][1],B[eo_b][0][0],A[eo_a][0][0]);
    }

    /***************************************************************
     * MU = 1 reconstruct                                          *
     ***************************************************************/
    eo_a = (eo_a+1)%neo_a;
    eo_b = (eo_b+1)%neo_b;
    for( sp = 0; sp<2; sp++ ) {
      for( ri = 0; ri<2; ri++ ) {
        queue_fload(A[eo_a][sp][ri],CHI_IMM[co][sp][ri],Chiminus[1],TwoSpinType);
        queue_fload(B[eo_b][sp][ri],CHI_IMM[co][sp][ri],Chiplus [1],TwoSpinType);
      }
    }

    if ( co == 0 ) {
      queue_iadd_imm(Chiminus[2],mem,hstk2);
      queue_iadd_imm(Chiminus[3],mem,hstk3);
      queue_iadd_imm(Chiplus[2],Chiplus[1],PAD_CHI_ATOM);
      queue_iadd_imm(Chiplus[3],Chiplus[2],PAD_CHI_ATOM);
    }

    /* psi_0 += Chiplus[0] + Chiminus[0] */
    /* psi_1 += Chiplus[1] + Chiminus[1] */
    queue_fadd(PSI[co][0][0],PSI[co][0][0],B[eo_b][0][0]);
    queue_fadd(PSI[co][0][1],PSI[co][0][1],B[eo_b][0][1]);
    queue_fadd(PSI[co][1][0],PSI[co][1][0],B[eo_b][1][0]);
    queue_fadd(PSI[co][1][1],PSI[co][1][1],B[eo_b][1][1]);

    queue_fadd(PSI[co][0][0],PSI[co][0][0],A[eo_a][0][0]);
    queue_fadd(PSI[co][0][1],PSI[co][0][1],A[eo_a][0][1]);
    queue_fadd(PSI[co][1][0],PSI[co][1][0],A[eo_a][1][0]);
    queue_fadd(PSI[co][1][1],PSI[co][1][1],A[eo_a][1][1]);

    //Dagger == 0
    /* psi_2 +=  Chiplus[1] - Chiminus[1] */
    /* psi_3 += -Chiplus[0] + Chiminus[0] */
    //Dagger == 1
    /* psi_2 -=  Chiplus[1] - Chiminus[1] */
    /* psi_3 -= -Chiplus[0] + Chiminus[0] */
    if ( dagger == 0 ) {
      queue_fadd(PSI[co][2][0],PSI[co][2][0],B[eo_b][1][0]);
      queue_fadd(PSI[co][2][1],PSI[co][2][1],B[eo_b][1][1]);
      queue_fsub(PSI[co][2][0],PSI[co][2][0],A[eo_a][1][0]);
      queue_fsub(PSI[co][2][1],PSI[co][2][1],A[eo_a][1][1]);
      queue_fsub(PSI[co][3][0],PSI[co][3][0],B[eo_b][0][0]);
      queue_fsub(PSI[co][3][1],PSI[co][3][1],B[eo_b][0][1]);
      queue_fadd(PSI[co][3][0],PSI[co][3][0],A[eo_a][0][0]);
      queue_fadd(PSI[co][3][1],PSI[co][3][1],A[eo_a][0][1]);
    } else {
      queue_fsub(PSI[co][2][0],PSI[co][2][0],B[eo_b][1][0]);
      queue_fsub(PSI[co][2][1],PSI[co][2][1],B[eo_b][1][1]);
      queue_fadd(PSI[co][2][0],PSI[co][2][0],A[eo_a][1][0]);
      queue_fadd(PSI[co][2][1],PSI[co][2][1],A[eo_a][1][1]);
      queue_fadd(PSI[co][3][0],PSI[co][3][0],B[eo_b][0][0]);
      queue_fadd(PSI[co][3][1],PSI[co][3][1],B[eo_b][0][1]);
      queue_fsub(PSI[co][3][0],PSI[co][3][0],A[eo_a][0][0]);
      queue_fsub(PSI[co][3][1],PSI[co][3][1],A[eo_a][0][1]);
    }

    /***************************************************************
     * MU = 2 reconstruct                                          *
     ***************************************************************/
    eo_a = (eo_a+1)%neo_a;
    eo_b = (eo_b+1)%neo_b;
    for( sp = 0; sp<2; sp++ ) {
      for( ri = 0; ri<2; ri++ ) {
        queue_fload(A[eo_a][sp][ri],CHI_IMM[co][sp][ri],Chiminus[2],TwoSpinType);
        queue_fload(B[eo_b][sp][ri],CHI_IMM[co][sp][ri],Chiplus [2],TwoSpinType);
      }
    }

    /* psi_0 += Chiplus[0] + Chiminus[0] */
    /* psi_1 += Chiplus[1] + Chiminus[1] */
    queue_fadd(PSI[co][0][0],PSI[co][0][0],B[eo_b][0][0]);
    queue_fadd(PSI[co][0][1],PSI[co][0][1],B[eo_b][0][1]);
    queue_fadd(PSI[co][1][0],PSI[co][1][0],B[eo_b][1][0]);
    queue_fadd(PSI[co][1][1],PSI[co][1][1],B[eo_b][1][1]);

    queue_fadd(PSI[co][0][0],PSI[co][0][0],A[eo_a][0][0]);
    queue_fadd(PSI[co][0][1],PSI[co][0][1],A[eo_a][0][1]);
    queue_fadd(PSI[co][1][0],PSI[co][1][0],A[eo_a][1][0]);
    queue_fadd(PSI[co][1][1],PSI[co][1][1],A[eo_a][1][1]);

    //Dagger == 0
    /* psi_2 +=-iChiplus[0] +iChiminus[0] */
    /* psi_3 += iChiplus[1] -iChiminus[1] */
    //Dagger == 1
    /* psi_2 -=-iChiplus[0] +iChiminus[0] */
    /* psi_3 -= iChiplus[1] -iChiminus[1] */
    if ( dagger == 0 ) {
      queue_fadd(PSI[co][2][0],PSI[co][2][0],B[eo_b][0][1]);
      queue_fsub(PSI[co][2][1],PSI[co][2][1],B[eo_b][0][0]);
      queue_fsub(PSI[co][2][0],PSI[co][2][0],A[eo_a][0][1]);
      queue_fadd(PSI[co][2][1],PSI[co][2][1],A[eo_a][0][0]);
      queue_fsub(PSI[co][3][0],PSI[co][3][0],B[eo_b][1][1]);
      queue_fadd(PSI[co][3][1],PSI[co][3][1],B[eo_b][1][0]);
      queue_fadd(PSI[co][3][0],PSI[co][3][0],A[eo_a][1][1]);
      queue_fsub(PSI[co][3][1],PSI[co][3][1],A[eo_a][1][0]);
    } else {
      queue_fsub(PSI[co][2][0],PSI[co][2][0],B[eo_b][0][1]);
      queue_fadd(PSI[co][2][1],PSI[co][2][1],B[eo_b][0][0]);
      queue_fadd(PSI[co][2][0],PSI[co][2][0],A[eo_a][0][1]);
      queue_fsub(PSI[co][2][1],PSI[co][2][1],A[eo_a][0][0]);
      queue_fadd(PSI[co][3][0],PSI[co][3][0],B[eo_b][1][1]);
      queue_fsub(PSI[co][3][1],PSI[co][3][1],B[eo_b][1][0]);
      queue_fsub(PSI[co][3][0],PSI[co][3][0],A[eo_a][1][1]);
      queue_fadd(PSI[co][3][1],PSI[co][3][1],A[eo_a][1][0]);
    }

    /***************************************************************
     * MU = 3 reconstruct                                          *
     ***************************************************************/
    pragma(LOAD_LIM,2);
    eo_a = (eo_a+1)%neo_a;
    eo_b = (eo_b+1)%neo_b;
    for( sp = 0; sp<2; sp++ ) {
      for( ri = 0; ri<2; ri++ ) {
        queue_fload(A[eo_a][sp][ri],CHI_IMM[co][sp][ri],Chiminus[3],TwoSpinType);
        queue_fload(B[eo_b][sp][ri],CHI_IMM[co][sp][ri],Chiplus [3],TwoSpinType );
      }
    }

    /* psi_0 += Chiplus[0] + Chiminus[0] */
    /* psi_1 += Chiplus[1] + Chiminus[1] */
    queue_fadd(PSI[co][0][0],PSI[co][0][0],B[eo_b][0][0]);
    queue_fadd(PSI[co][0][1],PSI[co][0][1],B[eo_b][0][1]);
    queue_fadd(PSI[co][1][0],PSI[co][1][0],B[eo_b][1][0]);
    queue_fadd(PSI[co][1][1],PSI[co][1][1],B[eo_b][1][1]);

    //Dagger == 0
    /* psi_2 += Chiplus[0] - Chiminus[0] */
    /* psi_3 += Chiplus[1] - Chiminus[1] */
    //Dagger == 1
    /* psi_2 -= Chiplus[0] - Chiminus[0] */
    /* psi_3 -= Chiplus[1] - Chiminus[1] */
    if ( dagger == 0 ) {
      queue_fadd(PSI[co][2][0],PSI[co][2][0],B[eo_b][0][0]);
      queue_fadd(PSI[co][2][1],PSI[co][2][1],B[eo_b][0][1]);
      queue_fadd(PSI[co][3][0],PSI[co][3][0],B[eo_b][1][0]);
      queue_fadd(PSI[co][3][1],PSI[co][3][1],B[eo_b][1][1]);
    } else {
      queue_fsub(PSI[co][2][0],PSI[co][2][0],B[eo_b][0][0]);
      queue_fsub(PSI[co][2][1],PSI[co][2][1],B[eo_b][0][1]);
      queue_fsub(PSI[co][3][0],PSI[co][3][0],B[eo_b][1][0]);
      queue_fsub(PSI[co][3][1],PSI[co][3][1],B[eo_b][1][1]);
    }

    queue_fadd(PSI[co][0][0],PSI[co][0][0],A[eo_a][0][0]);
    queue_fadd(PSI[co][0][1],PSI[co][0][1],A[eo_a][0][1]);
    queue_fadd(PSI[co][1][0],PSI[co][1][0],A[eo_a][1][0]);
    queue_fadd(PSI[co][1][1],PSI[co][1][1],A[eo_a][1][1]);

    if ( dagger == 0 ) {
      queue_fsub(PSI[co][2][0],PSI[co][2][0],A[eo_a][0][0]);
      queue_fsub(PSI[co][2][1],PSI[co][2][1],A[eo_a][0][1]);
      queue_fsub(PSI[co][3][0],PSI[co][3][0],A[eo_a][1][0]);
      queue_fsub(PSI[co][3][1],PSI[co][3][1],A[eo_a][1][1]);
    } else {
      queue_fadd(PSI[co][2][0],PSI[co][2][0],A[eo_a][0][0]);
      queue_fadd(PSI[co][2][1],PSI[co][2][1],A[eo_a][0][1]);
      queue_fadd(PSI[co][3][0],PSI[co][3][0],A[eo_a][1][0]);
      queue_fadd(PSI[co][3][1],PSI[co][3][1],A[eo_a][1][1]);
    }

    /*
     * Store the spinors. If this is problematic
     * in terms of PEC WriteBuf misses, I could
     * store to the stack and copy out later.
     */
    if ( co != 2 ) {
      LOAD_CHI_MU0(0,co+1)
      eo_a=0;
      eo_b=0;
    }
    queue_fstore(PSI[co][0][0],PSI_IMM[0][co][0],psi,FourSpinType);
    queue_fstore(PSI[co][0][1],PSI_IMM[0][co][1],psi,FourSpinType);
  }

  /*
   * Store out in linear order now
   */
  pragma(STORE_LIM,2);
  pragma(DCBT_SPACE,8);
  for ( co=0; co<3; co ++ ) {
    queue_fstore(PSI[co][1][0],PSI_IMM[1][co][0],psi,FourSpinType);
    queue_fstore(PSI[co][1][1],PSI_IMM[1][co][1],psi,FourSpinType);
  }
  for ( co=0; co<3; co ++ ) {
    queue_fstore(PSI[co][2][0],PSI_IMM[2][co][0],psi,FourSpinType);
    queue_fstore(PSI[co][2][1],PSI_IMM[2][co][1],psi,FourSpinType);
  }
  if ( TwoSpinType == FourSpinType ) {
    queue_iadd_imm(Chidrain,psi,CHI_ATOM);
  } else {
    queue_iadd_imm(Chidrain,mem,hbitbucket);
    for ( co=0; co<3; co ++ ) {
      queue_fstore(PSI[co][3][0],PSI_IMM[3][co][0],psi,FourSpinType);
      queue_fstore(PSI[co][3][1],PSI_IMM[3][co][1],psi,FourSpinType);
    }
  }
  queue_iadd_imm(psi,psi,PSI_ATOM);

  /*
   * Put in an artificial dependency here
   * to try to stop the preloads getting above the last load of
   * reconstruct.
   */
  queue_iadd_imm(Chiplus[3],Chiplus[3],ZERO_IMM);
  queue_iadd_imm(Chiin ,Chiplus[3],PAD_CHI_ATOM);

  pragma(DCBT_SPACE,0);
  do_prefetch(Chiin,0);
  do_prefetch(Chiin,1);
  if ( SizeofDatum(TwoSpinType) == 8 ) do_prefetch(Chiin,2);

  PRELOAD_U
  PRELOAD_CHI

  /* TERMINATION point of the loop*/
  stop_loop(branchsite,length);
  CHIDRAIN
  make_inst(DIRECTIVE,Target,retno);

  /*
   *
   * EPILOGUE
   *
   */
  restore_regs();
  free_stack();
  make_inst(DIRECTIVE,Exit_Routine,name);

  return;
}
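/*
 * Reference sketch (not generated output, and not part of the original
 * generator): the queue_three_cmuls/queue_three_cmadds sequence in the SU3
 * section above is understood to schedule a 3x3 complex (SU(3)) link matrix
 * acting on both spin components of a colour two-spinor, accumulated row by
 * row. The plain scalar helper below illustrates that product under the
 * assumption of a row-major U and a chi[colour][spin] layout.
 */
#include <complex>

typedef std::complex<double> su3_cplx;

static void su3_mult_two_spinor_sketch(const su3_cplx U[3][3],
                                       const su3_cplx chi[3][2],
                                       su3_cplx chi_out[3][2])
{
  for (int row = 0; row < 3; row++) {
    for (int spin = 0; spin < 2; spin++) {
      su3_cplx acc(0.0, 0.0);                 /* first term: the cmul */
      for (int col = 0; col < 3; col++) {
        acc += U[row][col] * chi[col][spin];  /* remaining terms: the cmadds */
      }
      chi_out[row][spin] = acc;
    }
  }
}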
void dwf_deriv( char *name)
{
  /*
   * This marks the argument registers as defined by ABI as off limits
   * to us until they are freed by "getarg()";
   */
  int dum = defargcount(1);

  int retno;

  /*
   * S = phi^dag (MdagM)^-1 phi
   *
   * dS = phi^dag (MdagM)^-1 [ dMdag M + Mdag dM ] (MdagM)^-1 phi
   *
   * Let X = (MdagM)^-1 phi
   *     Y = M X = M^-dag phi
   *
   * Want terms: Ydag dM X
   *             Xdag dMdag Y
   *
   * Take Xdag (1-gamma) Y
   *
   * Still a bit confused about the 1+g / 1-g terms; but this may be simply a
   * factor of two as we add +h.c.
   * Will continue to follow Chroma's routine
   */
  reg_array_2d(Y,Cregs,4,3); // 4 spinor - 24 regs
  reg_array_2d(X,Cregs,4,3); // 4 spinor - 12 regs
  reg_array_1d(F,Cregs,3);   // Force
  alreg(Z,Cregs);            // Zero
  alreg(creg,Cregs);         // Zero

  offset_3d(CHIIMM,FourSpinType,2,3,2*nsimd());
  offset_3d(PSIIMM,FourSpinType,4,3,2*nsimd());
  offset_3d(GIMM  ,GaugeType, 3, 3 ,2*nsimd() );

  def_off( GAUGE_SITE_IMM, FourSpinType,4*18*nsimd());
  def_off( MAT_IMM , GaugeType,18*nsimd());
  def_off( PSI_IMM , FourSpinType,24*nsimd());
  def_off( CHI_IMM , FourSpinType,12*nsimd());
  def_off( CONST_ZERO_OFFSET,Double,2*2*nsimd());

  /*
   * Integer registers
   */
  alreg(F_p,Iregs);      /*Pointer to the current cpt of force field */
  alreg(F_p_s,Iregs);
  alreg(Y_mu,Iregs);
  alreg(Y_p,Iregs);
  alreg(X_p,Iregs);
  alreg(length,Iregs);   /*number of sites*/
  alreg(tab,Iregs);      /*Pointer to current entry in offset table*/
  alreg(Complex_i,Iregs);/*Point to (0,1)x Nsimd*/
  alreg(Ls,Iregs);
  alreg(s,Iregs);
  alreg(recbuf_base,Iregs);
  alreg(args,Iregs);
  alreg(s_offset,Iregs);

  /*Useful integer immediate constants, in units of Fsize*/
  def_off( ZERO_IMM,Byte,0);
  def_off( minusone,Byte,-1);
  def_off( one,Byte,1);

  // Mask bits for predicating directions
  def_off( mask_0,Byte,1);
  def_off( mask_1,Byte,2);
  def_off( mask_2,Byte,4);
  def_off( mask_3,Byte,8);
  def_off( mask_4,Byte,16);
  def_off( mask_5,Byte,32);
  def_off( mask_6,Byte,64);
  def_off( mask_7,Byte,128);
  int mask_imm[8] = { mask_0, mask_1, mask_2, mask_3,
                      mask_4, mask_5, mask_6, mask_7 };
  alreg(mask ,Iregs);

  offset_1d(TAB_IMM,TableType,17);

  // Integer sizes
  int Isize  = def_offset(PROC->I_size,Byte,"Isize");
  int ISsize = def_offset(PROC->IS_size,Byte,"ISsize");

  int i,j,co,sp;

  /*********************************************************************/

  make_inst(DIRECTIVE,Enter_Routine,name);
  grab_stack(0);
  save_regs();

  /*********************************************
   * our arguments
   *********************************************
   */
  getarg(args); /*Pointer to arg list*/
  queue_iload(X_p,      ZERO_IMM,args);   queue_load_addr(args,Isize,args); //0
  queue_iload(Y_p,      ZERO_IMM,args);   queue_load_addr(args,Isize,args); //1
  queue_iload(F_p,      ZERO_IMM,args);   queue_load_addr(args,Isize,args); //2
  queue_iload(length,   ZERO_IMM,args);   queue_load_addr(args,Isize,args); //3
  queue_iload(Ls,       ZERO_IMM,args);   queue_load_addr(args,Isize,args); //4
  queue_iload(tab,      ZERO_IMM,args);   queue_load_addr(args,Isize,args); //5
  queue_iload(Complex_i,ZERO_IMM,args);   queue_load_addr(args,Isize,args); //6
                                          queue_load_addr(args,Isize,args); //7
  queue_iload(recbuf_base,ZERO_IMM,args); queue_load_addr(args,Isize,args); //8

  /**************************************************
   * Load common constants into Iregs
   **************************************************
   */
  for (int i =0; i<12; i++ ) {
    need_constant(i*2*SizeofDatum(FourSpinType)*nsimd());
  }
  for (int i =0; i<9; i++ ) {
    need_constant(i*2*SizeofDatum(GaugeType)*nsimd());
  }
  complex_constants_prepare(creg,Complex_i);
  complex_load(Z,CONST_ZERO_OFFSET,Complex_i,Double);

  // Site loop
  retno = get_target_label();
  check_iterations(length,retno);
  int branchsite = start_loop(length);

  // S loop
  queue_iload_short(mask,TAB_IMM[10],tab);
  queue_iadd_imm (s,Ls,ZERO_IMM);
  queue_iload_imm(s_offset,ZERO_IMM);
  int branchls = start_loop(s);

  queue_iadd_imm(F_p_s,F_p,ZERO_IMM);
  // debugI(s);

  // Loop over directions
  for ( int mu=0;mu<4;mu++ ) {

    int dir = mu*2+1; // Always in forward dir

    // Complex branch structure for interior/exterior neighbours
    int lab_proj_mu  = get_target_label();
    int lab_continue = get_target_label();

    queue_iand_imm (Y_mu,mask,mask_imm[dir]); // non-zero if exterior
    check_iterations(Y_mu,lab_proj_mu);

    // Exterior points are already projected. Just load.
    queue_iload_short(Y_mu,TAB_IMM[dir],tab);
    queue_iadd (Y_mu,Y_mu,recbuf_base);
    // debugI(Y_mu);
    // debugI(recbuf_base);
    queue_iadd (Y_mu,Y_mu,s_offset);
    for(int sp=0;sp<2;sp++){
      for(int co=0;co<3;co++){
        complex_load(Y[sp][co],PSIIMM[sp][co][0],Y_mu,FourSpinType);
      }
    }
    jump(lab_continue);

    make_inst(DIRECTIVE,Target,lab_proj_mu);

    // Interior points are not already projected.
    // * Spin project 4 spinor
    queue_iload_short(Y_mu,TAB_IMM[dir],tab);
    // debugI(tab);
    // debugI(Y_mu);
    queue_iadd (Y_mu,Y_mu,Y_p);
    queue_iadd (Y_mu,Y_mu,s_offset); // offset for this "s"
    queue_iadd (Y_mu,Y_mu,s_offset); // offset for this "s"
    for(int sp=0;sp<4;sp++){
      for(int co=0;co<3;co++){
        complex_load(X[sp][co],PSIIMM[sp][co][0],Y_mu,FourSpinType);
        // debugC(X[sp][co]);
      }
    }

    int pm = 1; // pm=0 == 1+gamma, pm=1 => 1-gamma
    if ( dagger ) pm = 0;

    if ( mu == 0 ) {
      if ( pm ==0 ) {
        for(co=0;co<3;co++) complex_ApiB(Y[0][co],X[0][co],X[3][co]);
        for(co=0;co<3;co++) complex_ApiB(Y[1][co],X[1][co],X[2][co]);
      } else {
        for(co=0;co<3;co++) complex_AmiB(Y[0][co],X[0][co],X[3][co]);
        for(co=0;co<3;co++) complex_AmiB(Y[1][co],X[1][co],X[2][co]);
      }
    } else if ( mu == 1 ) {
      if ( pm ==0 ) {
        for(co=0;co<3;co++) complex_sub(Y[0][co],X[0][co],X[3][co]);
        for(co=0;co<3;co++) complex_add(Y[1][co],X[1][co],X[2][co]);
      } else {
        for(co=0;co<3;co++) complex_add(Y[0][co],X[0][co],X[3][co]);
        for(co=0;co<3;co++) complex_sub(Y[1][co],X[1][co],X[2][co]);
      }
    } else if ( mu == 2 ) {
      if ( pm ==0 ) {
        for(co=0;co<3;co++) complex_ApiB(Y[0][co],X[0][co],X[2][co]);
        for(co=0;co<3;co++) complex_AmiB(Y[1][co],X[1][co],X[3][co]);
      } else {
        for(co=0;co<3;co++) complex_AmiB(Y[0][co],X[0][co],X[2][co]);
        for(co=0;co<3;co++) complex_ApiB(Y[1][co],X[1][co],X[3][co]);
      }
    } else if ( mu == 3 ) {
      if ( pm ==0 ) {
        for(co=0;co<3;co++) complex_add(Y[0][co],X[0][co],X[2][co]);
        for(co=0;co<3;co++) complex_add(Y[1][co],X[1][co],X[3][co]);
      } else {
        for(co=0;co<3;co++) complex_sub(Y[0][co],X[0][co],X[2][co]);
        for(co=0;co<3;co++) complex_sub(Y[1][co],X[1][co],X[3][co]);
      }
    }

    make_inst(DIRECTIVE,Target,lab_continue);

    ///////////////////////////////////////////////////////////////
    // Y contains spin projection of forward neighbour in mu direction
    // Repromote Y to 4 spinor
    ///////////////////////////////////////////////////////////////
    for(int co_y=0;co_y<3;co_y++){
      if ( (mu==0) && (pm==0) ) complex_AmiB(Y[2][co_y],Z,Y[1][co_y]);
      if ( (mu==0) && (pm==1) ) complex_ApiB(Y[2][co_y],Z,Y[1][co_y]);
      if ( (mu==1) && (pm==0) ) complex_add (Y[2][co_y],Z,Y[1][co_y]);
      if ( (mu==1) && (pm==1) ) complex_sub (Y[2][co_y],Z,Y[1][co_y]);
      if ( (mu==2) && (pm==0) ) complex_AmiB(Y[2][co_y],Z,Y[0][co_y]);
      if ( (mu==2) && (pm==1) ) complex_ApiB(Y[2][co_y],Z,Y[0][co_y]);
      if ( (mu==3) && (pm==0) ) complex_add (Y[2][co_y],Z,Y[0][co_y]);
      if ( (mu==3) && (pm==1) ) complex_sub (Y[2][co_y],Z,Y[0][co_y]);

      if ( (mu==0) && (pm==0) ) complex_AmiB(Y[3][co_y],Z,Y[0][co_y]);
      if ( (mu==0) && (pm==1) ) complex_ApiB(Y[3][co_y],Z,Y[0][co_y]);
      if ( (mu==1) && (pm==0) ) complex_sub (Y[3][co_y],Z,Y[0][co_y]);
      if ( (mu==1) && (pm==1) ) complex_add (Y[3][co_y],Z,Y[0][co_y]);
      if ( (mu==2) && (pm==0) ) complex_ApiB(Y[3][co_y],Z,Y[1][co_y]);
      if ( (mu==2) && (pm==1) ) complex_AmiB(Y[3][co_y],Z,Y[1][co_y]);
      if ( (mu==3) && (pm==0) ) complex_add (Y[3][co_y],Z,Y[1][co_y]);
      if ( (mu==3) && (pm==1) ) complex_sub (Y[3][co_y],Z,Y[1][co_y]);
    }

    ///////////////////////////////////////////////////////////////
    // Load X
    ///////////////////////////////////////////////////////////////
    for(int co_x=0;co_x<3;co_x++){
      for(int sp=0;sp<4;sp++) {
        complex_load(X[sp][co_x],PSIIMM[sp][co_x][0],X_p,FourSpinType);
      }
    }

    ///////////////////////////////////////////////////////////////
    // Spin trace tensor product
    ///////////////////////////////////////////////////////////////
    for(int co_x=0;co_x<3;co_x++){
      // Spin trace outer product
      for ( int co_y=0;co_y<3;co_y++) complex_load    (F[co_y],GIMM[co_y][co_x][0],F_p_s);
      for ( int co_y=0;co_y<3;co_y++) complex_conjmadd(F[co_y],X[0][co_x],Y[0][co_y]);
      for ( int co_y=0;co_y<3;co_y++) complex_conjmadd(F[co_y],X[1][co_x],Y[1][co_y]);
      for ( int co_y=0;co_y<3;co_y++) complex_conjmadd(F[co_y],X[2][co_x],Y[2][co_y]);
      for ( int co_y=0;co_y<3;co_y++) complex_conjmadd(F[co_y],X[3][co_x],Y[3][co_y]);
      for ( int co_y=0;co_y<3;co_y++) complex_store   (F[co_y],GIMM[co_y][co_x][0],F_p_s);
    }

    queue_load_addr(F_p_s,MAT_IMM,F_p_s);
  }

  queue_iadd_imm(X_p,X_p,PSI_IMM);
  queue_iadd_imm(s_offset,s_offset,CHI_IMM);
  stop_loop(branchls,s);

  queue_iadd_imm(F_p,F_p_s,ZERO_IMM);
  queue_load_addr(tab,TAB_IMM[16],tab);
  stop_loop(branchsite,length);

  make_inst(DIRECTIVE,Target,retno);

  /*
   *
   * EPILOGUE
   *
   */
  restore_regs();
  free_stack();
  make_inst(DIRECTIVE,Exit_Routine,name);

  return;
}
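/*
 * Reference sketch (an assumption about what the complex_conjmadd block in
 * dwf_deriv accumulates, not part of the generator): for one site, direction
 * and s, the force contribution is the colour outer product of the local
 * 4-spinor X with the reconstructed neighbour 4-spinor Y, traced over spin:
 *
 *   F[co_y][co_x] += sum_sp conj(X[sp][co_x]) * Y[sp][co_y]
 *
 * Whether the conjugate sits on X or on Y depends on the convention inside
 * complex_conjmadd; the loop structure is what the sketch is meant to show.
 */
#include <complex>

typedef std::complex<double> force_cplx;

static void accumulate_force_site_sketch(force_cplx F[3][3],
                                         const force_cplx X[4][3],
                                         const force_cplx Y[4][3])
{
  for (int co_x = 0; co_x < 3; co_x++) {
    for (int co_y = 0; co_y < 3; co_y++) {
      for (int sp = 0; sp < 4; sp++) {
        F[co_y][co_x] += std::conj(X[sp][co_x]) * Y[sp][co_y];
      }
    }
  }
}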
static void generate_geometry_naive_surface_nets() {
    static Vector<int> inds(65*65*2);

    for (int z = 0; z < 64; z++) {
    for (int y = 0; y < 64; y++) {
    for (int x = 0; x < 64; x++) {
        const Vec3i p(x, y, z);
        const float vs[8] = {
            voxels[offset_3d({x,   y,   z},   Vec3i(65))],
            voxels[offset_3d({x+1, y,   z},   Vec3i(65))],
            voxels[offset_3d({x,   y+1, z},   Vec3i(65))],
            voxels[offset_3d({x+1, y+1, z},   Vec3i(65))],
            voxels[offset_3d({x,   y,   z+1}, Vec3i(65))],
            voxels[offset_3d({x+1, y,   z+1}, Vec3i(65))],
            voxels[offset_3d({x,   y+1, z+1}, Vec3i(65))],
            voxels[offset_3d({x+1, y+1, z+1}, Vec3i(65))],
        };

        const int config_n =
            ((vs[0] < 0.0f) << 0) |
            ((vs[1] < 0.0f) << 1) |
            ((vs[2] < 0.0f) << 2) |
            ((vs[3] < 0.0f) << 3) |
            ((vs[4] < 0.0f) << 4) |
            ((vs[5] < 0.0f) << 5) |
            ((vs[6] < 0.0f) << 6) |
            ((vs[7] < 0.0f) << 7);
        if (config_n == 0 || config_n == 255)
            continue;

        Vec3f average(0);
        int average_n = 0;
        auto do_edge = [&](float va, float vb, int axis, const Vec3i &p) {
            if ((va < 0.0) == (vb < 0.0))
                return;
            Vec3f v = ToVec3f(p);
            v[axis] += va / (va - vb);
            average += v;
            average_n++;
        };

        do_edge(vs[0], vs[1], 0, Vec3i(x,   y,   z));
        do_edge(vs[2], vs[3], 0, Vec3i(x,   y+1, z));
        do_edge(vs[4], vs[5], 0, Vec3i(x,   y,   z+1));
        do_edge(vs[6], vs[7], 0, Vec3i(x,   y+1, z+1));

        do_edge(vs[0], vs[2], 1, Vec3i(x,   y,   z));
        do_edge(vs[1], vs[3], 1, Vec3i(x+1, y,   z));
        do_edge(vs[4], vs[6], 1, Vec3i(x,   y,   z+1));
        do_edge(vs[5], vs[7], 1, Vec3i(x+1, y,   z+1));

        do_edge(vs[0], vs[4], 2, Vec3i(x,   y,   z));
        do_edge(vs[1], vs[5], 2, Vec3i(x+1, y,   z));
        do_edge(vs[2], vs[6], 2, Vec3i(x,   y+1, z));
        do_edge(vs[3], vs[7], 2, Vec3i(x+1, y+1, z));

        const Vec3f v = average / Vec3f(average_n);
        inds[offset_3d_slab(p, Vec3i(65))] = vertices.length();
        vertices.append({v, Vec3f(0)});

        const bool flip = vs[0] < 0.0f;
        if (p.y > 0 && p.z > 0 && (vs[0] < 0.0f) != (vs[1] < 0.0f)) {
            quad(flip,
                inds[offset_3d_slab(Vec3i(p.x, p.y,   p.z),   Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x, p.y,   p.z-1), Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x, p.y-1, p.z-1), Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x, p.y-1, p.z),   Vec3i(65))]
            );
        }
        if (p.x > 0 && p.z > 0 && (vs[0] < 0.0f) != (vs[2] < 0.0f)) {
            quad(flip,
                inds[offset_3d_slab(Vec3i(p.x,   p.y, p.z),   Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x-1, p.y, p.z),   Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x-1, p.y, p.z-1), Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x,   p.y, p.z-1), Vec3i(65))]
            );
        }
        if (p.x > 0 && p.y > 0 && (vs[0] < 0.0f) != (vs[4] < 0.0f)) {
            quad(flip,
                inds[offset_3d_slab(Vec3i(p.x,   p.y,   p.z), Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x,   p.y-1, p.z), Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x-1, p.y-1, p.z), Vec3i(65))],
                inds[offset_3d_slab(Vec3i(p.x-1, p.y,   p.z), Vec3i(65))]
            );
        }
    }}}

    for (Vertex &v : vertices)
        v.normal = normalize(v.normal);
}
static void generate_geometry_smooth() {
    static Vector<Vec3i> slab_inds(65*65*2);

    for (int z = 0; z < 64; z++) {
    for (int y = 0; y < 64; y++) {
    for (int x = 0; x < 64; x++) {
        const Vec3i p(x, y, z);
        const float vs[8] = {
            voxels[offset_3d({x,   y,   z},   Vec3i(65))],
            voxels[offset_3d({x+1, y,   z},   Vec3i(65))],
            voxels[offset_3d({x,   y+1, z},   Vec3i(65))],
            voxels[offset_3d({x+1, y+1, z},   Vec3i(65))],
            voxels[offset_3d({x,   y,   z+1}, Vec3i(65))],
            voxels[offset_3d({x+1, y,   z+1}, Vec3i(65))],
            voxels[offset_3d({x,   y+1, z+1}, Vec3i(65))],
            voxels[offset_3d({x+1, y+1, z+1}, Vec3i(65))],
        };

        const int config_n =
            ((vs[0] < 0.0f) << 0) |
            ((vs[1] < 0.0f) << 1) |
            ((vs[2] < 0.0f) << 2) |
            ((vs[3] < 0.0f) << 3) |
            ((vs[4] < 0.0f) << 4) |
            ((vs[5] < 0.0f) << 5) |
            ((vs[6] < 0.0f) << 6) |
            ((vs[7] < 0.0f) << 7);
        if (config_n == 0 || config_n == 255)
            continue;

        auto do_edge = [&](int n_edge, float va, float vb, int axis, const Vec3i &p) {
            if ((va < 0.0) == (vb < 0.0))
                return;
            Vec3f v = ToVec3f(p);
            v[axis] += va / (va - vb);
            slab_inds[offset_3d_slab(p, Vec3i(65))][axis] = vertices.length();
            vertices.append({v, Vec3f(0)});
        };

        if (p.y == 0 && p.z == 0) do_edge(0,  vs[0], vs[1], 0, Vec3i(x,   y,   z));
        if (p.z == 0)             do_edge(1,  vs[2], vs[3], 0, Vec3i(x,   y+1, z));
        if (p.y == 0)             do_edge(2,  vs[4], vs[5], 0, Vec3i(x,   y,   z+1));
                                  do_edge(3,  vs[6], vs[7], 0, Vec3i(x,   y+1, z+1));
        if (p.x == 0 && p.z == 0) do_edge(4,  vs[0], vs[2], 1, Vec3i(x,   y,   z));
        if (p.z == 0)             do_edge(5,  vs[1], vs[3], 1, Vec3i(x+1, y,   z));
        if (p.x == 0)             do_edge(6,  vs[4], vs[6], 1, Vec3i(x,   y,   z+1));
                                  do_edge(7,  vs[5], vs[7], 1, Vec3i(x+1, y,   z+1));
        if (p.x == 0 && p.y == 0) do_edge(8,  vs[0], vs[4], 2, Vec3i(x,   y,   z));
        if (p.y == 0)             do_edge(9,  vs[1], vs[5], 2, Vec3i(x+1, y,   z));
        if (p.x == 0)             do_edge(10, vs[2], vs[6], 2, Vec3i(x,   y+1, z));
                                  do_edge(11, vs[3], vs[7], 2, Vec3i(x+1, y+1, z));

        int edge_indices[12];
        edge_indices[0]  = slab_inds[offset_3d_slab({p.x,   p.y,   p.z  }, Vec3i(65))].x;
        edge_indices[1]  = slab_inds[offset_3d_slab({p.x,   p.y+1, p.z  }, Vec3i(65))].x;
        edge_indices[2]  = slab_inds[offset_3d_slab({p.x,   p.y,   p.z+1}, Vec3i(65))].x;
        edge_indices[3]  = slab_inds[offset_3d_slab({p.x,   p.y+1, p.z+1}, Vec3i(65))].x;
        edge_indices[4]  = slab_inds[offset_3d_slab({p.x,   p.y,   p.z  }, Vec3i(65))].y;
        edge_indices[5]  = slab_inds[offset_3d_slab({p.x+1, p.y,   p.z  }, Vec3i(65))].y;
        edge_indices[6]  = slab_inds[offset_3d_slab({p.x,   p.y,   p.z+1}, Vec3i(65))].y;
        edge_indices[7]  = slab_inds[offset_3d_slab({p.x+1, p.y,   p.z+1}, Vec3i(65))].y;
        edge_indices[8]  = slab_inds[offset_3d_slab({p.x,   p.y,   p.z  }, Vec3i(65))].z;
        edge_indices[9]  = slab_inds[offset_3d_slab({p.x+1, p.y,   p.z  }, Vec3i(65))].z;
        edge_indices[10] = slab_inds[offset_3d_slab({p.x,   p.y+1, p.z  }, Vec3i(65))].z;
        edge_indices[11] = slab_inds[offset_3d_slab({p.x+1, p.y+1, p.z  }, Vec3i(65))].z;

        const uint64_t config = marching_cube_tris[config_n];
        const int n_triangles = config & 0xF;
        const int n_indices = n_triangles * 3;
        const int index_base = indices.length();

        int offset = 4;
        for (int i = 0; i < n_indices; i++) {
            const int edge = (config >> offset) & 0xF;
            indices.append(edge_indices[edge]);
            offset += 4;
        }
        for (int i = 0; i < n_triangles; i++) {
            triangle(
                indices[index_base+i*3+0],
                indices[index_base+i*3+1],
                indices[index_base+i*3+2]);
        }
    }}}

    for (Vertex &v : vertices)
        v.normal = normalize(v.normal);
}