int emit_vrr_build_macro() { int old_am = Params.old_am; int new_am = Params.opt_am; int max_class_size = Params.max_class_size; int am_to_inline = Params.max_am_to_inline_vrr_worker; FILE *code; int i, j, k, l, f; int a, b; int flag; int am[2][3]; int am_in[2]; int current_highest_am, to_inline; int nflip = 0; int t1, t2, t3, t4; int class_size; int type; int max1 = 0; int max2 = 0; int foo; int la, lc, lc_min, lc_max; int k1max, k2max, k3max; int split,num_subfunctions,subbatch_length; int curr_count,curr_subfunction; static char *k4[] = {"lpoz","lpon"}; static const char *k1_suff = "o2z"; static const char *k2_suff = "o2zn"; static const char *k3_suff = "o2n"; char *code_name; char *function_name; char **subfunction_name; char *cpcommand; int errcod; k1 = (char **) malloc(new_am*sizeof(char *)); k2 = (char **) malloc(new_am*sizeof(char *)); k3 = (char **) malloc(new_am*sizeof(char *)); for(i=1;i<=new_am;i++) { j = strlen(number[i]); k1[i-1] = (char*) malloc((4+j)*sizeof(char)); k2[i-1] = (char*) malloc((5+j)*sizeof(char)); k3[i-1] = (char*) malloc((4+j)*sizeof(char)); strcpy(k1[i-1],number[i]); strcpy(k2[i-1],number[i]); strcpy(k3[i-1],number[i]); strcat(k1[i-1],k1_suff); strcat(k2[i-1],k2_suff); strcat(k3[i-1],k3_suff); } code_name = (char *) malloc(sizeof(char)*21); cpcommand = (char *) malloc(sizeof(char)*50); function_name = (char *) malloc(sizeof(char)*18); for(la=0;la<=new_am;la++) { lc_min = (la >= old_am + 1) ? 0 : old_am + 1; lc_max = new_am; for(lc=lc_min;lc<=lc_max;lc++) { /* Is this function to be made inline */ current_highest_am = (la > lc) ? la : lc; to_inline = (current_highest_am <= am_to_inline) ? 1 : 0; if (!to_inline) continue; fprintf(outfile," AM_a = %c AM_c = %c\n",am_letter[la],am_letter[lc]); am_in[0] = la; am_in[1] = lc; if (la == 0) { a = 1; k2max = la; k3max = lc - 1; } else { a = 0; k2max = lc; k1max = la - 1; } foo = 5; if(a==0) foo = 4; class_size = ((am_in[a]+1)*(am_in[a]+2)*(am_in[a^1]+1)*(am_in[a^1]+2))/4; /* If the routine has to be split AND inlined - the user probably doesn't know what he/she is doing */ if (class_size > max_class_size) punt("MAX_CLASS_SIZE is too small with the given inlining thresholds"); else { split = 0; } if(a==0) foo = 4; sprintf(function_name,"build_%c0%c0",am_letter[la],am_letter[lc]); sprintf(code_name,"build_%c0%c0.h",am_letter[la],am_letter[lc]); code = fopen(code_name,"w"); /*target |I0[],I1[] | |I2[],I3[] | | | I4[] | | | | */ t1 = t2 = t3 = t4 = 0; /* print local variable declarations */ fprintf(code,"#ifndef _libint_%s\n",function_name); fprintf(code,"#define _libint_%s\n",function_name); fprintf(code," /* These machine-generated functions compute a quartet of (%cs|%cs) integrals */\n\n",am_letter[la],am_letter[lc]); fprintf(code,"#define _%s(Data, vp, I0, I1, I2, I3, I4)\\\n{\\\n",function_name); declare_localv(a,k1max,k2max,k3max,code); define_localv(a,foo,k1max,k2max,k3max,code); fprintf(code,"\\\n"); for(i = 0; i <= am_in[0]; i++){ am[0][0] = am_in[0] - i; for(j = 0; j <= i; j++){ am[0][1] = i - j; am[0][2] = j; for(k = 0; k <= am_in[1]; k++){ am[1][0] = am_in[1] - k; for(l = 0; l <= k; l++){ am[1][1] = k - l; am[1][2] = l; if(am[a][2]) b = 2; if(am[a][1]) b = 1; if(am[a][0]) b = 0; am[a][b] = am[a][b] - 1; am_in[a] = am_in[a] - 1; t2 = hash(am,am_in); fprintf(code, "*(target++) = U%d%d*i0[%d] + U%d%d*i1[%d]", a*2, b, t2, foo, b , t2); if(am[a][b]){ am[a][b] = am[a][b] - 1; am_in[a] = am_in[a] - 1; t3 = hash(am,am_in); fprintf(code, "\\\n + (%s)*(i2[%d] - (%s)*i3[%d])", (a==0 ? k1[am[a][b]] : k3[am[a][b]]), t3, (k4[a]), t3); max1 = (max1>am[a][b]+1) ? max1 : am[a][b]+1; am[a][b] = am[a][b] + 1; am_in[a] = am_in[a] + 1; } if(am[a^1][b]){ am[a^1][b] = am[a^1][b] - 1; am_in[a^1] = am_in[a^1] - 1; t4 = hash(am,am_in); fprintf(code, "\\\n + (%s)*i4[%d]", k2[am[a^1][b]], t4); max2 = (max2>am[a^1][b]+1) ? max2 : am[a^1][b]+1; am[a^1][b] = am[a^1][b] + 1; am_in[a^1] = am_in[a^1] + 1; } fprintf(code, ";\\\n"); am[a][b] = am[a][b] + 1; am_in[a] = am_in[a] + 1; t1++; curr_count++; } } } } fprintf(code,"\\\n}\n"); fprintf(code,"\n#endif\n"); /* end of #ifndef _libint_.... */ fclose(code); printf("Done with %s\n",code_name); } } free(function_name); free(code_name); }
void emit_vrr_build() { int old_am = Params.old_am; int new_am = Params.opt_am; int max_class_size = Params.max_class_size; int am_to_inline = Params.max_am_to_inline_vrr_worker; FILE *code; int i, j, k, l, f; int a, b; /*int flag;*/ int am[2][3]; int am_in[2]; int current_highest_am, to_inline; /*int nflip = 0;*/ int t1, t2, t3, t4; int class_size; /*int type;*/ int max1 = 0; int max2 = 0; int foo; int la, lc, lc_min, lc_max; int k1max, k2max, k3max; int split,num_subfunctions,subbatch_length; int curr_count,curr_subfunction; static const char *k4[] = {"lpoz","lpon"}; static const char *k1_suff = "o2z"; static const char *k2_suff = "o2zn"; static const char *k3_suff = "o2n"; char *code_name; char *function_name; char **subfunction_name; k1 = (char **) malloc(new_am*sizeof(char *)); k2 = (char **) malloc(new_am*sizeof(char *)); k3 = (char **) malloc(new_am*sizeof(char *)); for(i=1;i<=new_am;i++) { j = strlen(number[i]); k1[i-1] = (char*) malloc((4+j)*sizeof(char)); k2[i-1] = (char*) malloc((5+j)*sizeof(char)); k3[i-1] = (char*) malloc((4+j)*sizeof(char)); strcpy(k1[i-1],number[i]); strcpy(k2[i-1],number[i]); strcpy(k3[i-1],number[i]); strcat(k1[i-1],k1_suff); strcat(k2[i-1],k2_suff); strcat(k3[i-1],k3_suff); } code_name = (char *) malloc(sizeof(char)*21); function_name = (char *) malloc(sizeof(char)*18); for(la=0;la<=new_am;la++) { lc_min = (la >= old_am + 1) ? 0 : old_am + 1; lc_max = new_am; for(lc=lc_min;lc<=lc_max;lc++) { /* Is this function to be made inline */ current_highest_am = (la > lc) ? la : lc; to_inline = (current_highest_am <= am_to_inline) ? 1 : 0; fprintf(outfile," AM_a = %c AM_c = %c\n",am_letter[la],am_letter[lc]); am_in[0] = la; am_in[1] = lc; if (la == 0) { a = 1; k2max = la; k3max = lc - 1; } else { a = 0; k2max = lc; k1max = la - 1; } foo = 5; if(a==0) foo = 4; class_size = ((am_in[a]+1)*(am_in[a]+2)*(am_in[a^1]+1)*(am_in[a^1]+2))/4; fprintf(vrr_header,"#define _BUILD_%c0%c0(Data,vp,i0,i1,i2,i3,i4) {",am_letter[la],am_letter[lc]); /* Decide if the routine has to be split into several routines producing "subbatches" */ if (class_size > max_class_size) { split = 1; num_subfunctions = ceil((double)class_size/max_class_size); subbatch_length = 1 + class_size/num_subfunctions; fprintf(vrr_header," tmp = _build_%c0%c0_0(Data,vp,i0,i1,i2,i3,i4); \\\n",am_letter[la],am_letter[lc]); for(f=1;f<num_subfunctions;f++) fprintf(vrr_header," tmp = _build_%c0%c0_%d(Data,tmp,i0,i1,i2,i3,i4); \\\n",am_letter[la],am_letter[lc],f); fprintf(vrr_header,"}\n"); if (to_inline) fprintf(vrr_header, "#ifndef INLINE_VRR_WORKER\n"); for(f=0;f<num_subfunctions;f++) { fprintf(vrr_header, " REALTYPE *_build_%c0%c0_%d(prim_data *Data, REALTYPE *, const REALTYPE *, const REALTYPE *, const REALTYPE *, const REALTYPE *, const REALTYPE *);\n", am_letter[la],am_letter[lc],f); } if (to_inline) fprintf(vrr_header, "#endif\n"); } else { split = 0; fprintf(vrr_header," _build_%c0%c0(Data,vp,i0,i1,i2,i3,i4);}\n",am_letter[la],am_letter[lc]); if (to_inline) fprintf(vrr_header,"#ifndef INLINE_VRR_WORKER\n"); fprintf(vrr_header," void _build_%c0%c0(prim_data *, REALTYPE *, const REALTYPE *, const REALTYPE *, const REALTYPE *, const REALTYPE *, const REALTYPE *);\n",am_letter[la],am_letter[lc]); if (to_inline) fprintf(vrr_header, "#endif\n"); } if(a==0) foo = 4; sprintf(function_name,"build_%c0%c0",am_letter[la],am_letter[lc]); sprintf(code_name,"build_%c0%c0.cc",am_letter[la],am_letter[lc]); code = fopen(code_name,"w"); /*target |I0[],I1[] | |I2[],I3[] | | | I4[] | | | | */ t1 = t2 = t3 = t4 = 0; /* print local variable declarations */ fprintf(code," /* These machine-generated functions compute a quartet of (%cs|%cs) integrals */\n\n",am_letter[la],am_letter[lc]); if (split) { subfunction_name = (char **) malloc (num_subfunctions*sizeof(char *)); for(i=0;i<num_subfunctions;i++) { subfunction_name[i] = (char *) malloc(20*sizeof(char)); sprintf(subfunction_name[i],"_build_%c0%c0_%d",am_letter[la],am_letter[lc],i); } } fprintf(code,"#include \"libint.h\"\n\n"); if (split == 1) { curr_subfunction = 0; curr_count = 0; fprintf(code,"REALTYPE *%s(prim_data *Data, REALTYPE *vp, const REALTYPE *I0, const REALTYPE *I1, const REALTYPE *I2, const REALTYPE *I3, const REALTYPE *I4)\n{\n", subfunction_name[0]); } else fprintf(code,"void _%s(prim_data *Data, REALTYPE *vp, const REALTYPE *I0, const REALTYPE *I1, const REALTYPE *I2, const REALTYPE *I3, const REALTYPE *I4)\n{\n",function_name); declare_localv(a,k1max,k2max,k3max,code); define_localv(a,foo,k1max,k2max,k3max,code); fprintf(code,"\n"); for(i = 0; i <= am_in[0]; i++){ am[0][0] = am_in[0] - i; for(j = 0; j <= i; j++){ am[0][1] = i - j; am[0][2] = j; for(k = 0; k <= am_in[1]; k++){ am[1][0] = am_in[1] - k; for(l = 0; l <= k; l++){ am[1][1] = k - l; am[1][2] = l; if(am[a][2]) b = 2; if(am[a][1]) b = 1; if(am[a][0]) b = 0; am[a][b] = am[a][b] - 1; am_in[a] = am_in[a] - 1; t2 = hash(am,am_in); fprintf(code, "*(vp++) = U%d%d*I0[%d] + U%d%d*I1[%d]", a*2, b, t2, foo, b , t2); if(am[a][b]){ am[a][b] = am[a][b] - 1; am_in[a] = am_in[a] - 1; t3 = hash(am,am_in); fprintf(code, "\n + (%s)*(I2[%d] - (%s)*I3[%d])", (a==0 ? k1[am[a][b]] : k3[am[a][b]]), t3, (k4[a]), t3); max1 = (max1>am[a][b]+1) ? max1 : am[a][b]+1; am[a][b] = am[a][b] + 1; am_in[a] = am_in[a] + 1; } if(am[a^1][b]){ am[a^1][b] = am[a^1][b] - 1; am_in[a^1] = am_in[a^1] - 1; t4 = hash(am,am_in); fprintf(code, "\n + (%s)*I4[%d]", k2[am[a^1][b]], t4); max2 = (max2>am[a^1][b]+1) ? max2 : am[a^1][b]+1; am[a^1][b] = am[a^1][b] + 1; am_in[a^1] = am_in[a^1] + 1; } fprintf(code, ";\n"); am[a][b] = am[a][b] + 1; am_in[a] = am_in[a] + 1; t1++; curr_count++; if (curr_count == subbatch_length && split == 1) { curr_count = 0; curr_subfunction++; fprintf(code,"return vp;\n}\n\n"); fprintf(code,"REALTYPE *%s(prim_data *Data, REALTYPE *vp, const REALTYPE *I0, const REALTYPE *I1, const REALTYPE *I2, const REALTYPE *I3, const REALTYPE *I4)\n{\n", subfunction_name[curr_subfunction]); declare_localv(a,k1max,k2max,k3max,code); define_localv(a,foo,k1max,k2max,k3max,code); fprintf(code,"\n"); } } } } } if (split == 1) fprintf(code,"return vp;\n}\n"); else fprintf(code,"\n}\n"); fclose(code); if (split == 1) { for(i=0;i<num_subfunctions;i++) free(subfunction_name[i]); free(subfunction_name); } printf("Done with %s\n",code_name); } } free(function_name); free(code_name); }