//c--------------------------------------------------------------------- //c //c Authors: S. Weeratunga //c V. Venkatakrishnan //c E. Barszcz //c M. Yarrow //c C-version: Rob Van der Wijngaart, Intel Corporation //c //c--------------------------------------------------------------------- // // Copyright 2010 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // int RCCE_APP(int argc, char **argv){ //c--------------------------------------------------------------------- //c //c driver for the performance evaluation of the solver for //c five coupled parabolic/elliptic partial differential equations. //c //c--------------------------------------------------------------------- char class; double mflops; int ierr, i, j, k, mm, iverified; //c--------------------------------------------------------------------- //c initialize communications //c--------------------------------------------------------------------- init_comm(&argc, &argv); // RCCE_debug_set(RCCE_DEBUG_SYNCH); //c--------------------------------------------------------------------- //c read input data //c--------------------------------------------------------------------- read_input(); //c--------------------------------------------------------------------- //c set up processor grid //c--------------------------------------------------------------------- proc_grid(); //c--------------------------------------------------------------------- //c determine the neighbors //c--------------------------------------------------------------------- neighbors(); //c--------------------------------------------------------------------- //c set up sub-domain sizes //c--------------------------------------------------------------------- subdomain(); //c--------------------------------------------------------------------- //c set up coefficients //c--------------------------------------------------------------------- setcoeff(); //c--------------------------------------------------------------------- //c set the boundary values for dependent variables //c--------------------------------------------------------------------- setbv(); //c--------------------------------------------------------------------- //c set the initial values for dependent variables //c--------------------------------------------------------------------- setiv(); //c--------------------------------------------------------------------- //c compute the forcing term based on prescribed exact solution //c--------------------------------------------------------------------- erhs(); ////c--------------------------------------------------------------------- ////c perform one SSOR iteration to touch all data and program pages ////c--------------------------------------------------------------------- ssor(1); // ////c--------------------------------------------------------------------- ////c reset the boundary and initial values ////c--------------------------------------------------------------------- setbv(); setiv(); // ////c--------------------------------------------------------------------- ////c perform the SSOR iterations ////c--------------------------------------------------------------------- ssor(itmax); ////c--------------------------------------------------------------------- ////c compute the solution error ////c--------------------------------------------------------------------- error(); ////c--------------------------------------------------------------------- ////c compute the surface integral ////c--------------------------------------------------------------------- pintgr(); // ////c--------------------------------------------------------------------- ////c verification test ////c--------------------------------------------------------------------- if (id ==0) { verify( rsdnm, errnm, &frc, &class ); mflops = (double)(itmax)*(1984.77*(double)( nx0 ) *(double)( ny0 ) *(double)( nz0 ) -10923.3*((double)( nx0+ny0+nz0 )/3.)*((double)( nx0+ny0+nz0 )/3.) +27770.9* (double)( nx0+ny0+nz0 )/3. -144010.) / (maxtime*1000000.); print_results("LU", &class, &nx0, &ny0, &nz0, &itmax, &nnodes_compiled, &num, &maxtime, &mflops, " floating point", &iverified, NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6); // FILE *perf_file; // char name[50] = "/shared/DEMOS/RCCE/NPB_LU/perf."; // char postfix[50]; // sprintf(postfix, "%d", nnodes_compiled); // strcat(name, postfix); // perf_file = fopen(name,"w"); // fprintf(perf_file, "%d", (int)mflops); // fclose(perf_file); }
int main(int argc, char *argv[]) { char Class; logical verified; double mflops; double t, tmax, trecs[t_last+1]; int i; char *t_names[t_last+1]; int num_threads; if(argc == 2) { num_threads = atoi(argv[1]); printf("Number of threads received are - %d", num_threads); } else { num_threads = -1; } if(num_threads != -1) { omp_set_dynamic(num_threads); //Nitin Chugh printf(" Dynamic environment state - %d\n", omp_get_dynamic()); omp_set_num_threads(num_threads); } printf(" Number of processors - %d\n", omp_get_num_procs()); //--------------------------------------------------------------------- // Setup info for timers //--------------------------------------------------------------------- FILE *fp; if ((fp = fopen("timer.flag", "r")) != NULL) { timeron = true; t_names[t_total] = "total"; t_names[t_rhsx] = "rhsx"; t_names[t_rhsy] = "rhsy"; t_names[t_rhsz] = "rhsz"; t_names[t_rhs] = "rhs"; t_names[t_jacld] = "jacld"; t_names[t_blts] = "blts"; t_names[t_jacu] = "jacu"; t_names[t_buts] = "buts"; t_names[t_add] = "add"; t_names[t_l2norm] = "l2norm"; fclose(fp); } else { timeron = false; } //--------------------------------------------------------------------- // read input data //--------------------------------------------------------------------- read_input(); //--------------------------------------------------------------------- // set up domain sizes //--------------------------------------------------------------------- domain(); //--------------------------------------------------------------------- // set up coefficients //--------------------------------------------------------------------- setcoeff(); //--------------------------------------------------------------------- // set the boundary values for dependent variables //--------------------------------------------------------------------- setbv(); //--------------------------------------------------------------------- // set the initial values for dependent variables //--------------------------------------------------------------------- setiv(); //--------------------------------------------------------------------- // compute the forcing term based on prescribed exact solution //--------------------------------------------------------------------- erhs(); //--------------------------------------------------------------------- // perform one SSOR iteration to touch all data pages //--------------------------------------------------------------------- ssor(1); //--------------------------------------------------------------------- // reset the boundary and initial values //--------------------------------------------------------------------- setbv(); setiv(); //--------------------------------------------------------------------- // perform the SSOR iterations //--------------------------------------------------------------------- ssor(itmax); //--------------------------------------------------------------------- // compute the solution error //--------------------------------------------------------------------- error(); //--------------------------------------------------------------------- // compute the surface integral //--------------------------------------------------------------------- pintgr(); //--------------------------------------------------------------------- // verification test //--------------------------------------------------------------------- verify ( rsdnm, errnm, frc, &Class, &verified ); mflops = (double)itmax * (1984.77 * (double)nx0 * (double)ny0 * (double)nz0 - 10923.3 * pow(((double)(nx0+ny0+nz0)/3.0), 2.0) + 27770.9 * (double)(nx0+ny0+nz0)/3.0 - 144010.0) / (maxtime*1000000.0); print_results("LU", Class, nx0, ny0, nz0, itmax, maxtime, mflops, " floating point", verified, NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, "(none)"); //--------------------------------------------------------------------- // More timers //--------------------------------------------------------------------- if (timeron) { for (i = 1; i <= t_last; i++) { trecs[i] = timer_read(i); } tmax = maxtime; if (tmax == 0.0) tmax = 1.0; printf(" SECTION Time (secs)\n"); for (i = 1; i <= t_last; i++) { printf(" %-8s:%9.3f (%6.2f%%)\n", t_names[i], trecs[i], trecs[i]*100./tmax); if (i == t_rhs) { t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-rhs", t, t*100./tmax); t = trecs[i] - t; printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest-rhs", t, t*100./tmax); } } } return 0; }
int main(int argc, char *argv[]) { char Class; logical verified; double mflops; double t, tmax, trecs[t_last+1]; int i; char *t_names[t_last+1]; if (argc == 1) { fprintf(stderr, "Usage: %s <kernel directory>\n", argv[0]); exit(-1); } //--------------------------------------------------------------------- // Setup info for timers //--------------------------------------------------------------------- FILE *fp; if ((fp = fopen("timer.flag", "r")) != NULL) { timeron = true; t_names[t_total] = "total"; t_names[t_rhsx] = "rhsx"; t_names[t_rhsy] = "rhsy"; t_names[t_rhsz] = "rhsz"; t_names[t_rhs] = "rhs"; t_names[t_jacld] = "jacld"; t_names[t_blts] = "blts"; t_names[t_jacu] = "jacu"; t_names[t_buts] = "buts"; t_names[t_add] = "add"; t_names[t_l2norm] = "l2norm"; t_names[t_setbv] = "setbv"; t_names[t_setiv] = "setiv"; t_names[t_erhs] = "erhs"; t_names[t_error] = "error"; t_names[t_pintgr] = "pintgr"; t_names[t_blts1] = "blts1"; t_names[t_buts1] = "buts1"; fclose(fp); } else { timeron = false; } //--------------------------------------------------------------------- // read input data //--------------------------------------------------------------------- read_input(); //--------------------------------------------------------------------- // set up domain sizes //--------------------------------------------------------------------- domain(); //--------------------------------------------------------------------- // set up OpenCL environment //--------------------------------------------------------------------- setup_opencl(argc, argv); //--------------------------------------------------------------------- // set up coefficients //--------------------------------------------------------------------- setcoeff(); //--------------------------------------------------------------------- // set the boundary values for dependent variables //--------------------------------------------------------------------- setbv(); //--------------------------------------------------------------------- // set the initial values for dependent variables //--------------------------------------------------------------------- setiv(); //--------------------------------------------------------------------- // compute the forcing term based on prescribed exact solution //--------------------------------------------------------------------- erhs(); //--------------------------------------------------------------------- // perform one SSOR iteration to touch all data pages //--------------------------------------------------------------------- ssor(1); //--------------------------------------------------------------------- // reset the boundary and initial values //--------------------------------------------------------------------- setbv(); setiv(); //--------------------------------------------------------------------- // perform the SSOR iterations //--------------------------------------------------------------------- ssor(itmax); //--------------------------------------------------------------------- // compute the solution error //--------------------------------------------------------------------- error(); //--------------------------------------------------------------------- // compute the surface integral //--------------------------------------------------------------------- pintgr(); //--------------------------------------------------------------------- // verification test //--------------------------------------------------------------------- verify ( rsdnm, errnm, frc, &Class, &verified ); mflops = (double)itmax * (1984.77 * (double)nx0 * (double)ny0 * (double)nz0 - 10923.3 * pow(((double)(nx0+ny0+nz0)/3.0), 2.0) + 27770.9 * (double)(nx0+ny0+nz0)/3.0 - 144010.0) / (maxtime*1000000.0); c_print_results("LU", Class, nx0, ny0, nz0, itmax, maxtime, mflops, " floating point", verified, NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, "(none)", clu_GetDeviceTypeName(device_type), device_name); //--------------------------------------------------------------------- // More timers //--------------------------------------------------------------------- if (timeron) { for (i = 1; i <= t_last; i++) { trecs[i] = timer_read(i); } tmax = maxtime; if (tmax == 0.0) tmax = 1.0; printf(" SECTION Time (secs)\n"); for (i = 1; i <= t_last; i++) { printf(" %-8s:%9.4f (%6.2f%%)\n", t_names[i], trecs[i], trecs[i]*100./tmax); if (i == t_rhs) { t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-rhs", t, t*100./tmax); t = trecs[i] - t; printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest-rhs", t, t*100./tmax); } } } release_opencl(); fflush(stdout); return 0; }