static double HPCC_Stream() { double scalar = 3.0, times[NTIMES], curGBs, mintime = FLT_MAX; int j, k; for(k=0;k<NTIMES;k++) { #pragma xmp barrier times[k] = -xmp_wtime(); #ifdef _FUJITSU #pragma loop xfill #pragma loop noalias #endif #pragma omp parallel for for(j=0;j<array_elements;j++) a[j] = b[j] + scalar*c[j]; #pragma xmp barrier times[k] += xmp_wtime(); } for(k=1;k<NTIMES;k++) mintime = Mmin(mintime, times[k]); curGBs = (mintime > 0.0 ? 1.0 / mintime : -1.0); curGBs *= 1e-9 * 3 * sizeof(double) * array_elements; #pragma xmp reduction(+:curGBs) return curGBs; }
int main() { int i, j, k, nn; float gosa; double cpu, cpu0, cpu1, flop, target; int myrank = xmp_node_num() - 1; target = 60.0; omega = 0.8; imax = MIMAX; jmax = MJMAX; kmax = MKMAX; /* * Initializing matrixes */ initmt(); if (myrank == 0) { printf("mimax = %d mjmax = %d mkmax = %d\n",MIMAX, MJMAX, MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); } nn= 3; if (myrank == 0) { printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } cpu0= xmp_wtime(); gosa= jacobi(nn); cpu1= xmp_wtime(); cpu= cpu1 - cpu0; flop= fflop(imax,jmax,kmax); if (myrank == 0) { printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); #pragma xmp reduction (max:nn) if (myrank == 0) { printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ cpu0 = xmp_wtime(); gosa = jacobi(nn); cpu1 = xmp_wtime(); cpu= cpu1 - cpu0; if (myrank == 0) { printf(" Loop executed for %d times\n",nn); printf(" Gosa : %e \n",gosa); printf(" MFLOPS measured : %f\tcpu : %f\n",mflops(nn,cpu,flop),cpu); printf(" Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } return (0); }