/*
 * Minimal entry point: issues a single makea() call on colidx and then
 * falls off the end of main.
 * NOTE(review): makea and colidx are declared outside this view; the other
 * drivers in this file call makea with a much longer argument list --
 * confirm which prototype is intended.
 */
int main()
{
    makea(colidx);
}
int main(int argc, char *argv[]) { int i, j, k, it; double zeta; double rnorm; double norm_temp1, norm_temp2; double t, mflops, tmax; //char Class; logical verified; double zeta_verify_value, epsilon, err; char *t_names[T_last]; //openmp environment setting omp_set_dynamic(0); omp_set_num_threads(8); for (i = 0; i < T_last; i++) { timer_clear(i); } timer_start(T_init); firstrow = 0; lastrow = NA-1; firstcol = 0; lastcol = NA-1; zeta_verify_value = VALID_RESULT; printf("\nCG start...\n\n"); printf(" Size: %11d\n", NA); printf(" Iterations: %5d\n", NITER); printf("\n"); naa = NA; nzz = NZ; //--------------------------------------------------------------------- // Inialize random number generator //--------------------------------------------------------------------- tran = 314159265.0; amult = 1220703125.0; zeta = randlc(&tran, amult); //--------------------------------------------------------------------- // //--------------------------------------------------------------------- makea(naa, nzz, a, colidx, rowstr, firstrow, lastrow, firstcol, lastcol, arow, (int (*)[NONZER+1])(void*)acol, (double (*)[NONZER+1])(void*)aelt, iv); //--------------------------------------------------------------------- // Note: as a result of the above call to makea: // values of j used in indexing rowstr go from 0 --> lastrow-firstrow // values of colidx which are col indexes go from firstcol --> lastcol // So: // Shift the col index vals from actual (firstcol --> lastcol ) // to local, i.e., (0 --> lastcol-firstcol) //--------------------------------------------------------------------- #pragma omp parallel for collapse(2) for (j = 0; j < lastrow - firstrow + 1; j++) { for (k = rowstr[j]; k < rowstr[j+1]; k++) { colidx[k] = colidx[k] - firstcol; } } //--------------------------------------------------------------------- // set starting vector to (1, 1, .... 
1) //--------------------------------------------------------------------- #pragma omp parallel for for (i = 0; i < NA+1; i++) { x[i] = 1.0; } #pragma omp parallel for for (j = 0; j < lastcol - firstcol + 1; j++) { q[j] = 0.0; z[j] = 0.0; r[j] = 0.0; p[j] = 0.0; } zeta = 0.0; //--------------------------------------------------------------------- //----> // Do one iteration untimed to init all code and data page tables //----> (then reinit, start timing, to niter its) //--------------------------------------------------------------------- for (it = 1; it <= 1; it++) { //--------------------------------------------------------------------- // The call to the conjugate gradient routine: //--------------------------------------------------------------------- conj_grad(colidx, rowstr, x, z, a, p, q, r, &rnorm); //--------------------------------------------------------------------- // zeta = shift + 1/(x.z) // So, first: (x.z) // Also, find norm of z // So, first: (z.z) //--------------------------------------------------------------------- norm_temp1 = 0.0; norm_temp2 = 0.0; #pragma omp parallel for reduction(+:norm_temp1, norm_temp2) for (j = 0; j < lastcol - firstcol + 1; j++) { norm_temp1 = norm_temp1 + x[j] * z[j]; norm_temp2 = norm_temp2 + z[j] * z[j]; } norm_temp2 = 1.0 / sqrt(norm_temp2); //--------------------------------------------------------------------- // Normalize z to obtain x //--------------------------------------------------------------------- #pragma omp parallel for for (j = 0; j < lastcol - firstcol + 1; j++) { x[j] = norm_temp2 * z[j]; } } // end of do one iteration untimed //--------------------------------------------------------------------- // set starting vector to (1, 1, .... 
1) //--------------------------------------------------------------------- #pragma omp parallel for for (i = 0; i < NA+1; i++) { x[i] = 1.0; } zeta = 0.0; timer_stop(T_init); printf(" Initialization time = %15.3f seconds\n", timer_read(T_init)); timer_start(T_bench); //--------------------------------------------------------------------- //----> // Main Iteration for inverse power method //----> //--------------------------------------------------------------------- /* #pragma omp parallel for reduction(+:zeta) private(norm_temp1, norm_temp2) firstprivate(x, z, p, q) */ for (it = 1; it <= NITER; it++) { //--------------------------------------------------------------------- // The call to the conjugate gradient routine: //--------------------------------------------------------------------- if (timeron) timer_start(T_conj_grad); conj_grad(colidx, rowstr, x, z, a, p, q, r, &rnorm); if (timeron) timer_stop(T_conj_grad); //--------------------------------------------------------------------- // zeta = shift + 1/(x.z) // So, first: (x.z) // Also, find norm of z // So, first: (z.z) //--------------------------------------------------------------------- norm_temp1 = 0.0; norm_temp2 = 0.0; #pragma omp parallel for reduction(+:norm_temp1, norm_temp2) for (j = 0; j < lastcol - firstcol + 1; j++) { norm_temp1 = norm_temp1 + x[j]*z[j]; norm_temp2 = norm_temp2 + z[j]*z[j]; } norm_temp2 = 1.0 / sqrt(norm_temp2); zeta = SHIFT + 1.0 / norm_temp1; if (it == 1) printf("\n iteration ||r|| zeta\n"); printf(" %5d %20.14E%20.13f\n", it, rnorm, zeta); //--------------------------------------------------------------------- // Normalize z to obtain x //--------------------------------------------------------------------- #pragma omp parallel for for (j = 0; j < lastcol - firstcol + 1; j++) { x[j] = norm_temp2 * z[j]; } } // end of main iter inv pow meth timer_stop(T_bench); //--------------------------------------------------------------------- // End of timed section 
//--------------------------------------------------------------------- t = timer_read(T_bench); printf("\nComplete...\n"); epsilon = 1.0e-10; err = fabs(zeta - zeta_verify_value) / zeta_verify_value; if (err <= epsilon) { verified = true; printf(" VERIFICATION SUCCESSFUL\n"); printf(" Zeta is %20.13E\n", zeta); printf(" Error is %20.13E\n", err); } else { verified = false; printf(" VERIFICATION FAILED\n"); printf(" Zeta %20.13E\n", zeta); printf(" The correct zeta is %20.13E\n", zeta_verify_value); } printf("\n\nExecution time : %lf seconds\n\n", t); return 0; }
int main(int argc, char **argv) { int i, j, k, it; int nthreads = 1; double zeta; double rnorm; double norm_temp11; double norm_temp12; double t, mflops; char cclass; boolean verified; double zeta_verify_value, epsilon; firstrow = 1; lastrow = NA; firstcol = 1; lastcol = NA; if (NA == 1400 && NONZER == 7 && NITER == 15 && SHIFT == 10.0) { cclass = 'S'; zeta_verify_value = 8.5971775078648; } else if (NA == 7000 && NONZER == 8 && NITER == 15 && SHIFT == 12.0) { cclass = 'W'; zeta_verify_value = 10.362595087124; } else if (NA == 14000 && NONZER == 11 && NITER == 15 && SHIFT == 20.0) { cclass = 'A'; zeta_verify_value = 17.130235054029; } else if (NA == 75000 && NONZER == 13 && NITER == 75 && SHIFT == 60.0) { cclass = 'B'; zeta_verify_value = 22.712745482631; } else if (NA == 150000 && NONZER == 15 && NITER == 75 && SHIFT == 110.0) { cclass = 'C'; zeta_verify_value = 28.973605592845; } else { cclass = 'U'; } printf("\n\n NAS Parallel Benchmarks 2.3 OpenMP C version" " - CG Benchmark\n"); printf(" Size: %10d\n", NA); printf(" Iterations: %5d\n", NITER); naa = NA; nzz = NZ; /*-------------------------------------------------------------------- c Initialize random number generator c-------------------------------------------------------------------*/ tran = 314159265.0; amult = 1220703125.0; zeta = randlc( &tran, amult ); /*-------------------------------------------------------------------- c c-------------------------------------------------------------------*/ makea(naa, nzz, a, colidx, rowstr, NONZER, firstrow, lastrow, firstcol, lastcol, RCOND, arow, acol, aelt, v, iv, SHIFT); /*--------------------------------------------------------------------- c Note: as a result of the above call to makea: c values of j used in indexing rowstr go from 1 --> lastrow-firstrow+1 c values of colidx which are col indexes go from firstcol --> lastcol c So: c Shift the col index vals from actual (firstcol --> lastcol ) c to local, i.e., (1 --> lastcol-firstcol+1) 
c---------------------------------------------------------------------*/ #pragma omp parallel private(it,i,j,k) { #pragma omp for nowait for (j = 1; j <= lastrow - firstrow + 1; j++) { for (k = rowstr[j]; k < rowstr[j+1]; k++) { colidx[k] = colidx[k] - firstcol + 1; } } /*-------------------------------------------------------------------- c set starting vector to (1, 1, .... 1) c-------------------------------------------------------------------*/ #pragma omp for nowait for (i = 1; i <= NA+1; i++) { x[i] = 1.0; } #pragma omp single zeta = 0.0; /*------------------------------------------------------------------- c----> c Do one iteration untimed to init all code and data page tables c----> (then reinit, start timing, to niter its) c-------------------------------------------------------------------*/ for (it = 1; it <= 1; it++) { /*-------------------------------------------------------------------- c The call to the conjugate gradient routine: c-------------------------------------------------------------------*/ conj_grad (colidx, rowstr, x, z, a, p, q, r, w, &rnorm); /*-------------------------------------------------------------------- c zeta = shift + 1/(x.z) c So, first: (x.z) c Also, find norm of z c So, first: (z.z) c-------------------------------------------------------------------*/ #pragma omp single { norm_temp11 = 0.0; norm_temp12 = 0.0; } /* end single */ #pragma omp for reduction(+:norm_temp11,norm_temp12) for (j = 1; j <= lastcol-firstcol+1; j++) { norm_temp11 = norm_temp11 + x[j]*z[j]; norm_temp12 = norm_temp12 + z[j]*z[j]; } #pragma omp single norm_temp12 = 1.0 / sqrt( norm_temp12 ); /*-------------------------------------------------------------------- c Normalize z to obtain x c-------------------------------------------------------------------*/ #pragma omp for for (j = 1; j <= lastcol-firstcol+1; j++) { x[j] = norm_temp12*z[j]; } } /* end of do one iteration untimed */ /*-------------------------------------------------------------------- c 
set starting vector to (1, 1, .... 1) c-------------------------------------------------------------------*/ #pragma omp for nowait for (i = 1; i <= NA+1; i++) { x[i] = 1.0; } #pragma omp single zeta = 0.0; } /* end parallel */ timer_clear( 1 ); timer_start( 1 ); /*-------------------------------------------------------------------- c----> c Main Iteration for inverse power method c----> c-------------------------------------------------------------------*/ #pragma omp parallel private(it,i,j,k) { for (it = 1; it <= NITER; it++) { /*-------------------------------------------------------------------- c The call to the conjugate gradient routine: c-------------------------------------------------------------------*/ conj_grad(colidx, rowstr, x, z, a, p, q, r, w, &rnorm); /*-------------------------------------------------------------------- c zeta = shift + 1/(x.z) c So, first: (x.z) c Also, find norm of z c So, first: (z.z) c-------------------------------------------------------------------*/ #pragma omp single { norm_temp11 = 0.0; norm_temp12 = 0.0; } /* end single */ #pragma omp for reduction(+:norm_temp11,norm_temp12) for (j = 1; j <= lastcol-firstcol+1; j++) { norm_temp11 = norm_temp11 + x[j]*z[j]; norm_temp12 = norm_temp12 + z[j]*z[j]; } #pragma omp single { norm_temp12 = 1.0 / sqrt( norm_temp12 ); zeta = SHIFT + 1.0 / norm_temp11; } /* end single */ #pragma omp master { if( it == 1 ) { printf(" iteration ||r|| zeta\n"); } printf(" %5d %20.14e%20.13e\n", it, rnorm, zeta); } /* end master */ /*-------------------------------------------------------------------- c Normalize z to obtain x c-------------------------------------------------------------------*/ #pragma omp for for (j = 1; j <= lastcol-firstcol+1; j++) { x[j] = norm_temp12*z[j]; } } /* end of main iter inv pow meth */ #if defined(_OPENMP) #pragma omp master nthreads = omp_get_num_threads(); #endif /* _OPENMP */ } /* end parallel */ timer_stop( 1 ); 
/*-------------------------------------------------------------------- c End of timed section c-------------------------------------------------------------------*/ t = timer_read( 1 ); printf(" Benchmark completed\n"); epsilon = 1.0e-10; if (cclass != 'U') { if (fabs(zeta - zeta_verify_value) <= epsilon) { verified = TRUE; printf(" VERIFICATION SUCCESSFUL\n"); printf(" Zeta is %20.12e\n", zeta); printf(" Error is %20.12e\n", zeta - zeta_verify_value); } else { verified = FALSE; printf(" VERIFICATION FAILED\n"); printf(" Zeta %20.12e\n", zeta); printf(" The correct zeta is %20.12e\n", zeta_verify_value); } } else { verified = FALSE; printf(" Problem size unknown\n"); printf(" NO VERIFICATION PERFORMED\n"); } if ( t != 0.0 ) { mflops = (2.0*NITER*NA) * (3.0+(NONZER*(NONZER+1)) + 25.0*(5.0+(NONZER*(NONZER+1))) + 3.0 ) / t / 1000000.0; } else { mflops = 0.0; } c_print_results("CG", cclass, NA, 0, 0, NITER, nthreads, t, mflops, " floating point", verified, NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, CS7); }