/*
 * Covariance kernel. The whole function is timed, including the call
 * and return.
 *
 * Inside the scop region it performs three passes:
 *   1. mean[c]    = (sum over rows r of data[r][c]) / float_n
 *   2. data[r][c] = data[r][c] - mean[c]       (data is centred in place)
 *   3. cov[a][b]  = (sum over rows r of data[r][a] * data[r][b])
 *                   / (float_n - 1)            for b >= a,
 *      and the value is mirrored into cov[b][a].
 *
 * data is modified; mean and cov are overwritten. The loop bounds are
 * the _PB_M / _PB_N macros, which are defined outside this block.
 */
static
void kernel_covariance(int m, int n,
                       DATA_TYPE float_n,
                       DATA_TYPE POLYBENCH_2D(data,N,M,n,m),
                       DATA_TYPE POLYBENCH_2D(cov,M,M,m,m),
                       DATA_TYPE POLYBENCH_1D(mean,M,m))
{
  int row, col, col2;

#pragma scop
  /* Pass 1: per-column mean. */
  for (col = 0; col < _PB_M; col++) {
    mean[col] = SCALAR_VAL(0.0);
    for (row = 0; row < _PB_N; row++) {
      mean[col] = mean[col] + data[row][col];
    }
    mean[col] = mean[col] / float_n;
  }

  /* Pass 2: subtract the column mean from every entry. */
  for (row = 0; row < _PB_N; row++) {
    for (col = 0; col < _PB_M; col++) {
      data[row][col] = data[row][col] - mean[col];
    }
  }

  /* Pass 3: upper triangle of cov, mirrored into the lower triangle. */
  for (col = 0; col < _PB_M; col++) {
    for (col2 = col; col2 < _PB_M; col2++) {
      cov[col][col2] = SCALAR_VAL(0.0);
      for (row = 0; row < _PB_N; row++) {
        cov[col][col2] = cov[col][col2] + data[row][col] * data[row][col2];
      }
      cov[col][col2] = cov[col][col2] / (float_n - SCALAR_VAL(1.0));
      cov[col2][col] = cov[col][col2];
    }
  }
#pragma endscop
}
/*
 * 2-D FDTD kernel. The whole function is timed, including the call and
 * return.
 *
 * For every time step t in [0, _PB_TMAX):
 *   - row 0 of ey is set to _fict_[t];
 *   - ey (rows >= 1) is decreased by 0.5 * (hz[i][j] - hz[i-1][j]);
 *   - ex (columns >= 1) is decreased by 0.5 * (hz[i][j] - hz[i][j-1]);
 *   - hz (excluding the last row and column) is decreased by
 *     0.7 * (ex[i][j+1] - ex[i][j] + ey[i+1][j] - ey[i][j]).
 *
 * ex, ey and hz are updated in place; _fict_ is only read.
 */
static
void kernel_fdtd_2d(int tmax,
                    int nx,
                    int ny,
                    DATA_TYPE POLYBENCH_2D(ex,NX,NY,nx,ny),
                    DATA_TYPE POLYBENCH_2D(ey,NX,NY,nx,ny),
                    DATA_TYPE POLYBENCH_2D(hz,NX,NY,nx,ny),
                    DATA_TYPE POLYBENCH_1D(_fict_,TMAX,tmax))
{
  int step, ix, iy;

#pragma scop
  for (step = 0; step < _PB_TMAX; step++) {
    /* Boundary row of ey takes the source value for this step. */
    for (iy = 0; iy < _PB_NY; iy++) {
      ey[0][iy] = _fict_[step];
    }

    for (ix = 1; ix < _PB_NX; ix++) {
      for (iy = 0; iy < _PB_NY; iy++) {
        ey[ix][iy] -= SCALAR_VAL(0.5)*(hz[ix][iy]-hz[ix-1][iy]);
      }
    }

    for (ix = 0; ix < _PB_NX; ix++) {
      for (iy = 1; iy < _PB_NY; iy++) {
        ex[ix][iy] -= SCALAR_VAL(0.5)*(hz[ix][iy]-hz[ix][iy-1]);
      }
    }

    /* hz reads ex/ey one cell ahead, hence the "- 1" upper bounds. */
    for (ix = 0; ix < _PB_NX - 1; ix++) {
      for (iy = 0; iy < _PB_NY - 1; iy++) {
        hz[ix][iy] -= SCALAR_VAL(0.7)*
          (ex[ix][iy+1] - ex[ix][iy] + ey[ix+1][iy] - ey[ix][iy]);
      }
    }
  }
#pragma endscop
}
/*
 * QR decomposition with Modified Gram-Schmidt:
 * http://www.inf.ethz.ch/personal/gander/
 *
 * For each column k of A:
 *   - R[k][k] is SQRT_FUN of the sum of squares of column k of A;
 *   - column k of Q is column k of A divided by R[k][k];
 *   - for every later column j, R[k][j] is the dot product of Q's
 *     column k with A's column j, and that multiple of Q's column k is
 *     subtracted from A's column j.
 *
 * A is modified in place; Q and the upper triangle of R are written.
 * No check is made for R[k][k] being zero before the division.
 */
static
void kernel_gramschmidt(int m, int n,
                        DATA_TYPE POLYBENCH_2D(A,M,N,m,n),
                        DATA_TYPE POLYBENCH_2D(R,N,N,n,n),
                        DATA_TYPE POLYBENCH_2D(Q,M,N,m,n))
{
  int row, col, later;
  DATA_TYPE nrm;

#pragma scop
  /* A "#pragma texture (A, R, Q)" directive is disabled in the source. */
  for (col = 0; col < _PB_N; col++) {
    nrm = SCALAR_VAL(0.0);
    for (row = 0; row < _PB_M; row++) {
      nrm = nrm + A[row][col] * A[row][col];
    }
    R[col][col] = SQRT_FUN(nrm);

    for (row = 0; row < _PB_M; row++) {
      Q[row][col] = A[row][col] / R[col][col];
    }

    for (later = col + 1; later < _PB_N; later++) {
      R[col][later] = SCALAR_VAL(0.0);
      for (row = 0; row < _PB_M; row++) {
        R[col][later] += Q[row][col] * A[row][later];
      }
      for (row = 0; row < _PB_M; row++) {
        A[row][later] -= Q[row][col] * R[col][later];
      }
    }
  }
#pragma endscop
}
/*
 * Scaled sum of two matrix-vector products. The whole function is
 * timed, including the call and return.
 *
 * For every row i:
 *   tmp[i] = sum_j A[i][j] * x[j]
 *   y[i]   = alpha * tmp[i] + beta * (sum_j B[i][j] * x[j])
 *
 * tmp and y are overwritten; A, B and x are only read.
 */
static
void kernel_gesummv(int n,
                    DATA_TYPE alpha,
                    DATA_TYPE beta,
                    DATA_TYPE POLYBENCH_2D(A,N,N,n,n),
                    DATA_TYPE POLYBENCH_2D(B,N,N,n,n),
                    DATA_TYPE POLYBENCH_1D(tmp,N,n),
                    DATA_TYPE POLYBENCH_1D(x,N,n),
                    DATA_TYPE POLYBENCH_1D(y,N,n))
{
  int row, col;

#pragma scop
  for (row = 0; row < _PB_N; row++) {
    tmp[row] = SCALAR_VAL(0.0);
    y[row] = SCALAR_VAL(0.0);
    /* Both products share one pass over the row. */
    for (col = 0; col < _PB_N; col++) {
      tmp[row] += A[row][col] * x[col];
      y[row] += B[row][col] * x[col];
    }
    y[row] = alpha * tmp[row] + beta * y[row];
  }
#pragma endscop
}
/*
 * Two chained matrix multiplications. The whole function is timed,
 * including the call and return.
 *
 *   tmp := alpha * A * B
 *   D   := tmp * C + beta * D        (i.e. D := alpha*A*B*C + beta*D)
 *
 * tmp is overwritten, D is updated in place; A, B and C are only read.
 */
static
void kernel_2mm(int ni, int nj, int nk, int nl,
                DATA_TYPE alpha,
                DATA_TYPE beta,
                DATA_TYPE POLYBENCH_2D(tmp,NI,NJ,ni,nj),
                DATA_TYPE POLYBENCH_2D(A,NI,NK,ni,nk),
                DATA_TYPE POLYBENCH_2D(B,NK,NJ,nk,nj),
                DATA_TYPE POLYBENCH_2D(C,NJ,NL,nj,nl),
                DATA_TYPE POLYBENCH_2D(D,NI,NL,ni,nl))
{
  int row, col, inner;

#pragma scop
#pragma texture (A, tmp)
  /* First product: tmp = alpha * A * B. */
  for (row = 0; row < _PB_NI; row++) {
    for (col = 0; col < _PB_NJ; col++) {
      tmp[row][col] = SCALAR_VAL(0.0);
      for (inner = 0; inner < _PB_NK; ++inner) {
        tmp[row][col] += alpha * A[row][inner] * B[inner][col];
      }
    }
  }

  /* Second product: D = beta * D + tmp * C. */
  for (row = 0; row < _PB_NI; row++) {
    for (col = 0; col < _PB_NL; col++) {
      D[row][col] = D[row][col] * beta;
      for (inner = 0; inner < _PB_NJ; ++inner) {
        D[row][col] = D[row][col] + tmp[row][inner] * C[inner][col];
      }
    }
  }
#pragma endscop
}
/*
 * 2-D Jacobi stencil, OpenMP variant. The whole function is timed,
 * including the call and return.
 *
 * Each time step does two sweeps over the interior points
 * (indices 1 .. _PB_N-2 in both dimensions):
 *   B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][j+1] + A[i+1][j] + A[i-1][j])
 *   A[i][j] = 0.2 * (B[i][j] + B[i][j-1] + B[i][j+1] + B[i+1][j] + B[i-1][j])
 *
 * Each sweep is its own "omp parallel for" over the outer index. The
 * thread count comes from numThreads, which is defined outside this
 * block. Border elements of A and B are read but never written.
 */
static
void kernel_jacobi_2d(int tsteps,
                      int n,
                      DATA_TYPE POLYBENCH_2D(A,N,N,n,n),
                      DATA_TYPE POLYBENCH_2D(B,N,N,n,n))
{
  int step, r, c;

#pragma scop
  for (step = 0; step < _PB_TSTEPS; step++) {
    /* Sweep 1: B from A. */
#pragma omp parallel for num_threads(numThreads) private(r, c)
    for (r = 1; r < _PB_N - 1; r++) {
      for (c = 1; c < _PB_N - 1; c++) {
        B[r][c] = SCALAR_VAL(0.2) * (A[r][c] + A[r][c-1] + A[r][c+1] + A[r+1][c] + A[r-1][c]);
      }
    }

    /* Sweep 2: A from B. */
#pragma omp parallel for num_threads(numThreads) private(r, c)
    for (r = 1; r < _PB_N - 1; r++) {
      for (c = 1; c < _PB_N - 1; c++) {
        A[r][c] = SCALAR_VAL(0.2) * (B[r][c] + B[r][c-1] + B[r][c+1] + B[r+1][c] + B[r-1][c]);
      }
    }
  }
#pragma endscop
}
/*
 * 2-D Jacobi stencil, serial variant. The whole function is timed,
 * including the call and return.
 *
 * Each time step does two sweeps over the interior points
 * (indices 1 .. _PB_N-2 in both dimensions): first B is computed as
 * 0.2 times the sum of the five-point neighbourhood in A, then A is
 * computed the same way from B. Border elements are read, never written.
 */
static
void kernel_jacobi_2d(int tsteps,
                      int n,
                      DATA_TYPE POLYBENCH_2D(A,N,N,n,n),
                      DATA_TYPE POLYBENCH_2D(B,N,N,n,n))
{
  int step, r, c;

#pragma scop
  for (step = 0; step < _PB_TSTEPS; step++) {
    /* Sweep 1: B from A. */
    for (r = 1; r < _PB_N - 1; r++) {
      for (c = 1; c < _PB_N - 1; c++) {
        B[r][c] = SCALAR_VAL(0.2) * (A[r][c] + A[r][c-1] + A[r][c+1] + A[r+1][c] + A[r-1][c]);
      }
    }

    /* Sweep 2: A from B. */
    for (r = 1; r < _PB_N - 1; r++) {
      for (c = 1; c < _PB_N - 1; c++) {
        A[r][c] = SCALAR_VAL(0.2) * (B[r][c] + B[r][c-1] + B[r][c+1] + B[r+1][c] + B[r-1][c]);
      }
    }
  }
#pragma endscop
}
/*
 * Durbin kernel. The whole function is timed, including the call and
 * return.
 *
 * Starting from y[0] = -r[0], each step k in [1, _PB_N):
 *   - scales beta by (1 - alpha^2);
 *   - forms sum = sum_{i<k} r[k-i-1] * y[i];
 *   - sets alpha = -(r[k] + sum) / beta;
 *   - replaces y[i] by y[i] + alpha * y[k-i-1] for i < k, going through
 *     the scratch array z so every read sees the old y;
 *   - stores alpha in y[k].
 *
 * r is only read; y is overwritten. z is a local array of N elements.
 */
static
void kernel_durbin(int n,
                   DATA_TYPE POLYBENCH_1D(r,N,n),
                   DATA_TYPE POLYBENCH_1D(y,N,n))
{
  DATA_TYPE z[N];
  DATA_TYPE alpha;
  DATA_TYPE beta;
  DATA_TYPE sum;
  int idx, order;

#pragma scop
#pragma texture (r, y)
  y[0] = -r[0];
  beta = SCALAR_VAL(1.0);
  alpha = -r[0];

  for (order = 1; order < _PB_N; order++) {
    beta = (1-alpha*alpha)*beta;

    sum = SCALAR_VAL(0.0);
    for (idx = 0; idx < order; idx++)
      sum = sum + r[order-idx-1]*y[idx];

    alpha = - (r[order] + sum)/beta;

    /* Two-phase update: compute into z, then copy back into y. */
    for (idx = 0; idx < order; idx++)
      z[idx] = y[idx] + alpha*y[order-idx-1];
    for (idx = 0; idx < order; idx++)
      y[idx] = z[idx];

    y[order] = alpha;
  }
#pragma endscop
}
/*
 * doitgen kernel. The whole function is timed, including the call and
 * return.
 *
 * For every (r, q) pair, the vector A[r][q][0.._PB_NP) is replaced by
 * its product with the matrix C4:
 *   sum[p]     = sum_s A[r][q][s] * C4[s][p]
 *   A[r][q][p] = sum[p]
 *
 * sum is a caller-provided scratch vector and is overwritten; A is
 * updated in place; C4 is only read.
 */
void kernel_doitgen(int nr, int nq, int np,
                    DATA_TYPE POLYBENCH_3D(A, NR, NQ, NP, nr, nq, np),
                    DATA_TYPE POLYBENCH_2D(C4, NP, NP, np, np),
                    DATA_TYPE POLYBENCH_1D(sum, NP, np))
{
  int ir, iq, ip, is;

#pragma scop
  for (ir = 0; ir < _PB_NR; ir++) {
    for (iq = 0; iq < _PB_NQ; iq++) {
      /* Accumulate into sum first so A[ir][iq][*] is not clobbered mid-product. */
      for (ip = 0; ip < _PB_NP; ip++) {
        sum[ip] = SCALAR_VAL(0.0);
        for (is = 0; is < _PB_NP; is++) {
          sum[ip] = sum[ip] + A[ir][iq][is] * C4[is][ip];
        }
      }
      for (ip = 0; ip < _PB_NP; ip++) {
        A[ir][iq][ip] = sum[ip];
      }
    }
  }
#pragma endscop
}
/*
 * ADI kernel.
 *
 * Based on a Fortran code fragment from Figure 5 of
 * "Automatic Data and Computation Decomposition on Distributed Memory
 * Parallel Computers" by Peizong Lee and Zvi Meir Kedem, TOPLAS, 2002.
 *
 * The coefficients a..f are derived once from _PB_N and _PB_TSTEPS.
 * Each time step then runs two sweeps over the interior indices
 * 1 .. _PB_N-2:
 *   - column sweep: reads u, writes v (p and q are scratch);
 *   - row sweep:    reads v, writes u (p and q are scratch).
 * Within a sweep, a forward loop fills p[i][j] and q[i][j] from their
 * j-1 neighbours, then a backward loop fills the output from its j+1
 * neighbour. Boundary entries of the output are set to 1.0.
 */
static
void kernel_adi(int tsteps, int n,
                DATA_TYPE POLYBENCH_2D(u,N,N,n,n),
                DATA_TYPE POLYBENCH_2D(v,N,N,n,n),
                DATA_TYPE POLYBENCH_2D(p,N,N,n,n),
                DATA_TYPE POLYBENCH_2D(q,N,N,n,n))
{
  int step, i, j;
  DATA_TYPE dx, dy, dt;
  DATA_TYPE b1, b2;
  DATA_TYPE mul1, mul2;
  DATA_TYPE a, b, c, d, e, f;

#pragma scop
#pragma texture(u, v, p, q)
  dx = SCALAR_VAL(1.0)/(DATA_TYPE)_PB_N;
  dy = SCALAR_VAL(1.0)/(DATA_TYPE)_PB_N;
  dt = SCALAR_VAL(1.0)/(DATA_TYPE)_PB_TSTEPS;
  b1 = SCALAR_VAL(2.0);
  b2 = SCALAR_VAL(1.0);
  mul1 = b1 * dt / (dx * dx);
  mul2 = b2 * dt / (dy * dy);

  /* (a, b, c) are used by the column sweep's recurrence, (d, e, f) by the row sweep's. */
  a = -mul1 / SCALAR_VAL(2.0);
  b = SCALAR_VAL(1.0)+mul1;
  c = a;
  d = -mul2 / SCALAR_VAL(2.0);
  e = SCALAR_VAL(1.0)+mul2;
  f = d;

  for (step = 1; step <= _PB_TSTEPS; step++) {
    /* Column sweep: u -> v */
    for (i = 1; i < _PB_N-1; i++) {
      v[0][i] = SCALAR_VAL(1.0);
      p[i][0] = SCALAR_VAL(0.0);
      q[i][0] = v[0][i];

      for (j = 1; j < _PB_N-1; j++) {
        p[i][j] = -c / (a*p[i][j-1]+b);
        q[i][j] = (-d*u[j][i-1]+(SCALAR_VAL(1.0)+SCALAR_VAL(2.0)*d)*u[j][i] - f*u[j][i+1]-a*q[i][j-1])/(a*p[i][j-1]+b);
      }

      v[_PB_N-1][i] = SCALAR_VAL(1.0);
      for (j = _PB_N-2; j >= 1; j--) {
        v[j][i] = p[i][j] * v[j+1][i] + q[i][j];
      }
    }

    /* Row sweep: v -> u */
    for (i = 1; i < _PB_N-1; i++) {
      u[i][0] = SCALAR_VAL(1.0);
      p[i][0] = SCALAR_VAL(0.0);
      q[i][0] = u[i][0];

      for (j = 1; j < _PB_N-1; j++) {
        p[i][j] = -f / (d*p[i][j-1]+e);
        q[i][j] = (-a*v[i-1][j]+(SCALAR_VAL(1.0)+SCALAR_VAL(2.0)*a)*v[i][j] - c*v[i+1][j]-d*q[i][j-1])/(d*p[i][j-1]+e);
      }

      u[i][_PB_N-1] = SCALAR_VAL(1.0);
      for (j = _PB_N-2; j >= 1; j--) {
        u[i][j] = p[i][j] * u[i][j+1] + q[i][j];
      }
    }
  }
#pragma endscop
}
/*
 * Covariance kernel, OpenMP variant derived from CLooG-generated code.
 * The whole function is timed, including the call and return.
 *
 * Nothing is done when _PB_M < 1. Otherwise the work is split into
 * separate loop nests, each parallelised over its outer index:
 *   1. zero the upper triangle of cov and all of mean;
 *   2. mean[j] = (sum over rows i of data[i][j]) / float_n;
 *   3. data[i][j] -= mean[j]                 (data is centred in place);
 *   4. cov[i][j] = (sum over rows k of data[k][i] * data[k][j])
 *                  / (float_n - 1)           for j >= i,
 *      mirrored into cov[j][i].
 *
 * Every parallel loop writes only elements selected by its own outer
 * index (step 4's mirror write lands strictly below the diagonal, which
 * no other iteration touches), so inner indices are the only variables
 * that need a private clause.
 */
static
void kernel_covariance(int m, int n,
                       DATA_TYPE float_n,
                       DATA_TYPE POLYBENCH_2D(data,N,M,n,m),
                       DATA_TYPE POLYBENCH_2D(cov,M,M,m,m),
                       DATA_TYPE POLYBENCH_1D(mean,M,m))
{
  int i, j, k;

  if (_PB_M < 1)
    return;

  /* Step 1: clear the accumulators. */
#pragma omp parallel for private(j)
  for (i = 0; i <= _PB_M-1; i++) {
    for (j = i; j <= _PB_M-1; j++) {
      cov[i][j] = SCALAR_VAL(0.0);
    }
  }

#pragma omp parallel for
  for (j = 0; j <= _PB_M-1; j++) {
    mean[j] = SCALAR_VAL(0.0);
  }

  /* Step 2: column sums, then divide by float_n. */
  if (_PB_N >= 1) {
#pragma omp parallel for private(i)
    for (j = 0; j <= _PB_M-1; j++) {
      for (i = 0; i <= _PB_N-1; i++) {
        mean[j] += data[i][j];
      }
    }
  }

#pragma omp parallel for
  for (j = 0; j <= _PB_M-1; j++) {
    mean[j] /= float_n;
  }

  /* Step 3: centre the data. */
#pragma omp parallel for private(j)
  for (i = 0; i <= _PB_N-1; i++) {
    for (j = 0; j <= _PB_M-1; j++) {
      data[i][j] -= mean[j];
    }
  }

  /* Step 4: accumulate products, then normalise and mirror. */
  if (_PB_N >= 1) {
#pragma omp parallel for private(j, k)
    for (i = 0; i <= _PB_M-1; i++) {
      for (j = i; j <= _PB_M-1; j++) {
        for (k = 0; k <= _PB_N-1; k++) {
          cov[i][j] += data[k][i] * data[k][j];
        }
      }
    }
  }

#pragma omp parallel for private(j)
  for (i = 0; i <= _PB_M-1; i++) {
    for (j = i; j <= _PB_M-1; j++) {
      cov[i][j] /= (float_n - SCALAR_VAL(1.0));
      cov[j][i] = cov[i][j];
    }
  }
}
/*
 * Deriche kernel, variant derived from CLooG-generated code.
 * Original code provided by Gael Deest.
 *
 * After computing the scalar coefficients from alpha, the kernel runs:
 *   1. for each i: a forward pass over j writing y1[i][j] from imgIn;
 *   2. for each i: a backward pass over j writing y2[i][j] from imgIn;
 *   3. imgOut = c1 * (y1 + y2)                      (OpenMP parallel);
 *   4. for each j: a forward pass over i writing y1[i][j] from imgOut;
 *   5. for each j: a backward pass over i writing y2[i][j] from imgOut;
 *   6. imgOut = c2 * (y1 + y2)                      (OpenMP parallel).
 *
 * Passes 1, 2, 4 and 5 carry running scalars (xm1, ym1, ...) from one
 * inner iteration to the next and are therefore left serial.
 *
 * imgIn is only read; imgOut, y1 and y2 are overwritten.
 *
 * NOTE(review): the denominator of k uses EXP_FUN(2*alpha) while a4 and
 * b2 use EXP_FUN(-2*alpha), and b1 uses POW_FUN(2, -alpha). These are
 * reproduced unchanged to keep results identical - confirm against the
 * intended filter definition.
 */
static
void kernel_deriche(int w, int h, DATA_TYPE alpha,
                    DATA_TYPE POLYBENCH_2D(imgIn, W, H, w, h),
                    DATA_TYPE POLYBENCH_2D(imgOut, W, H, w, h),
                    DATA_TYPE POLYBENCH_2D(y1, W, H, w, h),
                    DATA_TYPE POLYBENCH_2D(y2, W, H, w, h))
{
  int i, j;
  DATA_TYPE xm1, tm1, ym1, ym2;
  DATA_TYPE xp1, xp2;
  DATA_TYPE tp1, tp2;
  DATA_TYPE yp1, yp2;
  DATA_TYPE k;
  DATA_TYPE a1, a2, a3, a4, a5, a6, a7, a8;
  DATA_TYPE b1, b2, c1, c2;

  k = (SCALAR_VAL(1.0)-EXP_FUN(-alpha))*(SCALAR_VAL(1.0)-EXP_FUN(-alpha))/(SCALAR_VAL(1.0)+SCALAR_VAL(2.0)*alpha*EXP_FUN(-alpha)-EXP_FUN(SCALAR_VAL(2.0)*alpha));
  a1 = a5 = k;
  a2 = a6 = k*EXP_FUN(-alpha)*(alpha-SCALAR_VAL(1.0));
  a3 = a7 = k*EXP_FUN(-alpha)*(alpha+SCALAR_VAL(1.0));
  a4 = a8 = -k*EXP_FUN(SCALAR_VAL(-2.0)*alpha);
  b1 = POW_FUN(SCALAR_VAL(2.0),-alpha);
  b2 = -EXP_FUN(SCALAR_VAL(-2.0)*alpha);
  c1 = c2 = 1;

  if (_PB_H >= 1) {
    /* Pass 1: forward along j. */
    for (i = 0; i <= _PB_W-1; i++) {
      ym1 = SCALAR_VAL(0.0);
      ym2 = SCALAR_VAL(0.0);
      xm1 = SCALAR_VAL(0.0);
      for (j = 0; j <= _PB_H-1; j++) {
        y1[i][j] = a1*imgIn[i][j] + a2*xm1 + b1*ym1 + b2*ym2;
        xm1 = imgIn[i][j];
        ym2 = ym1;
        ym1 = y1[i][j];
      }
    }

    /* Pass 2: backward along j. */
    for (i = 0; i <= _PB_W-1; i++) {
      yp1 = SCALAR_VAL(0.0);
      yp2 = SCALAR_VAL(0.0);
      xp1 = SCALAR_VAL(0.0);
      xp2 = SCALAR_VAL(0.0);
      for (j = _PB_H-1; j >= 0; j--) {
        y2[i][j] = a3*xp1 + a4*xp2 + b1*yp1 + b2*yp2;
        xp2 = xp1;
        xp1 = imgIn[i][j];
        yp2 = yp1;
        yp1 = y2[i][j];
      }
    }

    /* Pass 3: combine; iterations are independent. */
#pragma omp parallel for private(j)
    for (i = 0; i <= _PB_W-1; i++) {
      for (j = 0; j <= _PB_H-1; j++) {
        imgOut[i][j] = c1 * (y1[i][j] + y2[i][j]);
      }
    }
  }

  if (_PB_W >= 1) {
    /* Pass 4: forward along i. */
    for (j = 0; j <= _PB_H-1; j++) {
      tm1 = SCALAR_VAL(0.0);
      ym1 = SCALAR_VAL(0.0);
      ym2 = SCALAR_VAL(0.0);
      for (i = 0; i <= _PB_W-1; i++) {
        y1[i][j] = a5*imgOut[i][j] + a6*tm1 + b1*ym1 + b2*ym2;
        tm1 = imgOut[i][j];
        ym2 = ym1;
        ym1 = y1[i][j];
      }
    }

    /* Pass 5: backward along i. */
    for (j = 0; j <= _PB_H-1; j++) {
      tp1 = SCALAR_VAL(0.0);
      tp2 = SCALAR_VAL(0.0);
      yp1 = SCALAR_VAL(0.0);
      yp2 = SCALAR_VAL(0.0);
      for (i = _PB_W-1; i >= 0; i--) {
        y2[i][j] = a7*tp1 + a8*tp2 + b1*yp1 + b2*yp2;
        tp2 = tp1;
        tp1 = imgOut[i][j];
        yp2 = yp1;
        yp1 = y2[i][j];
      }
    }
  }

  if (_PB_H >= 1) {
    /* Pass 6: combine; iterations are independent. */
#pragma omp parallel for private(j)
    for (i = 0; i <= _PB_W-1; i++) {
      for (j = 0; j <= _PB_H-1; j++) {
        imgOut[i][j] = c2*(y1[i][j] + y2[i][j]);
      }
    }
  }
}
/*
 * Deriche kernel. Original code provided by Gael Deest.
 *
 * After computing the scalar coefficients from alpha, the scop runs:
 *   1. for each iw: a forward pass over jh writing y1 from imgIn;
 *   2. for each iw: a backward pass over jh writing y2 from imgIn;
 *   3. imgOut = c1 * (y1 + y2);
 *   4. for each jh: a forward pass over iw writing y1 from imgOut;
 *   5. for each jh: a backward pass over iw writing y2 from imgOut;
 *   6. imgOut = c2 * (y1 + y2).
 *
 * The directional passes carry running scalars (xm1, ym1, ...) from one
 * inner iteration to the next. imgIn is only read; imgOut, y1 and y2
 * are overwritten.
 *
 * NOTE(review): the denominator of k uses EXP_FUN(2*alpha) while a4 and
 * b2 use EXP_FUN(-2*alpha), and b1 uses POW_FUN(2, -alpha). Reproduced
 * as-is - confirm against the intended filter definition.
 */
static
void kernel_deriche(int w, int h, DATA_TYPE alpha,
                    DATA_TYPE POLYBENCH_2D(imgIn, W, H, w, h),
                    DATA_TYPE POLYBENCH_2D(imgOut, W, H, w, h),
                    DATA_TYPE POLYBENCH_2D(y1, W, H, w, h),
                    DATA_TYPE POLYBENCH_2D(y2, W, H, w, h))
{
  int iw, jh;
  DATA_TYPE xm1, tm1, ym1, ym2;
  DATA_TYPE xp1, xp2;
  DATA_TYPE tp1, tp2;
  DATA_TYPE yp1, yp2;
  DATA_TYPE k;
  DATA_TYPE a1, a2, a3, a4, a5, a6, a7, a8;
  DATA_TYPE b1, b2, c1, c2;

#pragma scop
  k = (SCALAR_VAL(1.0)-EXP_FUN(-alpha))*(SCALAR_VAL(1.0)-EXP_FUN(-alpha))/(SCALAR_VAL(1.0)+SCALAR_VAL(2.0)*alpha*EXP_FUN(-alpha)-EXP_FUN(SCALAR_VAL(2.0)*alpha));
  a1 = a5 = k;
  a2 = a6 = k*EXP_FUN(-alpha)*(alpha-SCALAR_VAL(1.0));
  a3 = a7 = k*EXP_FUN(-alpha)*(alpha+SCALAR_VAL(1.0));
  a4 = a8 = -k*EXP_FUN(SCALAR_VAL(-2.0)*alpha);
  b1 = POW_FUN(SCALAR_VAL(2.0),-alpha);
  b2 = -EXP_FUN(SCALAR_VAL(-2.0)*alpha);
  c1 = c2 = 1;

  /* Pass 1: forward along the second dimension. */
  for (iw = 0; iw < _PB_W; iw++) {
    ym1 = SCALAR_VAL(0.0);
    ym2 = SCALAR_VAL(0.0);
    xm1 = SCALAR_VAL(0.0);
    for (jh = 0; jh < _PB_H; jh++) {
      y1[iw][jh] = a1*imgIn[iw][jh] + a2*xm1 + b1*ym1 + b2*ym2;
      xm1 = imgIn[iw][jh];
      ym2 = ym1;
      ym1 = y1[iw][jh];
    }
  }

  /* Pass 2: backward along the second dimension. */
  for (iw = 0; iw < _PB_W; iw++) {
    yp1 = SCALAR_VAL(0.0);
    yp2 = SCALAR_VAL(0.0);
    xp1 = SCALAR_VAL(0.0);
    xp2 = SCALAR_VAL(0.0);
    for (jh = _PB_H-1; jh >= 0; jh--) {
      y2[iw][jh] = a3*xp1 + a4*xp2 + b1*yp1 + b2*yp2;
      xp2 = xp1;
      xp1 = imgIn[iw][jh];
      yp2 = yp1;
      yp1 = y2[iw][jh];
    }
  }

  /* Pass 3: combine both directions. */
  for (iw = 0; iw < _PB_W; iw++) {
    for (jh = 0; jh < _PB_H; jh++) {
      imgOut[iw][jh] = c1 * (y1[iw][jh] + y2[iw][jh]);
    }
  }

  /* Pass 4: forward along the first dimension. */
  for (jh = 0; jh < _PB_H; jh++) {
    tm1 = SCALAR_VAL(0.0);
    ym1 = SCALAR_VAL(0.0);
    ym2 = SCALAR_VAL(0.0);
    for (iw = 0; iw < _PB_W; iw++) {
      y1[iw][jh] = a5*imgOut[iw][jh] + a6*tm1 + b1*ym1 + b2*ym2;
      tm1 = imgOut[iw][jh];
      ym2 = ym1;
      ym1 = y1[iw][jh];
    }
  }

  /* Pass 5: backward along the first dimension. */
  for (jh = 0; jh < _PB_H; jh++) {
    tp1 = SCALAR_VAL(0.0);
    tp2 = SCALAR_VAL(0.0);
    yp1 = SCALAR_VAL(0.0);
    yp2 = SCALAR_VAL(0.0);
    for (iw = _PB_W-1; iw >= 0; iw--) {
      y2[iw][jh] = a7*tp1 + a8*tp2 + b1*yp1 + b2*yp2;
      tp2 = tp1;
      tp1 = imgOut[iw][jh];
      yp2 = yp1;
      yp1 = y2[iw][jh];
    }
  }

  /* Pass 6: combine both directions. */
  for (iw = 0; iw < _PB_W; iw++) {
    for (jh = 0; jh < _PB_H; jh++) {
      imgOut[iw][jh] = c2*(y1[iw][jh] + y2[iw][jh]);
    }
  }
#pragma endscop
}
/*
 * Durbin kernel, variant derived from CLooG-generated code. The whole
 * function is timed, including the call and return.
 *
 * Starting from y[0] = -r[0], each step k in [1, _PB_N-1]:
 *   - scales beta by (1 - alpha^2);
 *   - forms sum = sum_{i<k} r[k-i-1] * y[i];
 *   - sets alpha = -(r[k] + sum) / beta and stores it in y[k];
 *   - replaces y[i] by y[i] + alpha * y[k-i-1] for i < k, going through
 *     the scratch array z so every read sees the old y.
 *
 * y[k] is written before the z pass; that pass only reads y[0..k-1], so
 * the early store does not affect it.
 *
 * r is only read; y is overwritten. z is a local array of N elements.
 */
static
void kernel_durbin(int n,
                   DATA_TYPE POLYBENCH_1D(r,N,n),
                   DATA_TYPE POLYBENCH_1D(y,N,n))
{
  DATA_TYPE z[N];
  DATA_TYPE alpha;
  DATA_TYPE beta;
  DATA_TYPE sum;
  int i, k;

  y[0] = -r[0];
  beta = SCALAR_VAL(1.0);
  alpha = -r[0];

  for (k = 1; k <= _PB_N-1; k++) {
    beta = (1-alpha*alpha)*beta;

    sum = SCALAR_VAL(0.0);
    for (i = 0; i <= k-1; i++) {
      sum += r[k-i-1]*y[i];
    }

    alpha = - (r[k] + sum)/beta;
    y[k] = alpha;

    /* Two-phase update: compute into z, then copy back into y. */
    for (i = 0; i <= k-1; i++) {
      z[i] = y[i] + alpha*y[k-i-1];
    }
    for (i = 0; i <= k-1; i++) {
      y[i] = z[i];
    }
  }
}