Esempio n. 1
0
/*
 * Generated scan over statements S1..S4 for parameter M.
 * A few peeled start-up iterations run unconditionally, followed by two
 * loop phases over i and a final peeled S4 instance at i = 2*M.
 */
void test(int M)
{
  /* Original iterators. */
  int i, j;

  /* Peeled start-up instances for i = 2, 3 and 4. */
  S3(2, 1);
  S1(3, 1);
  S1(4, 1);
  S4(4, 2);

  /* Phase one: i runs from 5 up to M+1. */
  i = 5;
  while (i <= M + 1) {
    S1(i, 1);
    j = 2;
    while (j <= floord(i - 1, 2)) {
      S2(i, j);
      j++;
    }
    /* S4 only exists for even i. */
    if (!(i % 2)) {
      S4(i, i / 2);
    }
    i++;
  }

  /* Phase two: i runs from M+2 up to 2*M-1; j now starts at i-M. */
  i = M + 2;
  while (i <= 2 * M - 1) {
    j = i - M;
    while (j <= floord(i - 1, 2)) {
      S2(i, j);
      j++;
    }
    if (!(i % 2)) {
      S4(i, i / 2);
    }
    i++;
  }

  /* Final peeled instance. */
  i = 2 * M;
  S4(2 * M, M);
}
Esempio n. 2
0
/*
 * Fills the first floord(N, 2) entries of a local N-element array with
 * their own index. The loop is delimited by scop pragmas.
 */
void foo(int N)
{
	int a[N];
	int k;

#pragma scop
	for (k = 0; k < floord(N, 2); k += 1) {
		a[k] = k;
	}
#pragma endscop
}
Esempio n. 3
0
/*
 * Generated four-level scan (c1..c4) that issues S1 for parameter M.
 * Nothing is executed unless M >= 3.
 */
void test(int M)
{
  /* Scattering iterators. */
  int c1, c2, c3, c4;
  /* Original iterators. */
  /* Declared by the generator; not referenced directly in this body. */
  int i, j, k, l;
  if (M >= 3) {
    for (c1=-1;c1<=min(2,floord(M+2,4));c1++) {
      for (c2=max(ceild(2*c1-M+1,4),ceild(4*c1-M-2,4));c2<=min(0,floord(c1,2));c2++) {
        for (c3=max(max(-4*c2-2,4*c2+3),4*c1-4*c2+1);c3<=min(min(min(M+3,-4*c2+9),4*c2+2*M),4*c1-4*c2+4);c3++) {
          /* c4 advances in steps of 4. */
          for (c4=max(3*c3-4*floord(c3+M+1,2)+6,4*c2-c3-4*floord(-c3+1,4)+2);c4<=min(min(4*c2+4,-c3+10),c3-2);c4+=4) {
            /* Extra filter tying c2 to c4: ceild(c4-4,4) <= c2 <= floord(c4-1,4). */
            if ((c2 <= floord(c4-1,4)) && (c2 >= ceild(c4-4,4))) {
              /* The S1 arguments are rebuilt from the scattering iterators. */
              S1(c1-c2,c2,(c3+c4-2)/4,(c3-c4)/2);
            }
          }
        }
      }
    }
  }
}
Esempio n. 4
0
/*
 * Generated two-level scan (c1, c2) for parameter n.
 * S1 is issued only for even c2; its arguments are derived from c1 and c2.
 */
void test(int n)
{
  /* Scattering iterators. */
  int c1, c2;
  /* Original iterators. */
  int i, j;

  for (c1 = 0; c1 <= 5 * n; c1++) {
    for (c2 = max(c1 - n, ceild(2 * c1, 3));
         c2 <= min(c1, floord(2 * c1 + 2 * n, 3));
         c2++) {
      /* Odd c2 values carry no statement instance. */
      if (c2 % 2 != 0) {
        continue;
      }
      /* Keep the original iterators in step with the scattering ones. */
      i = (-2 * c1 + 3 * c2) / 2;
      j = c1 - c2;
      S1((-2 * c1 + 3 * c2) / 2, c1 - c2);
    }
  }
}
Esempio n. 5
0
/* Generated from ../../../git/cloog/test/classen2.cloog by CLooG 0.14.0-271-gaa1e292 gmp bits in 0.14s. */
/*
 * Guard: the loop nest below runs only when every listed affine constraint
 * on M, N, outerTimeTileScatter, outerProcTileScatter1 and
 * outerProcTileScatter2 holds.
 */
if ((M >= 2) && (N >= 3) && (outerProcTileScatter1 >= outerProcTileScatter2) && (5*outerProcTileScatter1 <= M+2*N-4) && (5*outerProcTileScatter1 <= 5*outerProcTileScatter2+N+2) && (outerProcTileScatter2 >= 0) && (5*outerProcTileScatter2 <= M+N-2) && (outerTimeTileScatter >= outerProcTileScatter1) && (outerTimeTileScatter <= 2*outerProcTileScatter1) && (outerTimeTileScatter <= outerProcTileScatter1+outerProcTileScatter2+1) && (5*outerTimeTileScatter <= 2*M+2*N-6) && (5*outerTimeTileScatter <= 5*outerProcTileScatter1+M+2) && (5*outerTimeTileScatter >= 10*outerProcTileScatter1-2*N-2) && (5*outerTimeTileScatter <= 5*outerProcTileScatter2+M+N) && (5*outerTimeTileScatter >= 10*outerProcTileScatter2-N-3) && (5*outerTimeTileScatter <= 10*outerProcTileScatter2+N+3) && (5*outerTimeTileScatter >= 5*outerProcTileScatter1+5*outerProcTileScatter2-N-4)) {
  /* compScatter1: bounded from the three outer tile coordinates and M, N. */
  for (compScatter1=max(max(max(max(max(4,5*outerTimeTileScatter),5*outerProcTileScatter2+1),5*outerProcTileScatter1+5*outerProcTileScatter2-N),10*outerProcTileScatter1-2*N+2),10*outerProcTileScatter2-N+1);compScatter1<=min(min(min(min(min(5*outerTimeTileScatter+4,2*M+2*N-6),5*outerProcTileScatter1+M+2),5*outerProcTileScatter1+5*outerProcTileScatter2+5),5*outerProcTileScatter2+M+N),10*outerProcTileScatter2+N+3);compScatter1++) {
    /* compScatter2: bounds additionally depend on compScatter1. */
    for (compScatter2=max(max(max(max(ceild(compScatter1+4,2),5*outerProcTileScatter1),5*outerProcTileScatter2+1),compScatter1-M+2),compScatter1-5*outerProcTileScatter2-1);compScatter2<=min(min(min(min(floord(compScatter1+2*N-2,2),compScatter1),5*outerProcTileScatter1+4),compScatter1-5*outerProcTileScatter2+N),5*outerProcTileScatter2+N+2);compScatter2++) {
      /* compScatter3: bounds depend on compScatter1 and compScatter2. */
      for (compScatter3=max(max(5*outerProcTileScatter2,compScatter1-compScatter2+3),compScatter2-N+2);compScatter3<=min(min(compScatter2-1,5*outerProcTileScatter2+4),compScatter1-compScatter2+N);compScatter3++) {
        /* First three arguments are rebuilt from the compScatter iterators; the last three pass them through. */
        S1(compScatter1-compScatter2+1,-compScatter1+compScatter2+compScatter3-2,compScatter2-compScatter3,compScatter1,compScatter2,compScatter3);
      }
    }
  }
}
/*
 * Runs the tiled stencil sweep and returns the elapsed wall-clock time in
 * seconds, measured with omp_get_wtime().
 *
 * Reads tau, subset_s, T and upperBound from the enclosing scope.  The
 * tiling pattern covers subset_s time steps and is repeated (toffset) until
 * T time steps have been executed.  For every (c0, c1, c2) tile, the time
 * steps c3 and the spatial points (c4, c5) inside the tile are visited and
 * stencil(read, write, c4, c5) is called for each point.
 *
 * Exits with status -1 when subset_s is outside [2, (tau/3) - 2].
 */
double test_1(){
    double start_time = omp_get_wtime();
    int read=0, write = 1;

    // s is the number of non-pointy bit 2D slices of diamond tiling
    // that is available for the current tile size.
    int s = (tau/3) - 2;
    // subset_s is an input parameter indicating how many of those
    // slices we want to use in the repeated tiling pattern.
    // subset_s must be at most s and at least 2.
    if (subset_s > s  || subset_s<2) {
        fprintf(stderr, "Error: need 2<=subset_s<=s\n");
        exit(-1);
    }
  
    // Set lower and upper bounds for spatial dimensions.
    // The generated loop bounds below use Ui - 1 and Uj - 1, i.e. they treat
    // Ui/Uj as exclusive limits, hence the +1 on upperBound here.
    int Li=1, Lj=1, Ui=upperBound+1, Uj=upperBound+1;
  
    // Loop over the tiling pattern.
    for (int toffset=0; toffset<T; toffset+=subset_s){
  
        // Loop over phases of tiles within repeated tile pattern.
        // This is like iterating over the A and B trapezoid tile types.
        for (int c0 = -2; c0 <= 0; c0 += 1){

            // Two loops over tiles within one phase.
            // All of the tiles within one phase can be done in parallel.

            // updates by Dave W, to the c1 and c2 loops, for OpenMP (from here to the end of the #if BOUNDING_BOX_FOR_PARALLEL_LOOPS
            //   hoist out min_c1 and max_c1, then use that to hoist a bounding box for c2
            //   initial version is just aiming for correct and parallel, without worrying about a loose boundingbox

            int c1_lb =
                max(
                    max(
                        floord(Lj + (tau/3) * c0 + (tau/3), tau), 
                        c0 + floord(-2 * T + Lj - 1, tau) + 1), 
                    floord(Lj + 1, tau)
                ); // end init block c1
            int c1_ub =
                min(
                    min(
                        floord(Uj + (tau/3) * c0 - ((tau/3)+2), tau) + 1, 
                        floord(T + Uj - 1, tau)), 
                    c0 + floord(Uj - 5, tau) + 2
                ); // end cond block c1

            // The two expressions below are the same as in the previous version, except that
            //  in the c2_lb_min_expr, I have replaced c1 with:
            //    c1_min_value where it appears with a positive coefficient, and
            //    c1_max_value where it appears with a negative coefficient.
            //  and in the c2_ub_max_expr, the opposite (i.e., c1 becomes c1_max_value where positive)
            // I will be embarrassed if I have done this wrong.
            ///  Note that I assume tau > 0
            // NOTE: the macro arguments are not parenthesized in the expansions,
            // so only pass plain identifiers (as every use below does).
#define c2_lb_min_expr(c1_min_value, c1_max_value)              \
                    max( \
                        max( \
                            max( \
                                max( \
                                    max( \
                                        max( \
                                            c0 - 2 * c1_max_value + floord(-Ui + Lj + 1,tau),  \
                                            -c1_max_value + floord(-2 * Ui - Uj + tau * c0 + tau * c1_min_value - tau-3, tau*2)+1), \
                                        c1_min_value + floord(-Ui - 2 * Uj + 3, tau)), \
                                    floord(-Ui - Uj + 3, tau)),  \
                                c0 - c1_max_value + floord(-Ui - (tau/3) * c0 + ((tau/3)+1), tau)),  \
                            c0 - c1_max_value + floord(-T - Ui, tau) + 1),  \
                        -c1_max_value + floord(-Ui + 4, tau) - 1 \
                    ) /* end init block c2 */

#define c2_ub_max_expr(c1_min_value, c1_max_value)              \
                    min( \
                        min( \
                            min( \
                                min( \
                                    min( \
                                        min( \
                                            c0 - 2 * c1_min_value + floord(-Li + Uj - 2, tau) + 1,  \
                                            c0 - c1_min_value + floord(-Li - 2, tau) + 1),  \
                                        c0 - c1_min_value + floord(-Li - (tau/3) * c0 - ((tau/3)+1), tau) + 1), \
                                    floord(T - Li - Lj, tau)), \
                                -c1_min_value + floord(2 * T - Li, tau)),  \
                            c1_max_value + floord(-Li - 2 * Lj - 1, tau) + 1),  \
                        -c1_min_value + floord(-2 * Li - Lj + tau * c0 + tau * c1_max_value + (tau-1), tau*2) \
                    ) /* end cond block c2 */
            // Exact per-c1 bounds: both the "min" and "max" roles are the same c1.
#define c2_lb_expr(c1_value) c2_lb_min_expr(c1_value, c1_value)
#define c2_ub_expr(c1_value) c2_ub_max_expr(c1_value, c1_value)


#if BOUNDING_BOX_FOR_PARALLEL_LOOPS
            // Rectangular (c1, c2) bounding box so both loops can be collapsed;
            // the exact c2 range for each c1 is re-checked by the if below.
            int c2_box_lb = c2_lb_min_expr(c1_lb, c1_ub);
            int c2_box_ub = c2_ub_max_expr(c1_lb, c1_ub);
#if PARALLEL
            // don't need to mention c1...c5 below, since they're scoped inside the for loops
            // (the shared list must not end with a trailing comma: the clause
            //  takes a plain comma-separated list of variables)
#pragma omp parallel for shared(start_time, s, Li, Lj, Ui, Uj, toffset, c0, c1_lb, c1_ub, c2_box_lb, c2_box_ub) private(read, write) collapse(2)
#endif
            for (int c1 = c1_lb; c1 <= c1_ub; c1 += 1) {
                for (int c2 = c2_box_lb; c2 <= c2_box_ub; c2 += 1) if (c2 >= c2_lb_expr(c1) && c2 <= c2_ub_expr(c1)) {
#else
            for (int c1 = c1_lb; c1 <= c1_ub; c1 += 1) {
                for (int c2 = c2_lb_expr(c1); c2 <= c2_ub_expr(c1); c2 += 1) {
#endif
                  //fprintf(stdout, "(%d,%d,%d)\n", c0,c1,c2);
                    // Loop over subset_s time steps within tiling pattern
                    // and within tile c0,c1,c2.
                    // Every time the pattern is repeated, toffset will be
                    // subset_s bigger.
                    // The real t value is c3+toffset.  We are just using the
                    // tiling pattern from t=1 to t<=subset_s.
                    for (int c3 = 1; c3 <= min(T-toffset,subset_s); c3 += 1){
                        int t = c3+toffset;
                        // if t % 2  is 1, then read=0 and write=1
                        // (read/write are assigned here before every use, so
                        //  their private copies never need an initial value)
                        write = t & 1;
                        read = 1-write;

                        // x spatial dimension.
                        for (int c4 = 
                            max(
                                max(
                                    max(
                                        -tau * c1 - tau * c2 + 2 * c3 - (2*tau-2),
                                        -Uj - tau * c2 + c3 - (tau-2)),
                                   tau * c0 - tau * c1 - tau * c2 - c3),
                                 Li
                            ); // end init block c4
                        c4 <= 
                            min(
                                min(
                                    min(
                                        tau * c0 - tau * c1 - tau * c2 - c3 + (tau-1),
                                        -tau * c1 - tau * c2 + 2 * c3),
                                    -Lj - tau * c2 + c3),
                                Ui - 1
                            ); // end cond block c4
                        c4 += 1){

                            // y spatial dimension.
                            for (int c5 = 
                                max(
                                    max(
                                        tau * c1 - c3,
                                        Lj),
                                    -tau * c2 + c3 - c4 - (tau-1)
                                ); // end init block c5
                                c5 <= 
                                    min(
                                        min(
                                            Uj - 1,
                                            -tau * c2 + c3 - c4),
                                        tau * c1 - c3 + (tau-1)
                                ); // end cond block c5
                                c5 += 1){
                                //fprintf(stdout, "(%d,%d,%d,%d,%d,%d)\n", c0,c1,c2,c3,c4,c5);
                                stencil( read, write, c4, c5);
                            } // for c5
                        } // for c4
                    } // for c3
                } // for c2
            } // for c1
        } // for c0
    } // for toffset
    
    double end_time = omp_get_wtime();
    
    return (end_time - start_time);
}

/*
 * Program entry point.
 *
 * Parses the command line (-n, -c, -s, -p, -T, -t, -h, -v; see the -h text
 * below), checks that tau is a multiple of 3 and at least 15, initialises
 * the problem, runs test_1() once, then optionally prints the elapsed time
 * and verifies the result.
 *
 * Invalid parameter values and malformed options terminate the program with
 * BAD_RUN_TIME_PARAMETERS (or BAD_COMPILE_TIME_PARAMETERS when tau was fixed
 * at compile time and is unusable); -h exits with status 0.
 */
int main( int argc, char* argv[] ){
    setbuf(stdout, NULL); // set buffer to null, so prints ALWAYS print (for debug purposes mainly)
    
    bool verify = false;
    bool printtime = true;
    // Command line parsing
    // getopt() returns an int.  Holding it in a plain char makes the
    // "!= -1" end-of-options test fail forever where char is unsigned.
    int c;
    while ((c = getopt (argc, argv, "nc:s:p:T:t:hv")) != -1){
        switch( c ) {
            case 'n':
                printtime=false;
                break;
                
            case 'c': // number of cores
                cores = parseInt( optarg );
                if( cores <= 0 ){
                    fprintf(stderr, "cores must be greater than 0: %d\n", cores);
                    exit(BAD_RUN_TIME_PARAMETERS);
                }
                break;
                
            case 's': // subset
                //globalSeed = parseInt( optarg );
                subset_s = parseInt( optarg );
                break;
                
            case 'p': // problem size
                problemSize = parseInt( optarg );
                if( problemSize <= 0 ){
                    fprintf(stderr, "problemSize must be greater than 0: %d\n", problemSize);
                    exit(BAD_RUN_TIME_PARAMETERS);
                }
                break;
                
            case 'T': // T (time steps)
                T = parseInt( optarg );
                if( T <= 0 ){    
                    fprintf(stderr, "T must be greater than 0: %d\n", T);
                    exit(BAD_RUN_TIME_PARAMETERS);
                }
                break;
            
            case 't': // tau
#if defined tau
                // tau was fixed at compile time: only a matching -t is tolerated.
                fprintf(stderr, "don't use -t to set tau when you compiled with -Dtau=%d.\n", tau);
                if (parseInt(optarg) != tau)
                    exit(BAD_COMPILE_TIME_PARAMETERS);
#else
                tau = parseInt( optarg );
#endif
                break;
            
            case 'h': // help
                printf("usage: %s\n-n \t dont print time \n-p <problem size> \t problem size in elements \n-T <time steps>\t number of time steps\n-c <cores>\tnumber of threads\n-s <subset_s>\t tile parameter\n-t <tau>\t tile parameter\n-h\tthis dialogue\n-v\tverify output\n", argv[0]);
                exit(0);
            
            case 'v': // verify;
                verify = true;
                break;
            
            case '?':
                // getopt() reports both unknown options and options that are
                // missing their argument here; optopt holds the offending letter.
                if (optopt == 'p')
                    fprintf (stderr, "Option -%c requires positive int argument: problem size.\n", optopt);
                else if (optopt == 'T')
                    fprintf (stderr, "Option -%c requires positive int argument: T.\n", optopt);
                else if (optopt == 's')
                    fprintf (stderr, "Option -%c requires int argument: subset_s.\n", optopt);
                else if (optopt == 'c')
                    fprintf (stderr, "Option -%c requires int argument: number of cores.\n", optopt);
                else if (optopt == 't')
                    fprintf (stderr, "Option -%c requires int argument: tau.\n", optopt);
                else if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt);
                // A malformed command line is an error, so do not report success.
                exit(BAD_RUN_TIME_PARAMETERS);
                
          default:
             exit(BAD_RUN_TIME_PARAMETERS);
          }
    }

    // The tiling in test_1() divides tau by 3 and needs at least 15.
    if( !( tau % 3 == 0 && tau >= 15 ) ){
#if defined tau
        fprintf(stderr, "tau must be a multiple of 3, and >= 15, but the program was compiled with -Dtau=%d, and thus can't run :-(\n", tau);
        exit(BAD_COMPILE_TIME_PARAMETERS);
#else
        fprintf(stderr, "tau must be a multiple of 3, and >= 15, but it's %d; re-run with a different -t value\n", tau);
        exit(BAD_RUN_TIME_PARAMETERS);
#endif
    }


    init();
    initSpace();
    
    double time = test_1();
    
    if( printtime ) {
        printf( "Time: %f\n", time );
    }

    if( verify ){
        verifyResult( true );
    }
    
    return 0;
}
Esempio n. 7
0
/*
 * Driver for the D2Q9 lattice simulation.
 *
 * Derives omega and circle_R2 from the global parameters, prints a banner,
 * initialises both copies of the grid, then runs the generated tiled loop
 * nest, which calls lbm_kernel() for every (t4, t5, t6) point.  When TIME is
 * defined the elapsed time and MLUPS are printed; when DEBUG is defined the
 * velocities are dumped afterwards.  Always returns 0.
 */
int main(void) {
  int t, y, x, k;
  double total_lattice_pts = (double)nY * (double)nX * (double)nTimesteps;

  /* For timekeeping */
  int ts_return = -1;
  struct timeval start, end, result;
  double tdiff = 0.0;

  /* Compute values for global parameters */
  omega = 1.0 / tau;
  circle_R2 = circle_radius * circle_radius;

  double rho_avg = (rho_in + rho_out) / 2.0;

  printf(
      "2D Flow Past Cylinder simulation with D2Q9 lattice:\n"
      "\tscheme     : 2-Grid, Fused, Pull\n"
      "\tgrid size  : %d x %d = %.2lf * 10^3 Cells\n"
      "\tnTimeSteps : %d\n"
      "\tomega      : %.6lf\n",
      nX, nY, nX * nY / 1.0e3, nTimesteps, omega);

  /* Initialize every PDF in both grid copies to its weight times rho_avg:
   * w1 for k = 0, w2 for k = 1..4 and w3 for k = 5..nK-1.  The loops cover
   * the (nY + 6) x (nX + 4) padded extent. */
  for (y = 0; y < nY + 2 + 4; y++) {
    for (x = 0; x < nX + 2 + 2; x++) {
      grid[0][y][x][0] = w1 * rho_avg;
      grid[1][y][x][0] = w1 * rho_avg;

      for (k = 1; k < 5; k++) {
        grid[0][y][x][k] = w2 * rho_avg;
        grid[1][y][x][k] = w2 * rho_avg;
      }

      for (k = 5; k < nK; k++) {
        grid[0][y][x][k] = w3 * rho_avg;
        grid[1][y][x][k] = w3 * rho_avg;
      }
    }
  }

  /* To satisfy PET */
  /* NOTE(review): these are shorts, so nX + 2 / nY + 3 must fit in a short. */
  short _nX = nX + 2;
  short _nY = nY + 3;
  int _nTimesteps = nTimesteps;

#ifdef TIME
  gettimeofday(&start, 0);
#endif

  int t1, t2, t3, t4, t5, t6;
  int lb, ub, lbp, ubp, lb2, ub2;
  register int lbv, ubv;
  /* Start of CLooG code */
  if ((_nTimesteps >= 1) && (_nX >= 3) && (_nY >= 4)) {
    /* t1 is walked sequentially; the t2 range for each t1 is run in parallel. */
    for (t1 = -1; t1 <= floord(5 * _nTimesteps + 3 * _nY - 8, 32); t1++) {
      lbp = max(max(ceild(4 * t1, 5), ceild(16 * t1 - _nTimesteps + 1, 12)),
                ceild(32 * t1 - _nTimesteps + 4, 32));
      ubp = min(min(floord(4 * t1 + 4, 3), floord(_nTimesteps + _nY - 2, 8)),
                floord(16 * t1 + _nY + 14, 20));
#pragma omp parallel for private(lbv, ubv, t3, t4, t5, t6)
      for (t2 = lbp; t2 <= ubp; t2++) {
        for (t3 = max(max(0, ceild(4 * t1 - 3 * t2 - 1, 2)),
                      ceild(8 * t2 - _nY - 4, 8));
             t3 <= min(min(min(floord(_nTimesteps + _nX - 2, 8),
                               floord(8 * t2 + _nX + 3, 8)),
                           floord(16 * t1 - 12 * t2 + _nX + 18, 8)),
                       floord(32 * t1 - 32 * t2 + _nY + _nX + 29, 8));
             t3++) {
          /* t4 is passed to lbm_kernel as its time argument (0 .. _nTimesteps - 1). */
          for (t4 = max(
                   max(max(max(0, 16 * t1 - 12 * t2), 32 * t1 - 32 * t2 + 3),
                       8 * t2 - _nY + 1),
                   8 * t3 - _nX + 1);
               t4 <= min(min(min(min(_nTimesteps - 1, 8 * t2 + 4), 8 * t3 + 5),
                             16 * t1 - 12 * t2 + 19),
                         32 * t1 - 32 * t2 + _nY + 30);
               t4++) {

            /* Hoisted loop conditional */
            /* Even t4: read grid[0], write grid[1].  Odd t4: the reverse. */
            if (t4 % 2 == 0) {
              for (t5 = max(max(8 * t2, t4 + 3),
                            -32 * t1 + 32 * t2 + 2 * t4 - 31);
                   t5 <= min(min(8 * t2 + 7, -32 * t1 + 32 * t2 + 2 * t4),
                             t4 + _nY - 1);
                   t5++) {
                lbv = max(8 * t3, t4 + 2);
                ubv = min(8 * t3 + 7, t4 + _nX - 1);

#pragma ivdep
#pragma vector always
                for (t6 = lbv; t6 <= ubv; t6++) {
                  /* The grid row/column are the skewed indices t5 - t4 and t6 - t4. */
                  lbm_kernel(grid[0][(-t4 + t5)][(-t4 + t6)][0],
                             grid[0][(-t4 + t5) - 1][(-t4 + t6)][3],
                             grid[0][(-t4 + t5) + 1][(-t4 + t6)][4],
                             grid[0][(-t4 + t5)][(-t4 + t6) - 1][1],
                             grid[0][(-t4 + t5)][(-t4 + t6) + 1][2],
                             grid[0][(-t4 + t5) - 1][(-t4 + t6) - 1][5],
                             grid[0][(-t4 + t5) - 1][(-t4 + t6) + 1][6],
                             grid[0][(-t4 + t5) + 1][(-t4 + t6) - 1][7],
                             grid[0][(-t4 + t5) + 1][(-t4 + t6) + 1][8],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][0],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][3],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][4],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][1],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][2],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][5],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][6],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][7],
                             &grid[1][(-t4 + t5)][(-t4 + t6)][8], (t4),
                             ((-t4 + t5)), ((-t4 + t6)));
                  ;
                }
              }
            } else {
              for (t5 = max(max(8 * t2, t4 + 3),
                            -32 * t1 + 32 * t2 + 2 * t4 - 31);
                   t5 <= min(min(8 * t2 + 7, -32 * t1 + 32 * t2 + 2 * t4),
                             t4 + _nY - 1);
                   t5++) {
                lbv = max(8 * t3, t4 + 2);
                ubv = min(8 * t3 + 7, t4 + _nX - 1);

#pragma ivdep
#pragma vector always
                for (t6 = lbv; t6 <= ubv; t6++) {
                  lbm_kernel(grid[1][(-t4 + t5)][(-t4 + t6)][0],
                             grid[1][(-t4 + t5) - 1][(-t4 + t6)][3],
                             grid[1][(-t4 + t5) + 1][(-t4 + t6)][4],
                             grid[1][(-t4 + t5)][(-t4 + t6) - 1][1],
                             grid[1][(-t4 + t5)][(-t4 + t6) + 1][2],
                             grid[1][(-t4 + t5) - 1][(-t4 + t6) - 1][5],
                             grid[1][(-t4 + t5) - 1][(-t4 + t6) + 1][6],
                             grid[1][(-t4 + t5) + 1][(-t4 + t6) - 1][7],
                             grid[1][(-t4 + t5) + 1][(-t4 + t6) + 1][8],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][0],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][3],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][4],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][1],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][2],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][5],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][6],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][7],
                             &grid[0][(-t4 + t5)][(-t4 + t6)][8], (t4),
                             ((-t4 + t5)), ((-t4 + t6)));
                  ;
                }
              }
            }
            /* end hoisted if */
          }
        }
      }
    }
  }
/* End of CLooG code */

#ifdef TIME
  gettimeofday(&end, 0);

  ts_return = timeval_subtract(&result, &end, &start);
  tdiff = (double)(result.tv_sec + result.tv_usec * 1.0e-6);

  printf("\tTime taken : %7.5lfs\n", tdiff);
  printf("\tMLUPS      : %7.5lf\n", (total_lattice_pts / (1.0e6 * tdiff)));
#endif

#ifdef DEBUG
  /* Dump rho, uX, uY for the entire domain to verify results */
  /* The generated loop nest never assigns t, so passing it as-is would read
   * an indeterminate value.  Use the number of completed time steps: the
   * last step t4 = nTimesteps - 1 writes grid[nTimesteps % 2].
   * NOTE(review): confirm this is the value dumpVelocities() expects. */
  t = nTimesteps;
  dumpVelocities(t);
#endif

  return 0;
}
/* Run s0 for c0 from 4 * floord(m + 1, 32) up to n, but only when
 * m >= 8 * floord(m + 1, 8). */
if (m >= 8 * floord(m + 1, 8)) {
  for (int c0 = 4 * floord(m + 1, 32); c0 <= n; ++c0) {
    s0(c0);
  }
}
Esempio n. 9
0
/* Generated from ./non_optimal/nul_complex1.cloog by CLooG 0.18.1-2-g43fc508 gmp bits in 0.00s. */
/* Two-level scan over (c1, c2) for n >= 0; S1 is issued for even c2 only. */
if (n >= 0) {
  for (c1 = 0; c1 <= 5 * n; c1++) {
    for (c2 = max(ceild(2 * c1, 3), c1 - n);
         c2 <= min(floord(2 * c1 + 2 * n, 3), c1);
         c2++) {
      /* Odd c2 values carry no statement instance. */
      if (c2 % 2 != 0) {
        continue;
      }
      S1(((-2 * c1 + 3 * c2) / 2), (c1 - c2));
    }
  }
}
Esempio n. 10
0
/*
 * Timing driver: after init_arrays(), runs the generated kernel REPS times,
 * prints the average time per run as reported by rtclock(), and returns
 * w[0] converted to int.
 */
int main()
{
  init_arrays();

  double annot_t_start=0, annot_t_end=0, annot_t_total=0;
  int annot_i;

  /* Each repetition is timed separately and added to annot_t_total. */
  for (annot_i=0; annot_i<REPS; annot_i++)
  {
    annot_t_start = rtclock();




/* Bound helpers: ceild/floord divide in double and round with ceil/floor. */
#define ceild(n,d)  ceil(((double)(n))/((double)(d)))
#define floord(n,d) floor(((double)(n))/((double)(d)))
#define max(x,y)    ((x) > (y)? (x) : (y))
#define min(x,y)    ((x) < (y)? (x) : (y))

	/* Statement bodies. The zT* tile arguments of S1 and S2 are not used by the expansions. */
	#define S1(zT0,zT1,zT2,zT3,i,j)	{B[i][j]=u2[i]*v2[j]+u1[i]*v1[j]+A[i][j];}
	#define S2(zT0,zT1,zT2,zT3,i,j)	{x[i]=beta*B[j][i]*y[j]+x[i];}
	#define S3(i)	{x[i]=z[i]+x[i];}
	#define S4(i,j)	{w[i]=alpha*B[i][j]*x[j]+w[i];}

	int c1, c2, c3, c4, c5, c6, c7, c8, c9, c10;

	register int lbv, ubv;

/* Generated from PLuTo-produced CLooG file by CLooG v0.14.1 64 bits in 0.05s. */
/* c2/c3 step over blocks of 256 and c4/c5 over blocks of 32 within them; c6/c7 are the point loops. */
for (c2=0;c2<=floord(N-1,256);c2++) {
  for (c3=0;c3<=floord(N-1,256);c3++) {
    for (c4=max(0,8*c2);c4<=min(8*c2+7,floord(N-1,32));c4++) {
      for (c5=max(8*c3,0);c5<=min(floord(N-1,32),8*c3+7);c5++) {
/* The annotation comment below records the original c6 loop and the requested
   transformation; the code after it is the c6 loop unrolled 32 times. */
/*@ begin Loop(
transform UnrollJam(ufactor=32)
        for (c6=max(32*c5,0);c6<=min(N-1,32*c5+31);c6++) 
{
{
	lbv=max(32*c4,0);
	ubv=min(N-1,32*c4+31);
#pragma ivdep
#pragma vector always
	for (c7=lbv; c7<=ubv; c7++) {
            S1(c3,c2,c5,c4,c6,c7) ;
            S2(c2,c3,c4,c5,c7,c6) ;
          }
}
}
) @*/{ 

  /* Main unrolled loop: handles 32 consecutive c6 values per iteration. */
  for (c6 = max(32 * c5, 0); c6 <= min(N - 1, 32 * c5 + 31) - 31; c6 = c6 + 32) 
{
	lbv=max(32*c4,0);
	ubv=min(N-1,32*c4+31);
#pragma ivdep
#pragma vector always
	for (c7=lbv; c7<=ubv; c7++) {
        S1(c3, c2, c5, c4, c6, c7); 
        S2(c2, c3, c4, c5, c7, c6); 
        S1(c3, c2, c5, c4, (c6 + 1), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 1)); 
        S1(c3, c2, c5, c4, (c6 + 2), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 2)); 
        S1(c3, c2, c5, c4, (c6 + 3), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 3)); 
        S1(c3, c2, c5, c4, (c6 + 4), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 4)); 
        S1(c3, c2, c5, c4, (c6 + 5), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 5)); 
        S1(c3, c2, c5, c4, (c6 + 6), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 6)); 
        S1(c3, c2, c5, c4, (c6 + 7), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 7)); 
        S1(c3, c2, c5, c4, (c6 + 8), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 8)); 
        S1(c3, c2, c5, c4, (c6 + 9), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 9)); 
        S1(c3, c2, c5, c4, (c6 + 10), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 10)); 
        S1(c3, c2, c5, c4, (c6 + 11), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 11)); 
        S1(c3, c2, c5, c4, (c6 + 12), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 12)); 
        S1(c3, c2, c5, c4, (c6 + 13), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 13)); 
        S1(c3, c2, c5, c4, (c6 + 14), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 14)); 
        S1(c3, c2, c5, c4, (c6 + 15), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 15)); 
        S1(c3, c2, c5, c4, (c6 + 16), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 16)); 
        S1(c3, c2, c5, c4, (c6 + 17), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 17)); 
        S1(c3, c2, c5, c4, (c6 + 18), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 18)); 
        S1(c3, c2, c5, c4, (c6 + 19), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 19)); 
        S1(c3, c2, c5, c4, (c6 + 20), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 20)); 
        S1(c3, c2, c5, c4, (c6 + 21), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 21)); 
        S1(c3, c2, c5, c4, (c6 + 22), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 22)); 
        S1(c3, c2, c5, c4, (c6 + 23), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 23)); 
        S1(c3, c2, c5, c4, (c6 + 24), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 24)); 
        S1(c3, c2, c5, c4, (c6 + 25), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 25)); 
        S1(c3, c2, c5, c4, (c6 + 26), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 26)); 
        S1(c3, c2, c5, c4, (c6 + 27), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 27)); 
        S1(c3, c2, c5, c4, (c6 + 28), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 28)); 
        S1(c3, c2, c5, c4, (c6 + 29), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 29)); 
        S1(c3, c2, c5, c4, (c6 + 30), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 30)); 
        S1(c3, c2, c5, c4, (c6 + 31), c7); 
        S2(c2, c3, c4, c5, c7, (c6 + 31)); 
      } 
}

  /* Remainder loop: continues from the c6 value left by the unrolled loop, one at a time. */
  for (; c6 <= min(N - 1, 32 * c5 + 31); c6 = c6 + 1) 
{
	lbv=max(32*c4,0);
	ubv=min(N-1,32*c4+31);
#pragma ivdep
#pragma vector always
	for (c7=lbv; c7<=ubv; c7++) {
        S1(c3, c2, c5, c4, c6, c7); 
        S2(c2, c3, c4, c5, c7, c6); 
      } 
}
} 
/*@ end @*/
      }
    }
  }
}
/* S3 over every index 0..N-1. */
for (c2=0;c2<=N-1;c2++) {
  S3(c2) ;
}
/* S4 over the full N x N index space. */
for (c2=0;c2<=N-1;c2++) {
  for (c3=0;c3<=N-1;c3++) {
    S4(c2,c3) ;
  }
}
/* End of CLooG code */

    annot_t_end = rtclock();
    annot_t_total += annot_t_end - annot_t_start;
  }
  
  /* Average over the REPS runs. */
  annot_t_total = annot_t_total / REPS;
  printf("%f\n", annot_t_total);

  return ((int) w[0]); 

}
Esempio n. 11
0
/* Visit c0 in steps of 4, starting at 4 * floord(m - 1, 12) + 4,
 * for as long as c0 <= floord(n, 3). */
for (int c0 = 4 * floord(m - 1, 12) + 4;
     c0 <= floord(n, 3);
     c0 += 4) {
  s0(c0);
}
Esempio n. 12
0
/*
 * Generated six-level scan (c0..c5) that calls s0 for every point.
 * c0 ranges over 0..3; the bounds of each inner loop are expressed in terms
 * of the enclosing iterators, and c4 never exceeds 1000.
 */
for (int c0 = 0; c0 <= 3; c0 += 1)
  for (int c1 = max(0, 2 * c0 - 3); c1 <= min(c0 + c0 / 2 + 1, 3); c1 += 1)
    for (int c2 = c0; c2 <= min(min(3, 3 * c1 + 2), 2 * c0 - c1 + 1); c2 += 1)
      for (int c3 = max(max(max(c1 - (-c1 + 3) / 3, 0), c2 + floord(3 * c1 - c2 - 1, 6)), 2 * c0 - (c0 + c1 + 1) / 3 - 1); c3 <= min(c0 + 1, 3); c3 += 1)
        for (int c4 = max(max(max(max(-200 * c1 + 400 * c3 - 199, 333 * c1 + c1 / 3), 333 * c2 + (c2 + 1) / 3), 667 * c0 - 333 * c1 - (c0 + c1 + 3) / 3 - 332), 250 * c3 + 1); c4 <= min(min(min(min(500 * c0 + 499, -200 * c1 + 400 * c3 + 400), 333 * c3 - (-c3 + 3) / 3 + 334), 1000), 333 * c2 - (-c2 + 3) / 3 + 333); c4 += 1)
          for (int c5 = max(max(max(1000 * c3 - 2 * c4 + 2, 1000 * c0 - c4), 500 * c1 + (c4 + 1) / 2), c4); c5 <= min(min(min(1000 * c3 - 2 * c4 + 1001, 1000 * c0 - c4 + 999), 500 * c1 + (c4 + 1) / 2 + 499), 2 * c4 + 1); c5 += 1)
            s0(c0, c1, c2, c3, c4, c5);
Esempio n. 13
0
for (int c0 = 1; c0 < max((6 * M + 3 * N + 188) / 200 - 2, (N + 93) / 100 + 3 * ((2 * M + N - 4) / 200) - 1); c0 += 1)
  for (int c1 = max(0, floord(-N + 100 * c0 + 106, 300)); c1 <= min((2 * M + N - 4) / 200 - 1, (c0 - 1) / 3); c1 += 1)
    S2(c0 - c1, c1);
Esempio n. 14
0
for (int c0 = 0; c0 <= 5 * n; c0 += 1)
  for (int c1 = max(-((n + c0 + 1) % 2) - n + c0 + 1, 2 * floord(c0 - 1, 3) + 2); c1 <= min(n + c0 - (n + c0 + 2) / 3, c0); c1 += 2)
    S1((-2 * c0 + 3 * c1) / 2, c0 - c1);
Esempio n. 15
0
/*
 * Generated five-level scan (c0..c4) that calls s0 with the five iterators
 * plus two values derived from (c0, c3) and (c1, c2, c4).
 * c0 is at most 4, c1 and c2 at most 2, and c3 at most 30.
 */
for (int c0 = 1; c0 <= min(4, floord(2 * m - 1, 17) + 1); c0 += 1)
  for (int c1 = 1; c1 <= min(2, -2 * c0 + (2 * m + 3 * c0 - 4) / 10 + 3); c1 += 1)
    for (int c2 = 0; c2 <= min(2, -c0 - c1 + (2 * m + 3 * c0 + 10 * c1 + 6) / 20 + 1); c2 += 1)
      for (int c3 = 8 * c0 + (c0 + 1) / 2 - 8; c3 <= min(min(30, m - 5 * c1 - 10 * c2 + 5), 8 * c0 + c0 / 2); c3 += 1)
        for (int c4 = 5 * c1 + 10 * c2 - 4; c4 <= min(5 * c1 + 10 * c2, m - c3 + 1); c4 += 1)
          s0(c0, c1, c2, c3, c4, -9 * c0 + c3 + c0 / 2 + 9, -5 * c1 - 5 * c2 + c4 + 5);
Esempio n. 16
0
/*
 * Blur benchmark driver.
 *
 * Allocates IN ((N+2) rows), BLURX and OUT (N rows each, N doubles per row),
 * calls init_array(), runs the tiled two-stage kernel 10 times inside the
 * timed region, optionally checks OUT against a straightforward
 * recomputation (VERIFY), then releases the buffers.
 *
 * Returns 0 on success and 1 if any allocation fails.
 */
int main()
{
  int tx, ty, x, y;
  int trial;

  /* Row-pointer tables first; every allocation is checked before use. */
  IN = malloc((N+2) * sizeof *IN);
  BLURX = malloc((N) * sizeof *BLURX);
  OUT = malloc((N) * sizeof *OUT);
  if (IN == NULL || BLURX == NULL || OUT == NULL) {
    fprintf(stderr, "out of memory\n");
    return 1;
  }

  for (int i=0; i<N+2; i++) {
    IN[i] = malloc((N) * sizeof **IN);
    if (IN[i] == NULL) {
      fprintf(stderr, "out of memory\n");
      return 1;
    }
  }

  for (int i=0; i<N; i++) {
    BLURX[i] = malloc((N) * sizeof **BLURX);
    OUT[i] = malloc((N) * sizeof **OUT);
    if (BLURX[i] == NULL || OUT[i] == NULL) {
      fprintf(stderr, "out of memory\n");
      return 1;
    }
  }

  init_array();

#ifdef PERFCTR
  PERF_INIT; 
#endif

  IF_TIME(t_start = rtclock());

  /* The kernel is repeated 10 times; the reported runtime is the average. */
  for (trial = 0; trial < 10 ; ++trial)
    {

#pragma scop
      for(ty = 0; ty <= floord((N-1),B); ++ty)
#pragma omp parallel for private(tx,x,y)
	for(tx = 0; tx <= floord((N-1),B); ++tx){
	  for(x = 0; x <= B-1; ++x){	
	    for(y = 0; y <= B-1; ++y)
	      /* P */     blurx(x,y)=in(x,y)+in((x+1),y)+in((x+2),y);
	    for(y = 0; y <= B-1; ++y)
	      if(ty*B+y>=2)
		/* Q */        out(x,y)=blurx(x,y)+blurx(x,(y-1))+blurx(x,(y-2));
	  }
	}
#pragma endscop
    }

  IF_TIME(t_end = rtclock());
  IF_TIME(fprintf(stderr, "File:%s \t\t N=%d,T=%d \t Runtime=%0.6lfs\n", __FILE__, N, B, (t_end - t_start)/10));

#ifdef PERFCTR
  PERF_EXIT; 
#endif


#ifdef VERIFY
  /* Stage 1: recompute the horizontal blur directly. */
  for(x = 0; x <= N-1; ++x)
    for(y = 0; y <= N-1; ++y)
      BLURX[x][y]=IN[x][y]+IN[x+1][y]+IN[x+2][y];
  // Stage 2: vertical blur
  for(x = 0; x <= N-1; ++x)
    for(y = 2; y <= N-1; ++y)
      {
	if(OUT[x][y] != BLURX[x][y]+BLURX[x][y-1]+BLURX[x][y-2])
	  {
	    printf("Difference at (%d, %d) : %f versus %f\n", x, y, OUT[x][y], BLURX[x][y]+BLURX[x][y-1]+BLURX[x][y-2]);
	    /* Only leaves the y loop: at most one difference is reported per x. */
	    break;
	  }
      }
#endif

  /* The presence of a readable ".test" file is only probed here; close the
     handle again so it is not leaked. */
  FILE *probe = fopen(".test", "r");
  if (probe != NULL) {
    // print_array();
    fclose(probe);
  }

  /* Release the row buffers, then the row-pointer tables. */
  for (int i=0; i<N+2; i++)
    free(IN[i]);
  for (int i=0; i<N; i++) {
    free(BLURX[i]);
    free(OUT[i]);
  }
  free(IN);
  free(BLURX);
  free(OUT);

  return 0;
}
Esempio n. 17
0
/*
 * Timing driver for a tiled loop nest over the arrays ex, ey and hz.
 *
 * After init_arrays(), the nest below is executed REPS times; each run is
 * bracketed by rtclock() calls and the mean elapsed time is printed.
 *
 * NOTE(review): main returns 1, which a calling shell normally interprets
 * as failure -- confirm that the harness running this expects it.
 */
int main()
{
  init_arrays();

  double annot_t_start=0, annot_t_end=0, annot_t_total=0;
  int annot_i;

  for (annot_i=0; annot_i<REPS; annot_i++)
  {
    annot_t_start = rtclock();
    
  


int t, i, j, k, l, m, n,ii,jj;

	/* Statement bodies.  The leading zT* arguments are ignored by the
	 * expansions; only t, i and j are used.
	 *   S1: ey[0][j] = t
	 *   S2: ey[i][j] updated from hz[i][j] and hz[i-1][j]
	 *   S3: ex[i][j] updated from hz[i][j] and hz[i][j-1]
	 *   S4: hz[i][j] updated from neighbouring ey and ex values */
	#define S1(zT0,zT1,t,j)	{ey[0][j]=t;}
	#define S2(zT0,zT1,zT2,t,i,j)	{ey[i][j]=ey[i][j]-((double)(1))/2*(hz[i][j]-hz[i-1][j]);}
	#define S3(zT0,zT1,zT2,t,i,j)	{ex[i][j]=ex[i][j]-((double)(1))/2*(hz[i][j]-hz[i][j-1]);}
	#define S4(zT0,zT1,zT2,t,i,j)	{hz[i][j]=hz[i][j]-((double)(7))/10*(ey[1+i][j]+ex[i][1+j]-ex[i][j]-ey[i][j]);}

	int c1, c2, c3, c4, c5, c6, c7;

	register int lbv, ubv;

/* c1, c2, c3 enumerate tiles (all bounds are expressed in multiples of 32);
 * c4, c5, c6 are the point loops inside a tile.  The many guarded sections
 * below are the separate boundary cases of the same three statements. */
for (c1=0;c1<=floord(tmax-1,32);c1++) {
  for (c2=max(ceild(32*c1-31,32),0);c2<=min(floord(tmax+ny-1,32),floord(32*c1+ny+31,32));c2++) {
for (c3=max(max(max(max(ceild(32*c2-ny-30,32),0),ceild(64*c1-32*c2-61,32)),ceild(32*c1-31,32)),ceild(32*c1-992*c2-1891,992));c3<=min(min(floord(32*c2+nx+30,32),floord(tmax+nx-1,32)),floord(32*c1+nx+31,32));c3++) {
      if ((c1 <= floord(32*c3-nx,32)) && (c2 <= floord(32*c3-nx+ny,32)) && (c3 >= ceild(nx,32))) {
        for (c5=max(32*c3-nx+1,32*c2);c5<=min(32*c2+31,32*c3-nx+ny);c5++) {
          S4(c1,-c1+c3,-c1+c2,32*c3-nx,nx-1,-32*c3+c5+nx-1) ;
        }
      }
      if ((c1 <= floord(32*c2-ny,32)) && (c2 >= max(ceild(32*c3-nx+ny+1,32),ceild(ny,32)))) {
        for (c6=max(32*c3,32*c2-ny+1);c6<=min(32*c2+nx-ny,32*c3+31);c6++) {
          S4(c1,-c1+c3,-c1+c2,32*c2-ny,-32*c2+c6+ny-1,ny-1) ;
        }
      }
      if (c1 == c3) {
        for (c4=max(max(32*c2-ny+1,0),32*c3);c4<=min(min(32*c3+30,32*c2-ny+31),tmax-1);c4++) {
          for (c5=32*c2;c5<=c4+ny-1;c5++) {
            S1(c1,-c1+c2,c4,-c4+c5) ;
            S3(c1,0,-c1+c2,c4,0,-c4+c5) ;
            for (c6=c4+1;c6<=32*c3+31;c6++) {
              S2(c1,0,-c1+c2,c4,-c4+c6,-c4+c5) ;
              S3(c1,0,-c1+c2,c4,-c4+c6,-c4+c5) ;
              S4(c1,0,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
            }
          }
          for (c6=c4+1;c6<=32*c3+31;c6++) {
            S4(c1,0,-c1+c2,c4,-c4+c6-1,ny-1) ;
          }
        }
      }
      if (c1 == c3) {
        for (c4=max(max(0,32*c3),32*c2-ny+32);c4<=min(min(tmax-1,32*c3+30),32*c2-1);c4++) {
          for (c5=32*c2;c5<=32*c2+31;c5++) {
            S1(c1,-c1+c2,c4,-c4+c5) ;
            S3(c1,0,-c1+c2,c4,0,-c4+c5) ;
            for (c6=c4+1;c6<=32*c3+31;c6++) {
              S2(c1,0,-c1+c2,c4,-c4+c6,-c4+c5) ;
              S3(c1,0,-c1+c2,c4,-c4+c6,-c4+c5) ;
              S4(c1,0,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
            }
          }
        }
      }
      if (c1 == c3) {
        for (c4=max(max(32*c2,0),32*c3);c4<=min(min(tmax-1,32*c3+30),32*c2+30);c4++) {
          S1(c1,-c1+c2,c4,0) ;
          for (c6=c4+1;c6<=32*c3+31;c6++) {
            S2(c1,0,-c1+c2,c4,-c4+c6,0) ;
          }
          for (c5=c4+1;c5<=32*c2+31;c5++) {
            S1(c1,-c1+c2,c4,-c4+c5) ;
            S3(c1,0,-c1+c2,c4,0,-c4+c5) ;
            for (c6=c4+1;c6<=32*c3+31;c6++) {
              S2(c1,0,-c1+c2,c4,-c4+c6,-c4+c5) ;
              S3(c1,0,-c1+c2,c4,-c4+c6,-c4+c5) ;
              S4(c1,0,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
            }
          }
        }
      }
      for (c4=max(max(max(32*c1,0),32*c2-ny+1),32*c3-nx+1);c4<=min(min(min(32*c3-nx+31,32*c2-ny+31),32*c1+31),tmax-1);c4++) {
        for (c5=32*c2;c5<=c4+ny-1;c5++) {
          for (c6=32*c3;c6<=c4+nx-1;c6++) {
            S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
          }
          S4(c1,-c1+c3,-c1+c2,c4,nx-1,-c4+c5-1) ;
        }
        for (c6=32*c3;c6<=c4+nx;c6++) {
          S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,ny-1) ;
        }
      }
      for (c4=max(max(max(32*c1,0),32*c3-nx+1),32*c2-ny+32);c4<=min(min(min(tmax-1,32*c1+31),32*c2-1),32*c3-nx+31);c4++) {
        for (c5=32*c2;c5<=32*c2+31;c5++) {
          for (c6=32*c3;c6<=c4+nx-1;c6++) {
            S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
          }
          S4(c1,-c1+c3,-c1+c2,c4,nx-1,-c4+c5-1) ;
        }
      }
      for (c4=max(max(max(32*c3-nx+32,32*c1),0),32*c2-ny+1);c4<=min(min(min(32*c2-ny+31,32*c1+31),tmax-1),32*c3-1);c4++) {
        for (c5=32*c2;c5<=c4+ny-1;c5++) {
          for (c6=32*c3;c6<=32*c3+31;c6++) {
            S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
          }
        }
        for (c6=32*c3;c6<=32*c3+31;c6++) {
          S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,ny-1) ;
        }
      }
      for (c4=max(max(max(32*c2,32*c1),0),32*c3-nx+1);c4<=min(min(min(32*c2+30,tmax-1),32*c1+31),32*c3-nx+31);c4++) {
        for (c6=32*c3;c6<=c4+nx-1;c6++) {
          S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,0) ;
        }
        for (c5=c4+1;c5<=32*c2+31;c5++) {
          for (c6=32*c3;c6<=c4+nx-1;c6++) {
            S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
          }
          S4(c1,-c1+c3,-c1+c2,c4,nx-1,-c4+c5-1) ;
        }
      }
      /* Full 32x32 interior tile.  The annotation that follows holds the
       * original c5/c6 loop pair; the code after it is the transformed
       * version: c6 outermost and unrolled by 4 with a remainder loop,
       * c5 innermost with vectorisation pragmas. */
      for (c4=max(max(max(32*c1,0),32*c3-nx+32),32*c2-ny+32);c4<=min(min(min(32*c3-1,tmax-1),32*c1+31),32*c2-1);c4++) {
/*@ begin Loop(
 transform Composite(                                                                        
  tile = [('c5',T1,'ii'),('c6',T2,'jj')],
  permut = [PERMUTS],
  unrolljam = [('c5',U1),('c6',U2)],
  vector = (VEC, ['ivdep','vector always'])
 )                        
        for (c5=32*c2;c5<=32*c2+31;c5++) 
          for (c6=32*c3;c6<=32*c3+31;c6++) 
{
            S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
}
) @*/{
  for (c6=32*c3; c6<=32*c3+28; c6=c6+4) {
    register int cbv_1, cbv_2;
    cbv_1=32*c2;
    cbv_2=32*c2+31;
#pragma ivdep
#pragma vector always
    for (c5=cbv_1; c5<=cbv_2; c5=c5+1) {
      S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5);
      S2(c1,-c1+c3,-c1+c2,c4,-c4+c6+1,-c4+c5);
      S2(c1,-c1+c3,-c1+c2,c4,-c4+c6+2,-c4+c5);
      S2(c1,-c1+c3,-c1+c2,c4,-c4+c6+3,-c4+c5);
      S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5);
      S3(c1,-c1+c3,-c1+c2,c4,-c4+c6+1,-c4+c5);
      S3(c1,-c1+c3,-c1+c2,c4,-c4+c6+2,-c4+c5);
      S3(c1,-c1+c3,-c1+c2,c4,-c4+c6+3,-c4+c5);
      S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1);
      S4(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5-1);
      S4(c1,-c1+c3,-c1+c2,c4,-c4+c6+1,-c4+c5-1);
      S4(c1,-c1+c3,-c1+c2,c4,-c4+c6+2,-c4+c5-1);
    }
  }
  /* Remainder of the c6 unrolling; continues from the c6 left by the loop above. */
  for (; c6<=32*c3+31; c6=c6+1) {
    register int cbv_3, cbv_4;
    cbv_3=32*c2;
    cbv_4=32*c2+31;
#pragma ivdep
#pragma vector always
    for (c5=cbv_3; c5<=cbv_4; c5=c5+1) {
      S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5);
      S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5);
      S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1);
    }
  }
}
/*@ end @*/
      }
      for (c4=max(max(max(32*c2,32*c3-nx+32),32*c1),0);c4<=min(min(min(32*c3-1,32*c2+30),tmax-1),32*c1+31);c4++) {
        for (c6=32*c3;c6<=32*c3+31;c6++) {
          S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,0) ;
        }
        for (c5=c4+1;c5<=32*c2+31;c5++) {
          for (c6=32*c3;c6<=32*c3+31;c6++) {
            S2(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S3(c1,-c1+c3,-c1+c2,c4,-c4+c6,-c4+c5) ;
            S4(c1,-c1+c3,-c1+c2,c4,-c4+c6-1,-c4+c5-1) ;
          }
        }
      }
      if ((c1 == c3) && (c2 <= min(floord(32*c3-1,32),floord(tmax-32,32)))) {
        S1(c1,-c1+c2,32*c2+31,0) ;
        for (c6=32*c2+32;c6<=32*c3+31;c6++) {
          S2(c1,0,-c1+c2,32*c2+31,-32*c2+c6-31,0) ;
        }
      }
      if ((-c1 == -c3) && (c1 >= ceild(32*c2-31,32)) && (c1 <= min(floord(tmax-32,32),floord(32*c2-1,32)))) {
        S1(c1,-c1+c2,32*c1+31,0) ;
        for (c5=32*c1+32;c5<=32*c2+31;c5++) {
          S1(c1,-c1+c2,32*c1+31,-32*c1+c5-31) ;
          S3(c1,0,-c1+c2,32*c1+31,0,-32*c1+c5-31) ;
        }
      }
      if ((-c1 == -c3) && (c1 <= min(floord(tmax-32,32),c2-1))) {
        for (c5=32*c2;c5<=min(32*c2+31,32*c1+ny+30);c5++) {
          S1(c1,-c1+c2,32*c1+31,-32*c1+c5-31) ;
          S3(c1,0,-c1+c2,32*c1+31,0,-32*c1+c5-31) ;
        }
      }
      if ((-c1 == -c2) && (-c1 == -c3) && (c1 <= floord(tmax-32,32))) {
        S1(c1,0,32*c1+31,0) ;
      }
      if ((c1 >= c2) && (c2 <= min(c3-1,floord(tmax-32,32)))) {
        for (c6=32*c3;c6<=min(32*c2+nx+30,32*c3+31);c6++) {
          S2(c1,-c1+c3,-c1+c2,32*c2+31,-32*c2+c6-31,0) ;
        }
      }
    }
  }
}



    annot_t_end = rtclock();
    annot_t_total += annot_t_end - annot_t_start;
  }
  
  /* Report the mean time of one repetition. */
  annot_t_total = annot_t_total / REPS;
  printf("%f\n", annot_t_total);
  
  return 1;
}
Esempio n. 18
0
for (int c0 = 0; c0 <= 3; c0 += 1)
  for (int c1 = max(0, 2 * c0 - 3); c1 <= min(c0 + 1, 3); c1 += 1)
    for (int c2 = c0; c2 <= min(min(3, 3 * c1 + 2), 2 * c0 - c1 + 1); c2 += 1)
      for (int c3 = max(max(max(c2 - (c2 + 2) / 3, c2 + floord(3 * c1 - c2 - 1, 6)), c1 - (-c1 + 3) / 3), c0 - (-c2 + 3) / 3); c3 <= min(c0 + c0 / 2 + 1, 3); c3 += 1)
        for (int c5 = max(max(max(max(c1 - (-c1 + 3) / 3, 0), 2 * c3 - 4), c3 - (c3 + 3) / 3), c2 - (c2 + 3) / 3); c5 <= min(min(-c2 + 2 * c3 - (c2 + 3) / 3 + 2, c1 + 1), c3); c5 += 1)
          for (int c6 = max(max(max(max(max(250 * c3 + 1, 667 * c0 - 333 * c1 - (c0 + c1 + 3) / 3 - 332), -200 * c1 + 400 * c3 - 199), 333 * c1 + c1 / 3), 1000 * c0 - 500 * c5 - 501), 333 * c2 + (c2 + 1) / 3); c6 <= min(min(min(min(min(min(333 * c3 - (-c3 + 3) / 3 + 334, 1000), 333 * c2 - (-c2 + 3) / 3 + 333), 1000 * c0 - 500 * c5 + 997), 500 * c5 + 501), 500 * c0 + 499), -200 * c1 + 400 * c3 + 400); c6 += 1)
            for (int c7 = max(max(max(max(c6, 500 * c1 + (c6 + 1) / 2), 1000 * c0 - c6), 500 * c5 + 2), 1000 * c3 - 2 * c6 + 2); c7 <= min(min(min(min(500 * c5 + 501, 2 * c6 + 1), 1000 * c3 - 2 * c6 + 1001), 1000 * c0 - c6 + 999), 500 * c1 + (c6 + 1) / 2 + 499); c7 += 1)
              s0(c0, c1, c2, c3, c2 / 3, c5, c6, c7);
Esempio n. 19
0
for (int c0 = floord(m, 4); c0 <= n; c0 += 1)
  s0(c0);
Esempio n. 20
0
/*
 * Timing driver for a tiled, OpenMP-parallel in-place factorisation of A.
 *
 * After init_arrays(), the loop nest is executed REPS times between
 * rtclock() samples.  Without TEST the mean time per repetition is
 * printed; with TEST the whole N x N matrix A is dumped instead.
 *
 * Returns (int) A[0][0], so the result depends on the computed matrix.
 *
 * The "@ begin ... @" / "@ end @" comments are Orio annotations that
 * record the tuning specification and the untransformed loops; they are
 * kept verbatim.
 */
int main()
{
  init_arrays();

  double annot_t_start=0, annot_t_end=0, annot_t_total=0;
  int annot_i;

  for (annot_i=0; annot_i<REPS; annot_i++)
    {
      annot_t_start = rtclock();


/*@ begin PerfTuning (         
  def build 
  { 
    arg build_command = 'icc -O3 -openmp -I/usr/local/icc/include -lm'; 
  } 
    
  def performance_counter          
  { 
    arg repetitions = 1; 
  }

  def performance_params 
  {
#    param T1_1[] = [1,16,32,64,128];
#    param T1_2[] = [1,16,32,64,128];
#    param T1_3[] = [1,16,32,64,128];
#    param T2_1[] = [1,4,8,16,32];
#    param T2_2[] = [1,4,8,16,32];
#    param T2_3[] = [1,4,8,16,32];

    param T1_1[] = [64];
    param T1_2[] = [256];
    param T1_3[] = [64];
    param T2_1[] = [1];
    param T2_2[] = [1];
    param T2_3[] = [1];

    constraint c1 = (T1_1*T2_1<=1024 and T1_1*T2_1<=1024 and T1_1*T2_1<=1024);
    constraint c2 = ((T1_1 == T1_3) and (T2_1 == T2_3));

    param U1[] = [1];
    param U2[] = [1];
    param U3[] = [7];

    constraint c3 = (U1*U2*U3<=512);

    param PERM[] = [
      #[0,1,2],
      #[0,2,1],
      #[1,0,2],
      #[1,2,0],
      [2,0,1],
      #[2,1,0],
      ];

    param PAR[] = [True];
    param SCREP[] = [False];
    param IVEC[] = [True];
  }

  def search 
  { 
    arg algorithm = 'Exhaustive'; 
#    arg algorithm = 'Simplex'; 
#    arg time_limit = 5;
#    arg total_runs = 1;
  } 
   
  def input_params 
  {
    param N[] = [1024];
  }

  def input_vars
  {
    arg decl_file = 'decl_code.h';
    arg init_file = 'init_code.c';
  }
) @*/

/**-- (Generated by Orio) 
Best performance cost: 
  0.201184 
Tuned for specific problem sizes: 
  N = 1024 
Best performance parameters: 
  IVEC = True 
  PAR = True 
  PERM = [2, 0, 1] 
  SCREP = False 
  T1_1 = 64 
  T1_2 = 256 
  T1_3 = 64 
  T2_1 = 1 
  T2_2 = 1 
  T2_3 = 1 
  U1 = 1 
  U2 = 1 
  U3 = 7 
--**/

 

register int i,j,k;
register int c1t, c2t, c3t, c4t, c5t, c6t, c7t, c8t, c9t, c10t, c11t, c12t;
register int newlb_c1, newlb_c2, newlb_c3, newlb_c4, newlb_c5, newlb_c6,
  newlb_c7, newlb_c8, newlb_c9, newlb_c10, newlb_c11, newlb_c12;
register int newub_c1, newub_c2, newub_c3, newub_c4, newub_c5, newub_c6,
  newub_c7, newub_c8, newub_c9, newub_c10, newub_c11, newub_c12;


/*@ begin PolySyn(    
  parallel = PAR;
  tiles = [T1_1,T1_2,T1_3,T2_1,T2_2,T2_3];
  permut = PERM;
  unroll_factors = [U1,U2,U3];
  scalar_replace = SCREP;
  vectorize = IVEC;
    
  profiling_code = 'lu_profiling.c';
  compile_cmd = 'gcc';
  compile_opts = '-lm';
  ) @*/

#include <math.h>
#include <assert.h>

#define ceild(n,d)  ceil(((double)(n))/((double)(d)))
#define floord(n,d) floor(((double)(n))/((double)(d)))
#define max(x,y)    ((x) > (y)? (x) : (y))
#define min(x,y)    ((x) < (y)? (x) : (y))

		
	int c1, c2, c3, c4, c5, c6, c7, c8, c9;

	register int lb, ub, lb1, ub1, lb2, ub2;
/* Generated from PLuTo-produced CLooG file by CLooG v0.14.1 64 bits in 0.05s. */
for (c1=-1;c1<=floord(5*N-9,256);c1++) {
	lb1=max(max(ceild(32*c1-127,160),ceild(64*c1-N+2,64)),0);
	ub1=min(floord(64*c1+63,64),floord(N-1,256));
	/* c9t is declared at function scope but is written by the unrolled row
	 * loop inside this parallel region.  It must be private: left shared,
	 * threads would overwrite each other's row index, and the remainder
	 * loop (which starts from c9t) could skip or repeat rows. */
#pragma omp parallel for shared(c1,lb1,ub1) private(c2,c3,c4,c5,c6,c7,c8,c9,c9t)
	for (c2=lb1; c2<=ub1; c2++) {
    for (c3=max(ceild(32*c1-32*c2-1953,2016),ceild(32*c1-32*c2-31,32));c3<=floord(N-1,64);c3++) {
      /* Tile on the c1 == c2+c3 front: scale row c7 by its diagonal
       * element, then update the rows below it inside the tile. */
      if (c1 == c2+c3) {
        for (c7=max(64*c3,0);c7<=min(min(N-2,64*c3+62),256*c2+254);c7++) {
          for (c8=max(c7+1,256*c2);c8<=min(N-1,256*c2+255);c8++) {
            A[c7][c8]=A[c7][c8]/A[c7][c7] ;
            for (c9=c7+1;c9<=min(N-1,64*c3+63);c9++) {
              A[c9][c8]=A[c9][c8]-A[c9][c7]*A[c7][c8] ;
            }
          }
        }
      }
/*@ begin Loop(
transform Composite(
permut = [['c9', 'c7', 'c8']],
  regtile = (['c7', 'c8', 'c9'],[1, 1, 7]),
  scalarreplace = (False, 'double'),
  vector = (True, ['ivdep','vector always']))

      for (c7=max(0,64*c1-64*c2);c7<=min(min(256*c2+254,64*c1-64*c2+63),64*c3-1);c7++) {
        for (c8=max(c7+1,256*c2);c8<=min(256*c2+255,N-1);c8++) {
          for (c9=64*c3;c9<=min(N-1,64*c3+63);c9++) {
            A[c9][c8]=A[c9][c8]-A[c9][c7]*A[c7][c8] ;
          }
        }
      }

) @*/{
  /* Transformed form of the annotated loops: rows (c9) outermost and
   * unrolled by 7, columns (c8) innermost with vectorisation pragmas. */
  for (c9t=64*c3; c9t<=min(N-1,64*c3+63)-6; c9t=c9t+7) {
    for (c7=max(0,64*c1-64*c2); c7<=min(min(256*c2+254,64*c1-64*c2+63),64*c3-1); c7++ ) {
      register int cbv_1, cbv_2;
      cbv_1=max(c7+1,256*c2);
      cbv_2=min(256*c2+255,N-1);
#pragma ivdep
#pragma vector always
      for (c8=cbv_1; c8<=cbv_2; c8++ ) {
        A[c9t][c8]=A[c9t][c8]-A[c9t][c7]*A[c7][c8];
        A[(c9t+1)][c8]=A[(c9t+1)][c8]-A[(c9t+1)][c7]*A[c7][c8];
        A[(c9t+2)][c8]=A[(c9t+2)][c8]-A[(c9t+2)][c7]*A[c7][c8];
        A[(c9t+3)][c8]=A[(c9t+3)][c8]-A[(c9t+3)][c7]*A[c7][c8];
        A[(c9t+4)][c8]=A[(c9t+4)][c8]-A[(c9t+4)][c7]*A[c7][c8];
        A[(c9t+5)][c8]=A[(c9t+5)][c8]-A[(c9t+5)][c7]*A[c7][c8];
        A[(c9t+6)][c8]=A[(c9t+6)][c8]-A[(c9t+6)][c7]*A[c7][c8];
      }
    }
  }
  /* Remainder rows: continue from wherever the unrolled loop stopped. */
  for (c9=c9t; c9<=min(N-1,64*c3+63); c9=c9+1) {
    for (c7=max(0,64*c1-64*c2); c7<=min(min(256*c2+254,64*c1-64*c2+63),64*c3-1); c7++ ) {
      register int cbv_3, cbv_4;
      cbv_3=max(c7+1,256*c2);
      cbv_4=min(256*c2+255,N-1);
#pragma ivdep
#pragma vector always
      for (c8=cbv_3; c8<=cbv_4; c8++ ) {
        A[c9][c8]=A[c9][c8]-A[c9][c7]*A[c7][c8];
      }
    }
  }
}
/*@ end @*/

      if ((-c1 == -c2-c3) && (c1 <= min(floord(320*c2+191,64),floord(64*c2+N-65,64)))) {
        for (c8=max(256*c2,64*c1-64*c2+64);c8<=min(256*c2+255,N-1);c8++) {
          A[64*c1-64*c2+63][c8]=A[64*c1-64*c2+63][c8]/A[64*c1-64*c2+63][64*c1-64*c2+63] ;
        }
      }
    }
  }
}
/* End of CLooG code */

/*@ end @*/
/*@ end @*/




      annot_t_end = rtclock();
      annot_t_total += annot_t_end - annot_t_start;
    }

  annot_t_total = annot_t_total / REPS;

#ifndef TEST
  printf("%f\n", annot_t_total);
#else
  {
    /* Dump A row by row, breaking the line before every 100th column. */
    int i, j;
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++) {
        if (j%100==0)
          printf("\n");
        printf("%f ",A[i][j]);
      }
      printf("\n");
    }
  }
#endif

  return ((int) A[0][0]);

}
Esempio n. 21
0
/* Generated from ../../../git/cloog/test/isl/jacobi-shared.cloog by CLooG 0.16.3-2-g5511bef gmp bits in 1.82s. */
/* Single combined guard: h0 must be odd and the four floord-based bounds
 * on t0/g1 and t1/g2 must hold before any S1 instance runs. */
if (((h0+1)%2 == 0)
    && (16*floord(t0-1,16) >= -N+g1+t0+1)
    && (16*floord(N+15*g1+15*t0+15,16) >= 15*g1+15*t0+19)
    && (32*floord(t1-1,32) <= g2+t1-3)
    && (32*floord(t1-1,32) >= -N+g2+t1+1)) {
  /* c0 advances in steps of 16; c1 is reassigned on every iteration. */
  for (c0 = max(-16*floord(t0-1,16)+t0, -16*floord(g1+t0-3,16)+t0); c0 <= min(32, N-g1-1); c0 += 16) {
    c1 = -32*floord(t1-1,32)+t1;
    if (c1 <= 32) {
      S1(c0+g1-1, c1+g2-1);
    }
  }
}
Esempio n. 22
0
for (int c0 = 0; c0 <= 5 * n; c0 += 1)
  for (int c1 = max(-((5 * n - c0 + 1) % 2) - n + c0 + 1, 2 * floord(c0 - 1, 3) + 2); c1 <= min(c0, n + c0 - (n + c0 + 2) / 3); c1 += 2)
    S1((3 * c1 / 2) - c0, c0 - c1);
Esempio n. 23
0
/*
 * Timing driver for a tiled, OpenMP-parallel stencil sweep over A.
 *
 * After init_arrays(), the loop nest is executed REPS times between
 * rtclock() samples and the mean time per repetition is printed.
 *
 * Returns (int) A[0][0], so the result depends on the computed array.
 */
int main()
{
  init_arrays();

  double annot_t_start=0, annot_t_end=0, annot_t_total=0;
  int annot_i;

  for (annot_i=0; annot_i<REPS; annot_i++)
  {
    annot_t_start = rtclock();
    



#include <math.h>
#include <assert.h>

/* Note: ceild/floord expand to ceil()/floor() on doubles, so the loop
 * bounds below are computed in floating point. */
#define ceild(n,d)  ceil(((double)(n))/((double)(d)))
#define floord(n,d) floor(((double)(n))/((double)(d)))
#define max(x,y)    ((x) > (y)? (x) : (y))
#define min(x,y)    ((x) < (y)? (x) : (y))

	/* S1 replaces A[i][j] with the average of the 3x3 block centred on
	 * (i, j).  The zT* and t arguments are not used by the expansion. */
	#define S1(zT0,zT1,zT2,zT3,zT4,zT5,t,i,j)	{A[i][j]=(A[1+i][1+j]+A[1+i][j]+A[1+i][j-1]+A[i][1+j]+A[i][j]+A[i][j-1]+A[i-1][1+j]+A[i-1][j]+A[i-1][j-1])/9;}

	int c1, c2, c3, c4, c5, c6, c7, c8, c9;

	register int lb, ub, lb1, ub1, lb2, ub2;
	register int lbv, ubv;

	/* Nested parallelism is enabled and the thread count set to 2 on
	 * every repetition, right before the two nested parallel loops. */
	omp_set_nested(1);
	omp_set_num_threads(2);
/* Generated from PLuTo-produced CLooG file by CLooG v0.14.1 64 bits in 5.45s. */
for (c1=-2;c1<=floord(4*T+3*N-10,256);c1++) {
	lb1=max(max(max(0,ceild(256*c1-2*T-N-251,512)),ceild(256*c1-3*T-2*N+7,256)),ceild(256*c1-N-761,1024));
	ub1=min(min(min(floord(256*c1+2*N+505,1024),floord(256*c1+509,512)),floord(64*c1+127,64)),floord(T+N-3,256));
#pragma omp parallel for shared(c1,lb1,ub1) private(lb2,ub2,c2,c3,c4,c5,c6,c7,c8,c9)
	for (c2=lb1; c2<=ub1; c2++) {
	lb2=max(max(max(max(max(max(ceild(256*c1-256*c2-T+1,256),ceild(512*c1-512*c2-253,768)),0),ceild(512*c2-N-252,256)),ceild(128*c1-256*c2-127,128)),ceild(128*c2-127,128)),ceild(128*c1-127,256));
	ub2=min(min(min(min(min(min(floord(256*c1-256*c2+255,256),floord(256*c1-512*c2+N+253,256)),floord(256*c2+T+N+252,256)),floord(T+N-3,128)),floord(256*c1+N+508,512)),floord(256*c1-256*c2+N+253,384)),floord(512*c2+N+507,256));
#pragma omp parallel for shared(c1,c2,lb1,ub1,lb2,ub2) private(c3,c4,c5,c6,c7,c8,c9)
	for (c3=lb2; c3<=ub2; c3++) {
      /* c4, c5, c6: second-level tile loops; c7, c8, c9: point loops. */
      for (c4=max(max(max(max(0,ceild(-256*c2+256*c3-N-284,32)),8*c1-8*c2-8*c3),ceild(256*c2-N-29,32)),ceild(128*c3-N-29,32));c4<=min(min(min(min(8*c1-8*c2-8*c3+7,floord(256*c3+253,64)),floord(T-1,32)),floord(128*c2+127,16)),floord(-128*c2+128*c3+127,16));c4++) {
        for (c5=max(max(max(max(max(8*c2,ceild(16*c4-15,16)),ceild(256*c3-T-N-28,32)),0),ceild(256*c3-32*c4-N-60,32)),ceild(256*c3-N-59,64));c5<=min(min(min(min(min(floord(32*c4+N+29,32),floord(128*c3+127,16)),8*c2+7),floord(128*c3-16*c4+127,16)),floord(T+N-3,32)),floord(256*c3+N+252,64));c5++) {
          for (c6=max(max(max(max(max(ceild(64*c4-29,32),8*c3),ceild(16*c5-15,16)),ceild(16*c4+16*c5-15,16)),0),ceild(64*c5-N-28,32));c6<=min(min(min(min(min(8*c3+7,floord(T+N-3,16)),floord(32*c4+32*c5+N+60,32)),floord(32*c4+N+29,16)),floord(64*c5+N+59,32)),floord(32*c5+T+N+28,32));c6++) {
            for (c7=max(max(max(max(0,32*c4),32*c5-N+2),16*c6-N+2),-32*c5+32*c6-N-29);c7<=min(min(min(min(-32*c5+32*c6+30,floord(32*c6+29,2)),T-1),32*c5+30),32*c4+31);c7++) {
/*@ begin Loop(
transform UnrollJam(ufactor=8)
              for (c8=max(max(32*c5,c7+1),32*c6-c7-N+2);c8<=min(min(32*c6-c7+30,32*c5+31),c7+N-2);c8++) 
transform Unroll(ufactor=8)
                for (c9=max(c7+c8+1,32*c6);c9<=min(32*c6+31,c7+c8+N-2);c9++) 
{
                  S1(c1-c2-c3,-c1+2*c2+c3,-c1+2*c3,c4,-c4+c5,-c4-c5+c6,c7,-c7+c8,-c7-c8+c9) ;
}
) @*/
              /* The annotation above requests unrolling by 8, but the code
               * that follows it is the plain, non-unrolled loop pair. */
              for (c8=max(max(32*c5,c7+1),32*c6-c7-N+2);c8<=min(min(32*c6-c7+30,32*c5+31),c7+N-2);c8++) {
                for (c9=max(c7+c8+1,32*c6);c9<=min(32*c6+31,c7+c8+N-2);c9++) {
                  S1(c1-c2-c3,-c1+2*c2+c3,-c1+2*c3,c4,-c4+c5,-c4-c5+c6,c7,-c7+c8,-c7-c8+c9) ;
                }
              }
/*@ end @*/
            }
          }
        }
      }
    }
  }
}
/* End of CLooG code */


    annot_t_end = rtclock();
    annot_t_total += annot_t_end - annot_t_start;
  }
  
  /* Report the mean time of one repetition. */
  annot_t_total = annot_t_total / REPS;
  printf("%f\n", annot_t_total);
  
  return ((int) A[0][0]); 

}
Esempio n. 24
0
/*
 * Timing driver for a two-level tiled, OpenMP-parallel in-place
 * factorisation of A.
 *
 * After init_arrays(), nested OpenMP parallelism is enabled with two
 * threads, the loop nest is executed REPS times between rtclock()
 * samples, and the mean time per repetition is printed.
 *
 * Returns (int) A[0][0], so the result depends on the computed matrix.
 */
int main()
{
  init_arrays();

  double annot_t_start=0, annot_t_end=0, annot_t_total=0;
  int annot_i;

  omp_set_nested(1);
  omp_set_num_threads(2);

  for (annot_i=0; annot_i<REPS; annot_i++)
  {
    annot_t_start = rtclock();
   
    register int i,j,k;


    
	/* S1 divides A[k][j] by the diagonal element A[k][k];
	 * S2 subtracts A[i][k]*A[k][j] from A[i][j].
	 * The zT* arguments are not used by either expansion. */
	#define S1(zT0,zT1,zT2,zT3,k,j)	{A[k][j]=A[k][j]/A[k][k];}
	#define S2(zT0,zT1,zT2,zT3,zT4,zT5,k,i,j)	{A[i][j]=A[i][j]-A[i][k]*A[k][j];}

	int c1, c2, c3, c4, c5, c6, c7, c8, c9;

	register int lb, ub, lb1, ub1, lb2, ub2;
	register int lbv, ubv;

/* Generated from PLuTo-produced CLooG file by CLooG v0.14.1 64 bits in 2.21s. */
for (c1=-2;c1<=floord(3*N-4,256);c1++) {
	lb1=max(max(0,ceild(256*c1-N-253,512)),ceild(256*c1-2*N+3,256));
	ub1=min(floord(128*c1+255,128),floord(N-1,256));
#pragma omp parallel for shared(c1,lb1,ub1) private(lb2,ub2,c2,c3,c4,c5,c6,c7,c8,c9)
	for (c2=lb1; c2<=ub1; c2++) {
	lb2=max(max(max(ceild(256*c1-256*c2-N+2,256),ceild(128*c1-256*c2-127,128)),ceild(128*c1-128*c2-32385,32768)),ceild(128*c1-128*c2-127,256));
	ub2=min(floord(N-1,256),floord(256*c1-256*c2+255,256));
#pragma omp parallel for shared(c1,c2,lb1,ub1,lb2,ub2) private(c3,c4,c5,c6,c7,c8,c9)
	for (c3=lb2; c3<=ub2; c3++) {
      /* c4, c5, c6: second-level tile loops; c7, c8, c9: point loops. */
      for (c4=max(max(8*c1-8*c2-8*c3,0),8*c1-8*c2-1800*c3-1778);c4<=min(min(min(min(floord(3968*c3+3937,16),8*c1-8*c2-8*c3+7),floord(128*c2+127,16)),floord(N-2,32)),floord(128*c3+127,16));c4++) {
        for (c5=max(max(ceild(16*c4-15,16),0),8*c2);c5<=min(floord(N-1,32),8*c2+7);c5++) {
          for (c6=max(max(max(max(ceild(16*c4-465,496),ceild(8*c1-8*c2-16*c3-c4-217,223)),ceild(-8*c1+8*c2+16*c3+c4-217,225)),8*c3),ceild(16*c4-15,16));c6<=min(8*c3+7,floord(N-1,32));c6++) {
            /* Tiles with c1 == c2+2*c3 and c4 == c6: scale row c7 (S1),
             * then update the rows below it inside the tile (S2). */
            if ((c1 == c2+2*c3) && (c4 == c6)) {
              for (c7=max(0,32*c6);c7<=min(min(32*c5+30,32*c6+30),N-2);c7++) {
                for (c8=max(c7+1,32*c5);c8<=min(32*c5+31,N-1);c8++) {
                  if ((c1-c2)%2 == 0) {
                    S1((c1-c2)/2,c2,c4,c5,c7,c8) ;
                  }
                  for (c9=c7+1;c9<=min(32*c6+31,N-1);c9++) {
                    /* The parity test is repeated verbatim in the nested if. */
                    if ((c1-c2)%2 == 0) {
                      if ((c1-c2)%2 == 0) {
                        S2((c1-c2)/2,(c1-c2)/2,c2,c4,c4,c5,c7,c9,c8) ;
                      }
                    }
                  }
                }
              }
            }
            for (c7=max(32*c4,0);c7<=min(min(32*c6-1,32*c5+30),32*c4+31);c7++) {
/*@ begin Loop(
transform UnrollJam(ufactor=8)
              for (c8=max(c7+1,32*c5);c8<=min(32*c5+31,N-1);c8++) 
transform Unroll(ufactor=8)
                for (c9=32*c6;c9<=min(N-1,32*c6+31);c9++) 
{
                  S2(c1-c2-c3,c3,c2,c4,c6,c5,c7,c9,c8) ;
}
) @*/{ 

  /* Transformed form of the annotated loops: c8 and c9 are each unrolled
   * by 8 (64 S2 instances per iteration), followed by remainder loops for
   * the leftover c9 values and then the leftover c8 values. */
  for (c8 = max(c7 + 1, 32 * c5); c8 <= min(32 * c5 + 31, N - 1) - 7; c8 = c8 + 8)     { 

      for (c9 = 32 * c6; c9 <= min(N - 1, 32 * c6 + 31) - 7; c9 = c9 + 8)         { 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 7)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), (c8 + 7)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), (c8 + 7)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), (c8 + 7)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), (c8 + 7)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), (c8 + 7)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), (c8 + 7)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), (c8 + 7)); 
        } 

      /* Leftover c9 values for the current group of eight c8 values. */
      for (; c9 <= min(N - 1, 32 * c6 + 31); c9 = c9 + 1)         { 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 1)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 2)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 3)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 4)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 5)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 6)); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, (c8 + 7)); 
        } 
    } 

  /* Leftover c8 values, one at a time; c9 is still unrolled by 8. */
  for (; c8 <= min(32 * c5 + 31, N - 1); c8 = c8 + 1)     { 

      for (c9 = 32 * c6; c9 <= min(N - 1, 32 * c6 + 31) - 7; c9 = c9 + 8)         { 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 1), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 2), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 3), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 4), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 5), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 6), c8); 
          S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, (c9 + 7), c8); 
        } 

      for (; c9 <= min(N - 1, 32 * c6 + 31); c9 = c9 + 1)         S2(c1 - c2 - c3, c3, c2, c4, c6, c5, c7, c9, c8); 
    } 
} 
/*@ end @*/
            }
            /* Last row of the tile (32*c4+31): S1 only, on the columns of
             * tiles to the right. */
            if ((c1 == c2+2*c3) && (-c4 == -c6) && (c4 <= min(floord(N-33,32),floord(32*c5-1,32)))) {
              for (c8=max(32*c5,32*c4+32);c8<=min(N-1,32*c5+31);c8++) {
                if ((c1-c2)%2 == 0) {
                  S1((c1-c2)/2,c2,c4,c5,32*c4+31,c8) ;
                }
              }
            }
          }
        }
      }
    }
  }
}
/* End of CLooG code */



    annot_t_end = rtclock();
    annot_t_total += annot_t_end - annot_t_start;
  }
  
  /* Report the mean time of one repetition. */
  annot_t_total = annot_t_total / REPS;
  printf("%f\n", annot_t_total);
  
  return ((int) A[0][0]); 

}
Esempio n. 25
0
/*
 * Entry point of the "3D Lid Driven Cavity" benchmark (see the banner printed
 * below): derives omega, prints the run parameters, fills both copies of
 * `grid` with the weights w1/w2/w3, runs the generated tiled loop nest that
 * invokes lbm_kernel, and, depending on the TIME / DEBUG macros, reports
 * timing and dumps velocities.
 *
 * Relies on names defined outside this function: nX, nY, nZ, nK, nTimesteps,
 * re, uTopX, uTopY, omega, w1, w2, w3, grid, lbm_kernel, timeval_subtract,
 * dumpVelocities, floord, ceild, min, max.
 *
 * Returns 0 unconditionally.
 */
int main(void) {
  /* t is never modified in this function; it is only handed to
   * dumpVelocities() when DEBUG is defined. */
  int t = 0, z, y, x, k;
  /* Only used for the throughput figure printed under TIME. */
  double total_lattice_pts =
      (double)nZ * (double)nY * (double)nX * (double)nTimesteps;

  /* For timekeeping */
  /* ts_return is assigned under TIME but never read in this function. */
  int ts_return = -1;
  struct timeval start, end, result;
  double tdiff = 0.0;

  /* Compute values for global parameters */
  omega = 2.0 /
          ((6.0 * sqrt(uTopX * uTopX + uTopY * uTopY) * (nX - 0.5) / re) + 1.0);

  printf(
      "3D Lid Driven Cavity simulation with D3Q19 lattice:\n"
      "\tscheme     : 3-Grid, Fused, Pull\n"
      "\tgrid size  : %d x %d x %d = %.2lf * 10^3 Cells\n"
      "\tnTimeSteps : %d\n"
      "\tRe         : %.2lf\n"
      "\tuTopX      : %.6lf\n"
      "\tuTopY      : %.6lf\n"
      "\tomega      : %.6lf\n",
      nX, nY, nZ, nX * nY * nZ / 1.0e3, nTimesteps, re, uTopX, uTopY, omega);

  /* Initialize all 19 PDFs for each point in the domain to w1, w2 or w3
   * accordingly */
  /* The loops cover nZ+6, nY+4 and nX+4 points per axis.
   * NOTE(review): presumably these match the declared extents of `grid`
   * (interior plus padding) -- confirm against its declaration. */
  for (z = 0; z < nZ + 2 + 4; z++) {
    for (y = 0; y < nY + 2 + 2; y++) {
      for (x = 0; x < nX + 2 + 2; x++) {
        /* Component 0 gets w1 in both grid copies. */
        grid[0][z][y][x][0] = w1;
        grid[1][z][y][x][0] = w1;

        /* Components 1..6 get w2. */
        for (k = 1; k < 7; k++) {
          grid[0][z][y][x][k] = w2;
          grid[1][z][y][x][k] = w2;
        }

        /* Components 7..nK-1 get w3. */
        for (k = 7; k < nK; k++) {
          grid[0][z][y][x][k] = w3;
          grid[1][z][y][x][k] = w3;
        }
      }
    }
  }

  /* To satisfy PET */
  /* These narrowed copies are the only size parameters the generated loop
   * nest below reads. NOTE(review): they are `short`, so the source values
   * must fit in that type -- confirm for large problem sizes. */
  short _nX = nX + 3;
  short _nY = nY + 3;
  short _nZ = nZ + 4;
  short _nTimesteps = nTimesteps;

#ifdef TIME
  gettimeofday(&start, 0);
#endif

  /* t1..t4 index tiles, t5..t8 index points inside a tile (see below).
   * lb, ub, lb2 and ub2 are declared but not used in this function. */
  int t1, t2, t3, t4, t5, t6, t7, t8;
  int lb, ub, lbp, ubp, lb2, ub2;
  register int lbv, ubv;
  /* Start of CLooG code */
  /* The whole nest is skipped unless every extent is large enough. */
  if ((_nTimesteps >= 1) && (_nX >= 2) && (_nY >= 2) && (_nZ >= 3)) {
    for (t1 = -1; t1 <= floord(_nTimesteps - 1, 8); t1++) {
      /* Bounds of the t2 loop are computed once per t1, outside the
       * parallel region; the t2 iterations are then distributed by OpenMP
       * with all inner iterators private to each thread. */
      lbp = max(ceild(t1, 2), ceild(16 * t1 - _nTimesteps + 3, 16));
      ubp =
          min(floord(_nTimesteps + _nZ - 2, 16), floord(8 * t1 + _nZ + 6, 16));
#pragma omp parallel for private(lbv, ubv, t3, t4, t5, t6, t7, t8)
      for (t2 = lbp; t2 <= ubp; t2++) {
        for (t3 = max(max(0, ceild(t1 - 1, 2)), ceild(16 * t2 - _nZ - 13, 16));
             t3 <= min(min(min(floord(_nTimesteps + _nY - 2, 16),
                               floord(8 * t1 + _nY + 14, 16)),
                           floord(16 * t2 + _nY + 12, 16)),
                       floord(16 * t1 - 16 * t2 + _nZ + _nY + 13, 16));
             t3++) {
          for (t4 = max(
                   max(max(0, ceild(t1 - 1, 2)), ceild(16 * t2 - _nZ - 13, 16)),
                   ceild(16 * t3 - _nY - 13, 16));
               t4 <= min(min(min(min(floord(_nTimesteps + _nX - 2, 16),
                                     floord(8 * t1 + _nX + 14, 16)),
                                 floord(16 * t2 + _nX + 12, 16)),
                             floord(16 * t3 + _nX + 13, 16)),
                         floord(16 * t1 - 16 * t2 + _nZ + _nX + 13, 16));
               t4++) {
            /* t5 stays within [0, _nTimesteps - 1] and is passed to
             * lbm_kernel as its first trailing argument. The three spatial
             * arguments are t6 - t5, t7 - t5 and t8 - t5, i.e. t6..t8 are
             * offset by t5. */
            for (t5 = max(max(max(max(max(0, 8 * t1), 16 * t1 - 16 * t2 + 2),
                                  16 * t2 - _nZ + 1),
                              16 * t3 - _nY + 1),
                          16 * t4 - _nX + 1);
                 t5 <= min(min(min(min(min(_nTimesteps - 1, 8 * t1 + 15),
                                       16 * t2 + 13),
                                   16 * t3 + 14),
                               16 * t4 + 14),
                           16 * t1 - 16 * t2 + _nZ + 14);
                 t5++) {

              /* Hoisted loop conditional */
              /* Even t5: the first 19 arguments name entries of grid[0]
               * (the point itself and its neighbours at +/-1 offsets) and
               * the 19 pointer arguments address grid[1] at the point. */
              if (t5 % 2 == 0) {
                for (t6 = max(max(16 * t2, t5 + 2),
                              -16 * t1 + 16 * t2 + 2 * t5 - 15);
                     t6 <= min(min(16 * t2 + 15, -16 * t1 + 16 * t2 + 2 * t5),
                               t5 + _nZ - 1);
                     t6++) {
                  for (t7 = max(16 * t3, t5 + 1);
                       t7 <= min(16 * t3 + 15, t5 + _nY - 1); t7++) {
                    /* Innermost bounds are hoisted into lbv/ubv ahead of
                     * the vectorisation hints. */
                    lbv = max(16 * t4, t5 + 1);
                    ubv = min(16 * t4 + 15, t5 + _nX - 1);

#pragma ivdep
#pragma vector always
                    for (t8 = lbv; t8 <= ubv; t8++) {
                      lbm_kernel(
                          grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][0],
                          grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8) - 1][1],
                          grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8) + 1][2],
                          grid[0][(-t5 + t6)][(-t5 + t7) - 1][(-t5 + t8)][3],
                          grid[0][(-t5 + t6)][(-t5 + t7) + 1][(-t5 + t8)][4],
                          grid[0][(-t5 + t6) - 1][(-t5 + t7)][(-t5 + t8)][5],
                          grid[0][(-t5 + t6) + 1][(-t5 + t7)][(-t5 + t8)][6],
                          grid[0][(-t5 + t6)][(-t5 + t7) - 1][(-t5 + t8) - 1]
                              [7],
                          grid[0][(-t5 + t6)][(-t5 + t7) - 1][(-t5 + t8) + 1]
                              [8],
                          grid[0][(-t5 + t6)][(-t5 + t7) + 1][(-t5 + t8) - 1]
                              [9],
                          grid[0][(-t5 + t6)][(-t5 + t7) + 1][(-t5 + t8) + 1]
                              [10],
                          grid[0][(-t5 + t6) - 1][(-t5 + t7)][(-t5 + t8) - 1]
                              [11],
                          grid[0][(-t5 + t6) - 1][(-t5 + t7)][(-t5 + t8) + 1]
                              [12],
                          grid[0][(-t5 + t6) + 1][(-t5 + t7)][(-t5 + t8) - 1]
                              [13],
                          grid[0][(-t5 + t6) + 1][(-t5 + t7)][(-t5 + t8) + 1]
                              [14],
                          grid[0][(-t5 + t6) - 1][(-t5 + t7) - 1][(-t5 + t8)]
                              [15],
                          grid[0][(-t5 + t6) - 1][(-t5 + t7) + 1][(-t5 + t8)]
                              [16],
                          grid[0][(-t5 + t6) + 1][(-t5 + t7) - 1][(-t5 + t8)]
                              [17],
                          grid[0][(-t5 + t6) + 1][(-t5 + t7) + 1][(-t5 + t8)]
                              [18],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][0],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][1],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][2],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][3],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][4],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][5],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][6],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][7],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][8],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][9],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][10],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][11],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][12],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][13],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][14],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][15],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][16],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][17],
                          &grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][18],
                          (t5), ((-t5 + t6)), ((-t5 + t7)), ((-t5 + t8)));
                      ;
                    }
                  }
                }
              } else {
                /* Odd t5: same loop bounds and argument pattern as the
                 * even branch, with grid[0] and grid[1] exchanged. */
                for (t6 = max(max(16 * t2, t5 + 2),
                              -16 * t1 + 16 * t2 + 2 * t5 - 15);
                     t6 <= min(min(16 * t2 + 15, -16 * t1 + 16 * t2 + 2 * t5),
                               t5 + _nZ - 1);
                     t6++) {
                  for (t7 = max(16 * t3, t5 + 1);
                       t7 <= min(16 * t3 + 15, t5 + _nY - 1); t7++) {
                    lbv = max(16 * t4, t5 + 1);
                    ubv = min(16 * t4 + 15, t5 + _nX - 1);

#pragma ivdep
#pragma vector always
                    for (t8 = lbv; t8 <= ubv; t8++) {
                      lbm_kernel(
                          grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][0],
                          grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8) - 1][1],
                          grid[1][(-t5 + t6)][(-t5 + t7)][(-t5 + t8) + 1][2],
                          grid[1][(-t5 + t6)][(-t5 + t7) - 1][(-t5 + t8)][3],
                          grid[1][(-t5 + t6)][(-t5 + t7) + 1][(-t5 + t8)][4],
                          grid[1][(-t5 + t6) - 1][(-t5 + t7)][(-t5 + t8)][5],
                          grid[1][(-t5 + t6) + 1][(-t5 + t7)][(-t5 + t8)][6],
                          grid[1][(-t5 + t6)][(-t5 + t7) - 1][(-t5 + t8) - 1]
                              [7],
                          grid[1][(-t5 + t6)][(-t5 + t7) - 1][(-t5 + t8) + 1]
                              [8],
                          grid[1][(-t5 + t6)][(-t5 + t7) + 1][(-t5 + t8) - 1]
                              [9],
                          grid[1][(-t5 + t6)][(-t5 + t7) + 1][(-t5 + t8) + 1]
                              [10],
                          grid[1][(-t5 + t6) - 1][(-t5 + t7)][(-t5 + t8) - 1]
                              [11],
                          grid[1][(-t5 + t6) - 1][(-t5 + t7)][(-t5 + t8) + 1]
                              [12],
                          grid[1][(-t5 + t6) + 1][(-t5 + t7)][(-t5 + t8) - 1]
                              [13],
                          grid[1][(-t5 + t6) + 1][(-t5 + t7)][(-t5 + t8) + 1]
                              [14],
                          grid[1][(-t5 + t6) - 1][(-t5 + t7) - 1][(-t5 + t8)]
                              [15],
                          grid[1][(-t5 + t6) - 1][(-t5 + t7) + 1][(-t5 + t8)]
                              [16],
                          grid[1][(-t5 + t6) + 1][(-t5 + t7) - 1][(-t5 + t8)]
                              [17],
                          grid[1][(-t5 + t6) + 1][(-t5 + t7) + 1][(-t5 + t8)]
                              [18],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][0],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][1],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][2],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][3],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][4],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][5],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][6],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][7],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][8],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][9],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][10],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][11],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][12],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][13],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][14],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][15],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][16],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][17],
                          &grid[0][(-t5 + t6)][(-t5 + t7)][(-t5 + t8)][18],
                          (t5), ((-t5 + t6)), ((-t5 + t7)), ((-t5 + t8)));
                      ;
                    }
                  }
                }
              }
              /* end hoisted if */
            }
          }
        }
      }
    }
  }
/* End of CLooG code */

#ifdef TIME
  gettimeofday(&end, 0);

  /* Elapsed time in seconds, built from the tv_sec / tv_usec fields that
   * timeval_subtract() stores in `result`. */
  ts_return = timeval_subtract(&result, &end, &start);
  tdiff = (double)(result.tv_sec + result.tv_usec * 1.0e-6);

  /* Printed in minutes, then as total_lattice_pts per (1e6 * seconds). */
  printf("\tTime taken : %7.5lfm\n", tdiff / 60.0);
  printf("\tMLUPS      : %7.5lf\n", (total_lattice_pts / (1.0e6 * tdiff)));
#endif

#ifdef DEBUG
  /* Dump rho, uX, uY for the entire domain to verify results */
  dumpVelocities(t);
#endif

  return 0;
}
Esempio n. 26
0
File: .body.c Progetto: 8l/rose
	/*
	 * Statement macros used by the generated loop nest below. The leading
	 * zT0..zT3 parameters are accepted but do not appear in any expansion;
	 * only the trailing i / j parameters are used as array subscripts.
	 * Each expansion already ends in ';'. S3 and S4 are defined here but are
	 * not invoked in the visible part of this excerpt.
	 */
	#define S1(zT0,zT1,zT2,zT3,i,j)	B[i][j]=A[i][j]+u1[i]*v1[j]+u2[i]*v2[j];
	#define S2(zT0,zT1,zT2,zT3,i,j)	x[i]=x[i]+beta*B[j][i]*y[j];
	#define S3(zT0,zT1,zT2,zT3,i)	x[i]=x[i]+z[i];
	#define S4(zT0,zT1,zT2,zT3,i,j)	w[i]=w[i]+alpha*B[i][j]*x[j];

	/* Loop iterators. t0 and t7 are declared but never assigned or used as
	 * iterators in the visible code (t0 only appears in the shared clause). */
		int t0, t1, t2, t3, t4, t5, t6, t7;

	/* Only lb1/ub1 and lbv/ubv are used in the visible code. */
	register int lb, ub, lb1, ub1, lb2, ub2;
	register int lbv, ubv;

/* Generated from PLUTO-produced CLooG file by CLooG v0.14.1 64 bits in 0.03s. */
	/* Bounds of the outermost loop, computed before the parallel region;
	 * N, floord, min and max are defined outside this excerpt. */
	lb1=0;
	ub1=floord(N-1,8000);
#pragma omp parallel for shared(t0,lb1,ub1) private(t1,t2,t3,t4,t5,t6,t7)
	for (t1=lb1; t1<=ub1; t1++) {
  for (t2=0;t2<=floord(N-1,256);t2++) {
    /* t3 is confined to [20*t1, 20*t1+19] and t4 to [16*t2, 16*t2+15],
     * each additionally clipped against N. */
    for (t3=max(20*t1,0);t3<=min(20*t1+19,floord(N-1,400));t3++) {
      for (t4=max(0,16*t2);t4<=min(16*t2+15,floord(N-1,16));t4++) {
        /* t5 covers [16*t4, 16*t4+15] clipped to [0, N-1]. */
        for (t5=max(0,16*t4);t5<=min(N-1,16*t4+15);t5++) {
{
	/* t6 covers [400*t3, 400*t3+399] clipped to [0, N-1]; the bounds are
	 * hoisted into lbv/ubv ahead of the vectorisation hints. */
	lbv=max(0,400*t3); 	ubv=min(N-1,400*t3+399);
#pragma ivdep
#pragma vector always
	for (t6=lbv; t6<=ubv; t6++) {
            /* S1 writes B[t5][t6]; S2 is called with the last two arguments
             * swapped, so it updates x[t6] from that same B[t5][t6]. */
            S1(t1,t2,t3,t4,t5,t6) ;
            S2(t1,t2,t3,t4,t6,t5) ;
          }
}
        }
      }
Esempio n. 27
0
/*
 * Generated loop nest over c1 in [5, 5*M]. For each c1 the statement
 * instances are issued in this order: all S1, all S2, at most one S4, then
 * all S3. M, max, min, floord and S1..S4 are defined outside this fragment.
 * The inner loops have unbraced bodies, so each controls exactly the single
 * statement that follows it.
 */
for (int c1 = 5; c1 <= 5 * M; c1 += 1) {
  /* S1 instances: both c3 and c5 use exclusive upper bounds. */
  for (int c3 = max(2, floord(-M + c1, 4)); c3 < min(M, (c1 + 1) / 3 - 2); c3 += 1)
    for (int c5 = max(1, -M - c3 + (M + c1) / 2 - 2); c5 < min(c3, -2 * c3 + (c1 + c3) / 2 - 2); c5 += 1)
      S1(c1 - 2 * c3 - 2 * c5 - 5, c3, c5);
  /* S2 instances. */
  for (int c3 = max(1, floord(-M + c1, 4)); c3 < (c1 + 1) / 5; c3 += 1)
    S2(c1 - 4 * c3 - 3, c3);
  /* S4 runs only when c1 is a multiple of 5. */
  if (c1 % 5 == 0)
    S4(c1 / 5);
  /* S3 instances: c3 advances in steps of 3 from a start value that depends
   * on (c1 - 1) % 3. */
  for (int c3 = max(-3 * M - c1 + 3 * ((M + c1) / 2) + 1, -((c1 - 1) % 3) + 3); c3 < (c1 + 1) / 5; c3 += 3)
    S3((c1 - 2 * c3 - 1) / 3, c3);
}
Esempio n. 28
0
/*
 * Runs S(c0) only when n is even, for c0 stepping by 2 up to and including
 * 100. The starting c0 is computed from n with floord; n, floord and S are
 * defined outside this fragment.
 */
if (n % 2 == 0)
  for (int c0 = (n / 2) + 2 * floord(-n - 1, 4) + 2; c0 <= 100; c0 += 2)
    S(c0);
Esempio n. 29
0
/*
 * Generated scanning code that issues the instances of two statements, S1
 * and S2 (defined outside this function, as are ceild, floord, min and max),
 * in the order given by the scattering iterators (t1, t2, t3).
 *
 * From the argument expressions used below:
 *   S1(i, j, k) is issued at  t1 = i - j,      t2 = i + j,      t3 = i + j + 2*k
 *   S2(i, j, k) is issued at  t1 = i - j + 1,  t2 = i + j + 2,  t3 = k
 * The parity guards ((t1+t2)%2, (t1+t3)%2, (t1+t2+1)%2) keep only those
 * points for which the divisions by 2 in the argument expressions are exact.
 *
 * The body is a sequence of pieces selected by the value of n and by
 * consecutive ranges of t1; inside each piece the t2 / t3 ranges are split
 * so that every loop body contains only the statements possible there.
 *
 * The values stored by the assignments to i, j, k and by the stand-alone
 * `t1 = ...;` / `t2 = ...;` statements are not read back anywhere in this
 * function: S1 / S2 receive the same expressions directly as arguments.
 * NOTE(review): they would only matter if S1 / S2 are macros that reference
 * these locals -- confirm against their definitions.
 *
 * n: problem size parameter; nothing is executed for n < 1.
 */
void test(int n)
{
  /* Scattering iterators. */
  int t1, t2, t3;
  /* Original iterators. */
  int i, j, k;
  /* t1 = -n+1: instances S1(1, n, k). */
  if (n >= 1) {
    t1 = -n+1 ;
    t2 = n+1 ;
    for (t3=n+3;t3<=3*n+1;t3++) {
      if ((t3+n+1)%2 == 0) {
        k = (t3-n-1)/2 ;
        S1(1,n,(t3-n-1)/2) ;
      }
    }
  }
  /* t1 = -n+2 for the special case n == 2 (the condition is equivalent to
   * n == 2). */
  if ((n >= 2) && (n <= 2)) {
    t1 = -n+2 ;
    for (t2=-n+4;t2<=3*n-2;t2++) {
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t2+n)%2 == 0) {
          i = (t2-n+2)/2 ;
          j = (t2+n-2)/2 ;
          if ((t3+n)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t2-n+2)/2,(t2+n-2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    t2 = n+3 ;
    for (t3=1;t3<=n;t3++) {
      S2(1,n,t3) ;
    }
  }
  /* t1 = -n+2 for n >= 3. */
  if (n >= 3) {
    t1 = -n+2 ;
    for (t2=n;t2<=n+2;t2++) {
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t2+n)%2 == 0) {
          i = (t2-n+2)/2 ;
          j = (t2+n-2)/2 ;
          if ((t3+n)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t2-n+2)/2,(t2+n-2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    t2 = n+3 ;
    for (t3=1;t3<=n;t3++) {
      S2(1,n,t3) ;
    }
  }
  /* Negative t1 up to min(-n+6, -1): the S2 and S1 ranges of t3 are handled
   * by two separate t3 loops for every t2. */
  for (t1=ceild(-2*n+5,2);t1<=min(-n+6,-1);t1++) {
    for (t2=-t1+2;t2<=-t1+4;t2++) {
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=-t1+5;t2<=t1+2*n;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    t2 = t1+2*n+1 ;
    for (t3=1;t3<=n;t3++) {
      i = t1+n-1 ;
      S2(t1+n-1,n,t3) ;
    }
  }
  /* Special case n == 2: the remaining instances are listed with constant
   * bounds. */
  if (n == 2) {
    for (t3=5;t3<=7;t3++) {
      if ((t3+1)%2 == 0) {
        k = (t3-3)/2 ;
        S1(2,1,(t3-3)/2) ;
      }
    }
    for (t2=4;t2<=6;t2++) {
      for (t3=1;t3<=2;t3++) {
        if (t2%2 == 0) {
          i = (t2-2)/2 ;
          j = (t2-2)/2 ;
          S2((t2-2)/2,(t2-2)/2,t3) ;
        }
      }
    }
  }
  /* Negative t1 from -n+7: for t2 <= n-2 the t3 ranges of S2 and S1 overlap
   * on [t2+2, n], so that range is scanned by a loop containing both. */
  for (t1=-n+7;t1<=-1;t1++) {
    for (t2=-t1+2;t2<=-t1+4;t2++) {
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=-t1+5;t2<=n-2;t2++) {
      for (t3=1;t3<=t2+1;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
      for (t3=n+1;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=n-1;t2<=t1+2*n;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    t2 = t1+2*n+1 ;
    for (t3=1;t3<=n;t3++) {
      i = t1+n-1 ;
      S2(t1+n-1,n,t3) ;
    }
  }
  /* t1 in {0, 1}, variant limited to t1 <= -n+6. */
  if (n >= 3) {
    for (t1=0;t1<=min(1,-n+6);t1++) {
      for (t2=t1+2;t2<=-t1+4;t2++) {
        for (t3=t2+2;t3<=t2+2*n;t3++) {
          if ((t1+t2)%2 == 0) {
            i = (t1+t2)/2 ;
            j = (-t1+t2)/2 ;
            if ((t1+t3)%2 == 0) {
              k = (-t2+t3)/2 ;
              S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
            }
          }
        }
      }
      for (t2=-t1+5;t2<=-t1+2*n;t2++) {
        for (t3=1;t3<=n;t3++) {
          if ((t1+t2+1)%2 == 0) {
            i = (t1+t2-3)/2 ;
            j = (-t1+t2-1)/2 ;
            S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
          }
        }
        for (t3=t2+2;t3<=t2+2*n;t3++) {
          if ((t1+t2)%2 == 0) {
            i = (t1+t2)/2 ;
            j = (-t1+t2)/2 ;
            if ((t1+t3)%2 == 0) {
              k = (-t2+t3)/2 ;
              S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
            }
          }
        }
      }
      for (t2=-t1+2*n+1;t2<=t1+2*n+1;t2++) {
        for (t3=1;t3<=n;t3++) {
          if ((t1+t2+1)%2 == 0) {
            i = (t1+t2-3)/2 ;
            j = (-t1+t2-1)/2 ;
            S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
          }
        }
      }
    }
  }
  /* t1 in {0, 1}, variant starting at max(-n+7, 0); same t3 splitting as the
   * negative-t1 piece above. */
  for (t1=max(-n+7,0);t1<=1;t1++) {
    for (t2=t1+2;t2<=-t1+4;t2++) {
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=-t1+5;t2<=n-2;t2++) {
      for (t3=1;t3<=t2+1;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
      for (t3=n+1;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=n-1;t2<=-t1+2*n;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=-t1+2*n+1;t2<=t1+2*n+1;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
    }
  }
  /* t1 in [2, n-5]: the first t2 value (t1+2) carries only S1(t1+1, 1, k). */
  for (t1=2;t1<=n-5;t1++) {
    t2 = t1+2 ;
    for (t3=t1+4;t3<=t1+2*n+2;t3++) {
      i = t1+1 ;
      if ((t1+t3)%2 == 0) {
        k = (-t1+t3-2)/2 ;
        S1(t1+1,1,(-t1+t3-2)/2) ;
      }
    }
    for (t2=t1+3;t2<=n-2;t2++) {
      for (t3=1;t3<=t2+1;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
      for (t3=n+1;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=n-1;t2<=-t1+2*n;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=-t1+2*n+1;t2<=-t1+2*n+3;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
    }
  }
  /* t1 from max(2, n-4) up to floord(2*n-3, 2): same shape as the previous
   * piece, without the split of the t3 range at n. */
  for (t1=max(2,n-4);t1<=floord(2*n-3,2);t1++) {
    t2 = t1+2 ;
    for (t3=t1+4;t3<=t1+2*n+2;t3++) {
      i = t1+1 ;
      if ((t1+t3)%2 == 0) {
        k = (-t1+t3-2)/2 ;
        S1(t1+1,1,(-t1+t3-2)/2) ;
      }
    }
    for (t2=t1+3;t2<=-t1+2*n;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
      for (t3=t2+2;t3<=t2+2*n;t3++) {
        if ((t1+t2)%2 == 0) {
          i = (t1+t2)/2 ;
          j = (-t1+t2)/2 ;
          if ((t1+t3)%2 == 0) {
            k = (-t2+t3)/2 ;
            S1((t1+t2)/2,(-t1+t2)/2,(-t2+t3)/2) ;
          }
        }
      }
    }
    for (t2=-t1+2*n+1;t2<=-t1+2*n+3;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t1+t2+1)%2 == 0) {
          i = (t1+t2-3)/2 ;
          j = (-t1+t2-1)/2 ;
          S2((t1+t2-3)/2,(-t1+t2-1)/2,t3) ;
        }
      }
    }
  }
  /* t1 = n-1: instances S1(n, 1, k) followed by the S2 instances for
   * t2 in [n+2, n+4]. */
  if (n >= 3) {
    t1 = n-1 ;
    t2 = n+1 ;
    for (t3=n+3;t3<=3*n+1;t3++) {
      if ((t3+n+1)%2 == 0) {
        k = (t3-n-1)/2 ;
        S1(n,1,(t3-n-1)/2) ;
      }
    }
    for (t2=n+2;t2<=n+4;t2++) {
      for (t3=1;t3<=n;t3++) {
        if ((t2+n)%2 == 0) {
          i = (t2+n-4)/2 ;
          j = (t2-n)/2 ;
          S2((t2+n-4)/2,(t2-n)/2,t3) ;
        }
      }
    }
  }
  /* Final piece: instances S2(n, 1, t3). */
  if (n >= 1) {
    t2 = n+3 ;
    for (t3=1;t3<=n;t3++) {
      S2(n,1,t3) ;
    }
  }
}
Esempio n. 30
0
for (int c0 = 0; c0 < m; c0 += 32)
  for (int c1 = (n >= 32 && m >= c0 + 2) || (m == 1 && c0 == 0) ? 0 : 32 * n - 32 * floord(31 * n + 31, 32); c1 <= ((n <= -1 && c0 == 0) || (m == 1 && n >= 0 && c0 == 0) ? max(0, n - 1) : n); c1 += 32)
    for (int c2 = c0; c2 <= (m >= 2 && c0 + 31 >= m && n >= c1 && c1 + 31 >= n ? 2 * m - 3 : (m >= 2 * c0 + 63 && c1 <= -32 && n >= c1 && c1 + 31 >= n) || (m >= c0 + 32 && 2 * c0 + 62 >= m && n >= c1 && c1 + 31 >= n) || (n >= 0 && c0 >= 32 && m >= 2 * c0 + 63 && c1 == n) || (m >= 63 && n >= 32 && c0 == 0 && c1 == n) ? 2 * c0 + 61 : m - 1); c2 += 32) {
      if (n >= c1 + 32 && c1 >= 0 && 2 * c0 >= c2 + 32) {
        for (int c4 = 0; c4 <= 31; c4 += 1)
          for (int c5 = max(0, c0 - c2 + 1); c5 <= min(31, m - c2 - 1); c5 += 1)
            S_27(c0, c2 + c5, c1 + c4);
      } else if (c0 >= 32 && c1 >= 0 && c2 >= 2 * c0) {
        for (int c4 = 0; c4 <= min(31, n - c1 - 1); c4 += 1)
          for (int c5 = 0; c5 <= min(31, m - c2 - 1); c5 += 1)
            S_27(c0, c2 + c5, c1 + c4);
      } else if (c0 == 0 && c1 >= 0) {
        for (int c4 = 0; c4 <= min(31, n - c1 - 1); c4 += 1)
          for (int c5 = 0; c5 <= min(31, m - c2 - 1); c5 += 1) {
            if (c1 == 0 && c4 == 0)
              S_14(c2 + c5);
            S_19(c1 + c4, c2 + c5);
            if (c2 + c5 >= 1)
              S_27(0, c2 + c5, c1 + c4);
          }
      }
      if (c1 >= 0) {
        for (int c3 = 1; c3 <= min(31, (c2 / 2) - c0); c3 += 1)
          for (int c4 = 0; c4 <= min(31, n - c1 - 1); c4 += 1)
            for (int c5 = 0; c5 <= min(31, m - c2 - 1); c5 += 1)
              S_27(c0 + c3, c2 + c5, c1 + c4);
        if (n >= c1 + 32) {
          for (int c3 = max(1, (c2 / 2) - c0 + 1); c3 <= min(min(31, m - c0 - 2), -c0 + c2 + 30); c3 += 1)
            for (int c4 = 0; c4 <= 31; c4 += 1)
              for (int c5 = max(0, c0 - c2 + c3 + 1); c5 <= min(31, m - c2 - 1); c5 += 1)
                S_27(c0 + c3, c2 + c5, c1 + c4);