Пример #1
0
/*
 * HDF5 round-trip example for OPS.
 *
 * Declares a block and three datasets ("single", "multi", "integ") from
 * "write_data.h5", hands the block and datasets to the
 * ops_fetch_*_hdf5_file calls with "read_data.h5", then reads the constants
 * "my_const" and "my_text" from "write_data.h5", prints them, and passes them
 * to ops_write_const_hdf5 with "read_data.h5".
 *
 * argc/argv are forwarded unchanged to ops_init(). Returns 0.
 */
int main(int argc, const char **argv) {

  ops_init(argc, argv, 5);
  ops_init_backend();
  ops_printf("Hello world from OPS!\n\n");

  ops_block block = ops_decl_block_hdf5(3, "grid0", "write_data.h5");

  ops_dat single =
      ops_decl_dat_hdf5(block, 1, "double", "single", "write_data.h5");
  ops_dat multi =
      ops_decl_dat_hdf5(block, 2, "double", "multi", "write_data.h5");
  ops_dat integ = ops_decl_dat_hdf5(block, 1, "int", "integ", "write_data.h5");

  ops_partition("empty_string_that_does_nothing_yet");
  ops_diagnostic_output();

  ops_fetch_block_hdf5_file(block, "read_data.h5");
  ops_fetch_dat_hdf5_file(multi, "read_data.h5");
  ops_fetch_dat_hdf5_file(single, "read_data.h5");
  ops_fetch_dat_hdf5_file(integ, "read_data.h5");

  /* Zero-initialised so the value printed below is well defined even if the
     read leaves the variable untouched. */
  int my_const = 0;
  ops_get_const_hdf5("my_const", 1, "int", (char *)&my_const, "write_data.h5");
  printf("Read const: %d\n", my_const);

  /* Zero-filled: only 11 characters are requested, so the untouched tail of
     the buffer supplies the NUL terminator that "%s" needs (assuming the call
     stores no more than the 11 characters requested). */
  char buffer[50] = {0};
  ops_get_const_hdf5("my_text", 11, "char", buffer, "write_data.h5");
  printf("Read text: %s\n", buffer);

  ops_write_const_hdf5("my_const", 1, "int", (char *)&my_const, "read_data.h5");
  ops_write_const_hdf5("my_text", 11, "char", (char *)buffer, "read_data.h5");

  ops_timing_output(stdout);
  ops_printf("\nSucessful exit from OPS!\n");
  ops_exit();
  return 0;
}
Пример #2
0
/*
 * Allocate storage for `count` float timings.
 *
 * A non-positive count still receives one slot, so a zero-length request can
 * never return a NULL that would be mistaken for an allocation failure and a
 * negative count can never wrap into a huge size. Terminates the program if
 * the allocation fails.
 */
static float *allocTimings(int count) {
    size_t slots = count > 0 ? (size_t) count : 1;
    float *timings = malloc(slots * sizeof *timings);
    if (timings == NULL) {
        fprintf(stderr, "Out of memory allocating %zu timing samples\n", slots);
        exit(EXIT_FAILURE);
    }
    return timings;
}

/*
 * Benchmark driver for the mutex, read-write-lock and serial linked-list
 * implementations.
 *
 * Command line (4 arguments required):
 *   args[1]      number of threads (parsed with atoi)
 *   args[2..4]   three values parsed with atof and handed to ops_init, which
 *                works out the number of operations of each type
 *
 * A pilot run of 20 samples per implementation yields an average and a
 * standard deviation; findSampleSize turns those into the number of samples
 * used for the final measurement, whose average and standard deviation are
 * printed.
 *
 * Returns 0 on success; exits with status 1 on a wrong argument count and
 * with EXIT_FAILURE when memory cannot be allocated.
 */
int main(int argc, char *args[]) {
    if(argc != 5) {
        printf("Insufficient arguments. Need 4 arguments. %d\n", argc);
        exit(1);
    }

    time_t t;
    srand((unsigned) time(&t));
    int m = 10000;
    int n = 1000;
    int n_threads = atoi(args[1]);
    Ops ops;
    ops_init(&ops, m, atof(args[2]), atof(args[3]), atof(args[4])); // Work out the number of operations of each type
    Byte *opsList = malloc(sizeof *opsList * m);
    if (opsList == NULL) {
        fprintf(stderr, "Out of memory allocating the operation list\n");
        exit(EXIT_FAILURE);
    }

    runDummyThreads(n_threads);
    buildOpsList(opsList, &ops, m); // Build a randomly ordered list of operations to be carried out

    int test_samples = 20;

    // Pilot-run timing buffers, one entry per test sample
    float *mutext_time = allocTimings(test_samples);
    float *rwlock_time = allocTimings(test_samples);
    float *serial_time = allocTimings(test_samples);

    // Pilot run: all three linked-list implementations, interleaved
    for (int i = 0; i < test_samples; i++) {
        mutext_time[i] = linkedListMutex(opsList, n_threads, m, n);
        rwlock_time[i] = linkedListRWLock(opsList, n_threads, m, n);
        serial_time[i] = serialLinkedList(opsList, m, n);
    }

    // Standard deviation of each implementation's pilot timings
    float mutex_std = std(mutext_time, test_samples);
    float rwlock_std = std(rwlock_time, test_samples);
    float serial_std = std(serial_time, test_samples);

    // Average of each implementation's pilot timings
    float mutex_avg = findAverage(mutext_time, test_samples);
    float rwlock_avg = findAverage(rwlock_time, test_samples);
    float serial_avg = findAverage(serial_time, test_samples);

    // Sample size for 5% accuracy at a 95% confidence interval
    int mutext_ss = findSampleSize(mutex_std, mutex_avg);
    printf("mutex sample_size %d\n", mutext_ss);
    int rwlock_ss = findSampleSize(rwlock_std, rwlock_avg);
    printf("rwlock sample_size %d\n", rwlock_ss);
    int serial_ss = findSampleSize(serial_std, serial_avg);
    printf("serial sample_size %d\n", serial_ss);

    // Final-run timing buffers, sized by the computed sample sizes
    float *mutext_time_case = allocTimings(mutext_ss);
    float *rwlock_time_case = allocTimings(rwlock_ss);
    float *serial_time_case = allocTimings(serial_ss);

    // Mutex linked-list implementation
    for (int i = 0; i < mutext_ss; i++) {
        mutext_time_case[i] = linkedListMutex(opsList, n_threads, m, n);
    }

    // Read-write-lock linked-list implementation
    for (int i = 0; i < rwlock_ss; i++) {
        rwlock_time_case[i] = linkedListRWLock(opsList, n_threads, m, n);
    }

    // Serial linked-list implementation
    for (int i = 0; i < serial_ss; i++) {
        serial_time_case[i] = serialLinkedList(opsList, m, n);
    }

    // Final average time of each implementation
    float final_mutex_avg = findAverage(mutext_time_case, mutext_ss);
    float final_rwlock_avg = findAverage(rwlock_time_case, rwlock_ss);
    float final_serial_avg = findAverage(serial_time_case, serial_ss);

    // Final standard deviation of each implementation
    float final_mutex_std = std(mutext_time_case, mutext_ss);
    float final_rwlock_std = std(rwlock_time_case, rwlock_ss);
    float final_serial_std = std(serial_time_case, serial_ss);

    printf("mutex linked list average %f :  std %f\n", final_mutex_avg, final_mutex_std);
    printf("rwlock linked list average %f :  std %f\n", final_rwlock_avg, final_rwlock_std);
    printf("serial linked list average %f :  std %f\n", final_serial_avg, final_serial_std);

    // Release every buffer owned by this function
    free(mutext_time_case);
    free(rwlock_time_case);
    free(serial_time_case);
    free(mutext_time);
    free(rwlock_time);
    free(serial_time);
    free(opsList);

    return 0;
}
Пример #3
0
/*
 * Driver for the "complex_numbers" OPS example.
 *
 * Assigns values to variables declared outside this function, declares one
 * block with four datasets (phi, phi_old, wk0, wk1), two stencils, two
 * reduction handles and two halo groups, then runs a single outer iteration
 * that contains a three-stage inner loop of kernel launches. Finally prints
 * the elapsed wall time and passes the block and phi to the
 * ops_fetch_*_hdf5_file calls.
 *
 * argc/argv are forwarded unchanged to ops_init().
 */
int main(int argc, const char **argv) {

  // Values for variables that are declared outside this function.
  c0 = 0.500000000000000;
  rc0 = 1.0 / 280.0;
  rc1 = 4.0 / 105.0;
  rc2 = 1.0 / 5.0;
  rc3 = 4.0 / 5.0;
  nx0 = 1000;
  deltai0 = 0.00100000000000000;
  deltat = 0.000400000000000000;
  // Per-stage coefficients; indexed by `stage` in the inner loop below.
  rkold[0] = 1.0 / 4.0;
  rkold[1] = 3.0 / 20.0;
  rkold[2] = 3.0 / 5.0;
  rknew[0] = 2.0 / 3.0;
  rknew[1] = 5.0 / 12.0;
  rknew[2] = 3.0 / 5.0;

  ops_init(argc, argv, 1);
  ops_init_backend();

  // Hand each scalar to ops_decl_const2 under its own name. rkold/rknew are
  // not declared here; they are passed per stage through ops_arg_gbl instead.
  ops_decl_const2("c0", 1, "double", &c0);
  ops_decl_const2("rc0", 1, "double", &rc0);
  ops_decl_const2("rc1", 1, "double", &rc1);
  ops_decl_const2("rc2", 1, "double", &rc2);
  ops_decl_const2("rc3", 1, "double", &rc3);
  ops_decl_const2("nx0", 1, "int", &nx0);
  ops_decl_const2("deltai0", 1, "double", &deltai0);
  ops_decl_const2("deltat", 1, "double", &deltat);

  ops_block complex_numbers_block;

  complex_numbers_block = ops_decl_block(1, "complex_numbers_block");

  ops_dat phi;
  ops_dat phi_old;
  ops_dat wk0;
  ops_dat wk1;

  // Arguments shared by all four ops_decl_dat calls: size nx0, base 0,
  // halo arguments -4 / +4, and no initial data pointer (val is NULL).
  int halo_p[] = {4};
  int halo_m[] = {-4};
  int size[] = {nx0};
  int base[] = {0};
  double *val = NULL;
  phi = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val,
                     "double", "phi");
  phi_old = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p,
                         val, "double", "phi_old");
  wk0 = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val,
                     "double", "wk0");
  wk1 = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val,
                     "double", "wk1");

  // stencil1: the single offset 0.
  int stencil1_temp[] = {0};
  ops_stencil stencil1 = ops_decl_stencil(1, 1, stencil1_temp, "0");
  // stencil0: the eight offsets -4..4 with 0 left out.
  int stencil0_temp[] = {-4, -3, -2, -1, 1, 2, 3, 4};
  ops_stencil stencil0 =
      ops_decl_stencil(1, 8, stencil0_temp, "-4,-3,-2,-1,1,2,3,4");

  // Reduction handles used by the final kernel launch.
  // NOTE(review): no call that reads these reduction results back is visible
  // in this function.
  ops_reduction real =
      ops_decl_reduction_handle(sizeof(double), "double", "reduction_real");
  ops_reduction imaginary = ops_decl_reduction_handle(sizeof(double), "double",
                                                      "reduction_imaginary");

  // Halo on phi: 4 points, from base 0 to base nx0.
  // NOTE(review): together with halo_exchange1 this looks like a periodic
  // wrap-around of phi - confirm.
  ops_halo_group halo_exchange0;
  {
    int halo_iter[] = {4};
    int from_base[] = {0};
    int to_base[] = {nx0};
    int dir[] = {1};
    ops_halo halo0 =
        ops_decl_halo(phi, phi, halo_iter, from_base, to_base, dir, dir);
    ops_halo grp[] = {halo0};
    halo_exchange0 = ops_decl_halo_group(1, grp);
  }

  // Halo on phi: 4 points, from base nx0 - 4 to base -4.
  ops_halo_group halo_exchange1;
  {
    int halo_iter[] = {4};
    int from_base[] = {nx0 - 4};
    int to_base[] = {-4};
    int dir[] = {1};
    ops_halo halo0 =
        ops_decl_halo(phi, phi, halo_iter, from_base, to_base, dir, dir);
    ops_halo grp[] = {halo0};
    halo_exchange1 = ops_decl_halo_group(1, grp);
  }

  ops_partition("");

  // "Initialisation" kernel writes phi over the range {-4, nx0 + 4}.
  int iter_range5[] = {-4, nx0 + 4};
  ops_par_loop_complex_numbers_block0_5_kernel(
      "Initialisation", complex_numbers_block, 1, iter_range5,
      ops_arg_dat(phi, 1, stencil1, "double", OPS_WRITE), ops_arg_idx());

  ops_halo_transfer(halo_exchange0);

  ops_halo_transfer(halo_exchange1);

  // Timed region starts here.
  double cpu_start, elapsed_start;
  ops_timers(&cpu_start, &elapsed_start);

  // The loop bound is 1, so the body executes exactly once.
  for (int iteration = 0; iteration < 1; iteration++) {

    // "Save equations": reads phi, writes phi_old.
    int iter_range4[] = {-4, nx0 + 4};
    ops_par_loop_complex_numbers_block0_4_kernel(
        "Save equations", complex_numbers_block, 1, iter_range4,
        ops_arg_dat(phi, 1, stencil1, "double", OPS_READ),
        ops_arg_dat(phi_old, 1, stencil1, "double", OPS_WRITE));

    // Three stages; `stage` selects the rknew/rkold entry passed below.
    for (int stage = 0; stage < 3; stage++) {

      // Reads phi through the eight-offset stencil, writes wk0.
      int iter_range0[] = {0, nx0};
      ops_par_loop_complex_numbers_block0_0_kernel(
          "D(phi[x0 t] x0)", complex_numbers_block, 1, iter_range0,
          ops_arg_dat(phi, 1, stencil0, "double", OPS_READ),
          ops_arg_dat(wk0, 1, stencil1, "double", OPS_WRITE));

      // Reads wk0, writes wk1.
      int iter_range1[] = {0, nx0};
      ops_par_loop_complex_numbers_block0_1_kernel(
          "Residual of equation", complex_numbers_block, 1, iter_range1,
          ops_arg_dat(wk0, 1, stencil1, "double", OPS_READ),
          ops_arg_dat(wk1, 1, stencil1, "double", OPS_WRITE));

      // Reads phi_old, wk1 and rknew[stage]; writes phi.
      int iter_range2[] = {-4, nx0 + 4};
      ops_par_loop_complex_numbers_block0_2_kernel(
          "RK new (subloop) update", complex_numbers_block, 1, iter_range2,
          ops_arg_dat(phi_old, 1, stencil1, "double", OPS_READ),
          ops_arg_dat(wk1, 1, stencil1, "double", OPS_READ),
          ops_arg_dat(phi, 1, stencil1, "double", OPS_WRITE),
          ops_arg_gbl(&rknew[stage], 1, "double", OPS_READ));

      // Reads wk1 and rkold[stage]; updates phi_old in place (OPS_RW).
      int iter_range3[] = {-4, nx0 + 4};
      ops_par_loop_complex_numbers_block0_3_kernel(
          "RK old update", complex_numbers_block, 1, iter_range3,
          ops_arg_dat(wk1, 1, stencil1, "double", OPS_READ),
          ops_arg_dat(phi_old, 1, stencil1, "double", OPS_RW),
          ops_arg_gbl(&rkold[stage], 1, "double", OPS_READ));

      // Both halo groups are transferred again after phi has been rewritten.
      ops_halo_transfer(halo_exchange0);

      ops_halo_transfer(halo_exchange1);
    }

    // "Complex numbers": reads phi and increments both reduction handles.
    int iter_range0[] = {0, nx0};
    ops_par_loop_complex_numbers_block0_cn_kernel(
        "Complex numbers", complex_numbers_block, 1, iter_range0,
        ops_arg_dat(phi, 1, stencil0, "double", OPS_READ),
        ops_arg_reduce(real, 1, "double", OPS_INC),
        ops_arg_reduce(imaginary, 1, "double", OPS_INC));
  }

  // Timed region ends here; only the elapsed (wall) difference is printed.
  double cpu_end, elapsed_end;
  ops_timers(&cpu_end, &elapsed_end);

  ops_printf("\nTimings are:\n");
  ops_printf("-----------------------------------------\n");
  ops_printf("Total Wall time %lf\n", elapsed_end - elapsed_start);

  // Pass the block and phi to the HDF5 calls with "complex_numbers_2500.h5".
  ops_fetch_block_hdf5_file(complex_numbers_block, "complex_numbers_2500.h5");
  ops_fetch_dat_hdf5_file(phi, "complex_numbers_2500.h5");

  ops_exit();
}
Пример #4
0
/*
 * Create a cleartext signature over a plaintext file and write it to a
 * signature file.
 *
 * Usage:
 *   <prog> <secret key file> <user_id> <hash> <plaintext file> <signature file>
 *
 * Exit status:
 *   0  success
 *   1  wrong number of arguments
 *   2  unknown hash algorithm name
 *   3  plaintext file could not be opened
 *   4  reading the plaintext file failed
 *   5  signature file could not be opened
 *   6  secret key could not be loaded
 *   7  closing the signature file failed
 */
int main(int argc,char **argv)
    {
    const char *keyfile;
    const char *plainfile;
    const char *hashstr;
    const char *sigfile;
    ops_secret_key_t *skey;
    ops_create_signature_t *sig;
    ops_hash_algorithm_t alg;
    int fd;
    ops_create_info_t *info;
    unsigned char keyid[OPS_KEY_ID_SIZE];

    if(argc != 6)
	{
	fprintf(stderr,"%s <secret key file> <user_id> <hash> <plaintext file>"
		" <signature file>\n",argv[0]);
	exit(1);
	}

    keyfile=argv[1];
    /* argv[2] (<user_id>) is required on the command line but is not used
       anywhere below. */
    hashstr=argv[3];
    plainfile=argv[4];
    sigfile=argv[5];

    ops_init();

    /* Checked explicitly rather than with assert(): the key file is user
       input, and an assert would vanish in NDEBUG builds, leaving a NULL
       dereference further down. */
    skey=get_secret_key(keyfile);
    if(!skey)
	{
	fprintf(stderr,"Cannot load secret key from %s\n",keyfile);
	exit(6);
	}

    alg=ops_hash_algorithm_from_text(hashstr);
    if(alg == OPS_HASH_UNKNOWN)
	{
	fprintf(stderr,"Unkonwn hash algorithm: %s\n",hashstr);
	exit(2);
	}

    sig=ops_create_signature_new();
    ops_signature_start_cleartext_signature(sig,skey,alg,OPS_SIG_BINARY);

    fd=open(plainfile,O_RDONLY);
    if(fd < 0)
	{
	perror(plainfile);
	exit(3);
	}

    /* Feed the plaintext to the signature in chunks until end of file. */
    for( ; ; )
	{
	unsigned char buf[8192];
	ssize_t n;	/* read() returns ssize_t, not int */
	
	n=read(fd,buf,sizeof buf);
	if(!n)
	    break;
	if(n < 0)
	    {
	    perror(plainfile);
	    exit(4);
	    }
	ops_signature_add_data(sig,buf,n);
	}

    close(fd);

    ops_signature_add_creation_time(sig,time(NULL));

    ops_keyid(keyid,&skey->public_key);
    ops_signature_add_issuer_key_id(sig,keyid);

    ops_signature_hashed_subpackets_end(sig);

    fd=open(sigfile,O_CREAT|O_TRUNC|O_WRONLY,0666);
    if(fd < 0)
	{
	perror(sigfile);
	exit(5);
	}

    info=ops_create_info_new();
    ops_writer_set_fd(info,fd);

    ops_write_signature(sig,&skey->public_key,skey,info);

    /* Close the output explicitly so that a deferred write error is reported
       instead of being lost at process exit.
       NOTE(review): assumes the writer attached above does not close fd
       itself - confirm against the SDK. */
    if(close(fd) < 0)
	{
	perror(sigfile);
	exit(7);
	}

    ops_secret_key_free(skey);

    return 0;
    }
Пример #5
0
/*
 * Allocate an array of `count` elements of `elem_size` bytes each.
 *
 * A non-positive count still receives one element, so a zero-length request
 * (e.g. the halo array of a 1x1 grid) can never return a NULL that would be
 * mistaken for an allocation failure. Terminates the program when the
 * allocation fails.
 */
static void *alloc_array(int count, size_t elem_size)
{
  size_t n = count > 0 ? (size_t)count : 1;
  void *p = malloc(n * elem_size);
  if (p == NULL) {
    fprintf(stderr, "Out of memory allocating %zu elements\n", n);
    exit(EXIT_FAILURE);
  }
  return p;
}

/*
 * Multi-block Poisson example for OPS.
 *
 * Declares ngrid_x * ngrid_y blocks, six datasets per block (coordx, coordy,
 * u, u2, f, ref) and a pair of halos on u between every two neighbouring
 * blocks. It then launches the populate and initial-guess kernels, runs
 * n_iter rounds of halo transfer + stencil + update, accumulates an error
 * reduction over all blocks and prints the total error and the wall time.
 *
 * argc/argv are forwarded unchanged to ops_init(). Returns 0.
 */
int main(int argc, char **argv)
{
  /**-------------------------- Initialisation --------------------------**/

  // OPS initialisation
  ops_init(argc,argv,6);

  int logical_size_x = 200;
  int logical_size_y = 200;
  int ngrid_x = 1;
  int ngrid_y = 1;
  int n_iter = 10000;
  dx = 0.01;
  dy = 0.01;
  ops_decl_const("dx",1,"double",&dx);
  ops_decl_const("dy",1,"double",&dy);

  // number of blocks, and number of halos (two per pair of neighbouring blocks)
  const int nblocks = ngrid_x*ngrid_y;
  const int nhalos = 2*(ngrid_x*(ngrid_y-1)+(ngrid_x-1)*ngrid_y);

  //declare blocks
  ops_block *blocks = (ops_block *)alloc_array(nblocks, sizeof *blocks);
  char buf[50];
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      snprintf(buf,sizeof buf,"block %d,%d",i,j);
      blocks[i+ngrid_x*j] = ops_decl_block(2,buf);
    }
  }

  //declare stencils
  int s2D_00[]         = {0,0};
  ops_stencil S2D_00 = ops_decl_stencil( 2, 1, s2D_00, "00");
  int s2D_00_P10_M10_0P1_0M1[]         = {0,0, 1,0, -1,0, 0,1, 0,-1};
  ops_stencil S2D_00_P10_M10_0P1_0M1 = ops_decl_stencil( 2, 5, s2D_00_P10_M10_0P1_0M1, "00:10:-10:01:0-1");

  ops_reduction red_err = ops_decl_reduction_handle(sizeof(double), "double", "err");

  //declare datasets
  int d_p[2] = {1,1}; //max halo depths for the dat in the positive direction
  int d_m[2] = {-1,-1}; //max halo depths for the dat in the negative direction
  int base[2] = {0,0};
  // per-block size: the logical size divided by the grid count, rounded up
  int uniform_size[2] = {(logical_size_x-1)/ngrid_x+1,(logical_size_y-1)/ngrid_y+1};
  double* temp = NULL;
  ops_dat *coordx = (ops_dat *)alloc_array(nblocks, sizeof *coordx);
  ops_dat *coordy = (ops_dat *)alloc_array(nblocks, sizeof *coordy);
  ops_dat *u = (ops_dat *)alloc_array(nblocks, sizeof *u);
  ops_dat *u2 = (ops_dat *)alloc_array(nblocks, sizeof *u2);
  ops_dat *f = (ops_dat *)alloc_array(nblocks, sizeof *f);
  ops_dat *ref = (ops_dat *)alloc_array(nblocks, sizeof *ref);
  int *sizes = (int *)alloc_array(2*nblocks, sizeof *sizes);
  int *disps = (int *)alloc_array(2*nblocks, sizeof *disps);

  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      // blocks on the upper edges are trimmed so the total matches the logical size
      int size[2] = {uniform_size[0], uniform_size[1]};
      if ((i+1)*size[0]>logical_size_x) size[0] = logical_size_x - i*size[0];
      if ((j+1)*size[1]>logical_size_y) size[1] = logical_size_y - j*size[1];
      snprintf(buf,sizeof buf,"coordx %d,%d",i,j);
      coordx[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      snprintf(buf,sizeof buf,"coordy %d,%d",i,j);
      coordy[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      snprintf(buf,sizeof buf,"u %d,%d",i,j);
      u[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      snprintf(buf,sizeof buf,"u2 %d,%d",i,j);
      u2[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      snprintf(buf,sizeof buf,"f %d,%d",i,j);
      f[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      snprintf(buf,sizeof buf,"ref %d,%d",i,j);
      ref[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);

      // remember each block's size and its displacement in the logical grid
      sizes[2*(i+ngrid_x*j)]   = size[0];
      sizes[2*(i+ngrid_x*j)+1] = size[1];
      disps[2*(i+ngrid_x*j)]   = i*uniform_size[0];
      disps[2*(i+ngrid_x*j)+1] = j*uniform_size[1];
    }
  }

  //declare halos on u: one in each direction across every shared block edge
  ops_halo *halos = (ops_halo *)alloc_array(nhalos, sizeof *halos);
  int off = 0;
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      if (i > 0) {
        // edge shared with the block at i-1
        int halo_iter[] = {1,sizes[2*(i+ngrid_x*j)+1]};
        int base_from[] = {sizes[2*(i-1+ngrid_x*j)]-1,0};
        int base_to[] = {-1,0};
        int dir[] = {1,2};
        halos[off++] = ops_decl_halo(u[i-1+ngrid_x*j], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir);
        base_from[0] = 0; base_to[0] = sizes[2*(i+ngrid_x*j)];
        halos[off++] = ops_decl_halo(u[i+ngrid_x*j], u[i-1+ngrid_x*j], halo_iter, base_from, base_to, dir, dir);
      }
      if (j > 0) {
        // edge shared with the block at j-1
        int halo_iter[] = {sizes[2*(i+ngrid_x*j)],1};
        int base_from[] = {0,sizes[2*(i+ngrid_x*(j-1))+1]-1};
        int base_to[] = {0,-1};
        int dir[] = {1,2};
        halos[off++] = ops_decl_halo(u[i+ngrid_x*(j-1)], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir);
        base_from[1] = 0; base_to[1] = sizes[2*(i+ngrid_x*j)+1];
        halos[off++] = ops_decl_halo(u[i+ngrid_x*j], u[i+ngrid_x*(j-1)], halo_iter, base_from, base_to, dir, dir);
      }
    }
  }
  if (off != nhalos) printf("Something is not right\n");
  ops_halo_group u_halos = ops_decl_halo_group(off,halos);

  ops_partition("");
  ops_checkpointing_init("check.h5", 5.0);
  /**-------------------------- Computations --------------------------**/


  double ct0, ct1, et0, et1;
  ops_timers_core(&ct0, &et0);

  //populate forcing, reference solution and boundary conditions
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      int iter_range[] = {-1,sizes[2*(i+ngrid_x*j)]+1,-1,sizes[2*(i+ngrid_x*j)+1]+1};
      ops_par_loop(poisson_kernel_populate, "poisson_kernel_populate", blocks[i+ngrid_x*j], 2, iter_range,
               ops_arg_gbl(&disps[2*(i+ngrid_x*j)], 1, "int", OPS_READ),
               ops_arg_gbl(&disps[2*(i+ngrid_x*j)+1], 1, "int", OPS_READ),
               ops_arg_idx(),
               ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE),
               ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_WRITE),
               ops_arg_dat(ref[i+ngrid_x*j], S2D_00, "double", OPS_WRITE));
    }
  }

  //initial guess 0
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
      ops_par_loop(poisson_kernel_initialguess, "poisson_kernel_initialguess", blocks[i+ngrid_x*j], 2, iter_range,
               ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE));
    }
  }

  //main iteration: halo transfer, then stencil (u,f -> u2), then update (u2 -> u)
  for (int iter = 0; iter < n_iter; iter++) {
    ops_halo_transfer(u_halos);
    for (int j = 0; j < ngrid_y; j++) {
      for (int i = 0; i < ngrid_x; i++) {
        int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
        ops_par_loop(poisson_kernel_stencil, "poisson_kernel_stencil", blocks[i+ngrid_x*j], 2, iter_range,
                 ops_arg_dat(u[i+ngrid_x*j], S2D_00_P10_M10_0P1_0M1, "double", OPS_READ),
                 ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_READ),
                 ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_WRITE));
      }
    }
    for (int j = 0; j < ngrid_y; j++) {
      for (int i = 0; i < ngrid_x; i++) {
        int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
        ops_par_loop(poisson_kernel_update, "poisson_kernel_update", blocks[i+ngrid_x*j], 2, iter_range,
                 ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_READ),
                 ops_arg_dat(u[i+ngrid_x*j] , S2D_00, "double", OPS_WRITE));
      }
    }
  }

  //accumulate the error between u and ref over all blocks
  double err = 0.0;
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
      ops_par_loop(poisson_kernel_error, "poisson_kernel_error", blocks[i+ngrid_x*j], 2, iter_range,
               ops_arg_dat(u[i+ngrid_x*j],    S2D_00, "double", OPS_READ),
               ops_arg_dat(ref[i+ngrid_x*j] , S2D_00, "double", OPS_READ),
               ops_arg_reduce(red_err, 1, "double", OPS_INC));
    }
  }

  ops_reduction_result(red_err,&err);
  ops_printf("Total error: %g\n",err);

  ops_timers_core(&ct1, &et1);
  ops_timing_output();

  ops_printf("\nTotal Wall time %lf\n",et1-et0);

  // NOTE(review): the host arrays are left for process exit to reclaim;
  // whether OPS keeps pointers into `halos` or `disps` is not visible here.
  ops_exit();
  return 0;
}