Пример #1
0
/**
 * Benchmark driver for the ISPC "static + tasks" deferred-shading renderer.
 *
 * Command line:
 *   argv[1]     input file to load (required).
 *   argv[2..4]  optional overrides for test_iterations; they are only
 *               honoured when all three are supplied (argc == 5):
 *                 [0] number of timed runs,
 *                 [1] parsed but not used by this function,
 *                 [2] number of frames rendered inside each timed run.
 *
 * For every timed run the framebuffer is cleared, `nframes` frames are
 * rendered, and the average time per frame is printed. The smallest average
 * is reported at the end and the last framebuffer is written to
 * "deferred-ispc-static.ppm".
 *
 * Returns 0 on success and 1 on invalid arguments or when the input file
 * cannot be loaded.
 */
int main(int argc, char** argv) {
    // The usage text lists all three optional values because the overrides
    // below are applied only when exactly three of them are present.
    static const char kUsage[] =
        "usage: deferred_shading <input_file (e.g. data/pp1280x720.bin)> "
        "[tasks iterations] [serial iterations] [frames per run]\n";

    if (argc < 2) {
        printf("%s", kUsage);
        return 1;
    }

    static unsigned int test_iterations[] = {5, 3, 500}; // last value is the frame count per timed run
    if (argc == 5) {
        for (int i = 0; i < 3; i++) {
            // Parse as signed first: storing a negative atoi() result in an
            // unsigned slot would turn it into a huge iteration count.
            const int requested = atoi(argv[2 + i]);
            if (requested < 0) {
                printf("%s", kUsage);
                return 1;
            }
            test_iterations[i] = requested;
        }
    }

    // The per-frame average divides by nframes, so it must be strictly positive.
    const int nframes = test_iterations[2];
    if (nframes <= 0) {
        printf("frames per run must be a positive integer\n");
        return 1;
    }

    InputData *input = CreateInputDataFromFile(argv[1]);
    if (!input) {
        printf("Failed to load input file \"%s\"!\n", argv[1]);
        return 1;
    }

    Framebuffer framebuffer(input->header.framebufferWidth,
                            input->header.framebufferHeight);

    // Best (smallest) average milliseconds per frame over all timed runs.
    double ispcCycles = 1e30;
    for (unsigned int i = 0; i < test_iterations[0]; ++i) {
        framebuffer.clear();
        reset_and_start_timer();
        for (int j = 0; j < nframes; ++j)
            ispc::RenderStatic(&input->header, &input->arrays,
                               VISUALIZE_LIGHT_COUNT,
                               framebuffer.r, framebuffer.g, framebuffer.b);
        double msec = get_elapsed_msec() / nframes;
        printf("@time of ISPC + TASKS run:\t\t\t[%.3f] msec [%.3f fps]\n", msec, 1.0e3/msec);
        ispcCycles = std::min(ispcCycles, msec);
    }
    printf("[ispc static + tasks]:\t\t[%.3f] msec to render "
           "%d x %d image\n", ispcCycles,
           input->header.framebufferWidth, input->header.framebufferHeight);
    WriteFrame("deferred-ispc-static.ppm", input, framebuffer);

    DeleteInputData(input);

    return 0;
}
Пример #2
0
/*
 * Vector-add benchmark driver (IPMACC-generated host code, ISPC target).
 *
 * Command line: <prog> xdim ydim zdim
 *   The three unsigned dimensions are multiplied to get the element count.
 *
 * Fills a[i] = i and b[i] = 2*i, launches the generated kernel `iter` times
 * while timing each launch, prints the harmonic mean of the run times, then
 * checks that c[i] == a[i] + b[i] for every element.
 *
 * Returns 0 when the check passes, -1 on bad arguments, allocation failure
 * or a mismatching element.
 */
int main(int argc, char* argv[])
{
  unsigned int i;   /* unsigned so the index can cover the full range of SIZE */
  unsigned int SIZEX = 0;
  unsigned int SIZEY = 0;
  unsigned int SIZEZ = 0;

  /* %u matches the unsigned targets; every conversion must succeed. */
  if (argc != 4 ||
      sscanf(argv [1], "%u", &SIZEX) != 1 ||
      sscanf(argv [2], "%u", &SIZEY) != 1 ||
      sscanf(argv [3], "%u", &SIZEZ) != 1) {
    printf("usage: %s xdim ydim zdim\n", argv [0]);
    return -1;
  }

  /* Multiply step by step and detect unsigned wrap-around, otherwise a
     wrapped product would silently benchmark a much smaller problem. */
  unsigned int SIZEXY = SIZEX * SIZEY;
  if (SIZEX != 0 && SIZEXY / SIZEX != SIZEY) {
    fprintf(stderr, "Error: xdim*ydim*zdim does not fit in an unsigned int\n");
    return -1;
  }
  unsigned int SIZE = SIZEXY * SIZEZ;
  if (SIZEXY != 0 && SIZE / SIZEXY != SIZEZ) {
    fprintf(stderr, "Error: xdim*ydim*zdim does not fit in an unsigned int\n");
    return -1;
  }
  assert(SIZE > 0);
  printf("allocation size> %u\n", SIZE);

  float *a = (float*)malloc(sizeof(float) * SIZE);
  float *b = (float*)malloc(sizeof(float) * SIZE);
  float *c = (float*)malloc(sizeof(float) * SIZE);
  if (!a || !b || !c) {
    fprintf(stderr, "Error: failed to allocate 3 x %u floats\n", SIZE);
    free(a);   /* free(NULL) is a no-op, so partial failures are handled */
    free(b);
    free(c);
    return -1;
  }


    #ifdef __NVCUDA__
  acc_init(acc_device_nvcuda);
    #endif
    #ifdef __NVOPENCL__

    #define DEVICE_TYPE acc_device_nvocl
  printf("compiled for ocl\n");
  acc_init(DEVICE_TYPE);
  acc_list_devices_spec(DEVICE_TYPE);
    #endif


  /* Input vectors: a = 0,1,2,...  b = 0,2,4,...  c is cleared. */
  for (i = 0; i < SIZE; ++i) {
    a [i] = (float)i;
    b [i] = (float)2 * i;
    c [i] = 0.0f;
  }


  int k;
  double revsum = 0;   /* sum of 1/dt over all runs, for the harmonic mean */
  int iter = 30;


  ipmacc_prompt((char*)"IPMACC: memory allocation c\n");
  // ISPC host and device are the same, skipping memory allocation
  ipmacc_prompt((char*)"IPMACC: memory allocation a\n");
  // ISPC host and device are the same, skipping memory allocation
  ipmacc_prompt((char*)"IPMACC: memory allocation b\n");
  // ISPC host and device are the same, skipping memory allocation
  ipmacc_prompt((char*)"IPMACC: memory copyin a\n");
  // ISPC host and device are the same, skipping copyin
  ipmacc_prompt((char*)"IPMACC: memory copyin b\n");
  // ISPC host and device are the same, skipping copyin

  {
    for (k = 0; k < iter; k++) {
      reset_and_start_timer();

      /* kernel call statement*/
      {
        unsigned int __ispc_n_threads = sysconf(_SC_NPROCESSORS_ONLN); // acc_get_n_cores(acc_device_intelispc);
        if (getenv("IPMACC_VERBOSE")) printf("IPMACC: Launching ISPC kernel> %u threads + SIMD \n", __ispc_n_threads);
        __generated_kernel_launch_0(a,c,b,SIZE);
      }
      /* kernel call statement*/
      // ISPC target is synchronized with CPU
      // skipping synchronization

      double dt = get_elapsed_msec();
      revsum += 1.0 / dt;
      printf("@time of openacc run:\t\t\t%.3f msec\n", dt);
    }
  }
  ipmacc_prompt((char*)"IPMACC: memory copyout c\n");
  // ISPC host and device are the same, skipping copyout

  printf("harmonic mean openacc run> %.3f msec\n", iter / revsum);

  /* Verify the result; stop at the first mismatch but still fall through
     to the cleanup below so the buffers are released on every path. */
  int status = 0;
  for (i = 0; i < SIZE; ++i) {
    if (c [i] != (a [i] + b [i])) {
      fprintf(stdout, "Error %u %16.10f!=%16.10f \n", i, c [i], a [i] + b [i]);
      status = -1;
      break;
    }
  }

  if (status == 0) {
    fprintf(stdout, "OpenACC vectoradd test was successful!\n");
  }

  free(a);
  free(b);
  free(c);
  return status;
}
Пример #3
0
int main(int argc, char *argv[]) {
    static unsigned int test_iterations[] = {1, 1, 1};//the last two numbers must be equal here
    int Nx = 256, Ny = 256, Nz = 256;
    int width = 4;

    if (argc > 1) {
        if (strncmp(argv[1], "--scale=", 8) == 0) {
            RealType scale = atof(argv[1] + 8);
            Nx = Nx * scale;
            Ny = Ny * scale;
            Nz = Nz * scale;
        }
    }
    if ((argc == 4) || (argc == 5)) {
        for (int i = 0; i < 3; i++) {
            test_iterations[i] = atoi(argv[argc - 3 + i]);
        }
    }

    RealType *Aserial[2], *Aispc[2];
    Aserial[0] = new RealType [Nx * Ny * Nz];
    Aserial[1] = new RealType [Nx * Ny * Nz];
    Aispc[0] = new RealType [Nx * Ny * Nz];
    Aispc[1] = new RealType [Nx * Ny * Nz];
    RealType *vsq = new RealType [Nx * Ny * Nz];

    RealType coeff[4] = { 0.5, -.25, .125, -.0625 }; 

    //    InitData(Nx, Ny, Nz, Aispc, vsq);
    //
    // Compute the image using the ispc implementation on one core; report
    // the minimum time of three runs.
    //
    double minTimeISPC = 1e30;
    // for (unsigned int i = 0; i < test_iterations[0]; ++i) {
    //     reset_and_start_timer();
    //     loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
    //                       width, Nz - width, Nx, Ny, Nz, coeff, vsq,
    //                       Aispc[0], Aispc[1]);
    //     double dt = get_elapsed_mcycles();
    //     printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt);
    //     minTimeISPC = std::min(minTimeISPC, dt);
    // }

    // printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC);

    // InitData(Nx, Ny, Nz, Aispc, vsq);

    // //
    // // Compute the image using the ispc implementation with tasks; report
    // // the minimum time of three runs.
    // //
    double minTimeISPCTasks = 1e30;
    // for (unsigned int i = 0; i < test_iterations[1]; ++i) {
    //     reset_and_start_timer();
    //     loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width,
    //                             width, Nz - width, Nx, Ny, Nz, coeff, vsq,
    //                             Aispc[0], Aispc[1]);
    //     double dt = get_elapsed_mcycles();
    //     printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt);
    //     minTimeISPCTasks = std::min(minTimeISPCTasks, dt);
    // }

    //    printf("[stencil ispc + tasks]:\t\t[%.3f] million cycles\n", minTimeISPCTasks);

    InitData(Nx, Ny, Nz, Aserial, vsq);

    // 
    // And run the serial implementation 3 times, again reporting the
    // minimum time.
    //

    double minTimeSerial = 1e30;
    for (unsigned int i = 0; i < test_iterations[2]; ++i) {
        reset_and_start_timer();
        loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width,
                            width, Nz - width, Nx, Ny, Nz, coeff, vsq,
                            Aserial[0], Aserial[1]);
        double dt = get_elapsed_msec();
        printf("@time of serial run:\t\t\t[%.3f] milli secondes\n", dt);
	//    minTimeSerial = std::min(minTimeSerial, dt);
    }


      
    // printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", 
    //        minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCTasks);

    // Check for agreement
    int offset = 0;
    RealType norm=0;
    for (int z = 0; z < Nz; ++z){
      for (int y = 0; y < Ny; ++y){
	for (int x = 0; x < Nx; ++x, ++offset) {
	  RealType value= Aserial[1][offset];
	  norm += value*value;
	}
      }
    }
    std::cout << std::setprecision(16)<< "norm: " << sqrt(norm)<<std::endl;
}