コード例 #1
0
ファイル: spop.cpp プロジェクト: ablimit/hadoopgis
// Benchmark driver: times the serial and ISPC signed-area kernels on the
// array-of-structures (AoS) and structure-of-arrays (SoA) polygon buffers
// and prints the accumulated cycle counts, the speedup and the last result
// of each kernel.
//
// argv[1] (optional): number of timed repetitions per kernel (default 100).
// polygon_aos, polygon_soa and npoints are declared outside this function.
int main(int argc, char** argv) {
  // Zero-initialised so the "results" lines never print indeterminate values
  // when reps <= 0 and no kernel is executed.
  float area[2] = {0.0f, 0.0f};
  float aTime = 0.0;
  float bTime = 0.0;

  int reps = argc > 1 ? atoi(argv[1]) : 100;

  // sizeof yields size_t, whose matching conversion is %zu (%lu is only
  // correct on platforms where size_t happens to be unsigned long).
  printf("ProgramCount [%d], size-of-float[%zu], size-of-float[%zu]\n",
         ispc::get_programCount(), sizeof(float), sizeof(float));

  // --- AoS layout: serial reference, then ISPC ---
  for (int i = 0; i < reps; i++) {
    reset_and_start_timer();
    area[0] = ptarray_signed_area_aos(polygon_aos, npoints);
    aTime += get_elapsed_mcycles();
  }

  for (int i = 0; i < reps; i++) {
    reset_and_start_timer();
    area[1] = ispc::ptarray_signed_area_aos(polygon_aos, npoints);
    bTime += get_elapsed_mcycles();
  }

  printf("%-20s: [%.2f] M cycles %s, [%.2f] M cycles %s (%.2fx speedup).\n",
         "ST_AREA", aTime, "serial_aos", bTime, "ispc_aos",
         aTime / bTime);
  printf("%-20s: serial [%.2f], ispc [%.2f]\n", "results", area[0], area[1]);

  // --- SoA layout: serial reference, then ISPC ---
  aTime = 0.0;
  bTime = 0.0;
  for (int i = 0; i < reps; i++) {
    reset_and_start_timer();
    area[0] = ptarray_signed_area_soa(polygon_soa, npoints);
    aTime += get_elapsed_mcycles();
  }
  printf("sequential code is fine.\n");

  for (int i = 0; i < reps; i++) {
    reset_and_start_timer();
    // The SoA kernel must read the SoA buffer, exactly like the serial
    // reference above; the AoS buffer was passed here before, so the two
    // SoA measurements were not computed from the same input layout.
    area[1] = ispc::ptarray_signed_area_soa(polygon_soa, npoints);
    bTime += get_elapsed_mcycles();
  }
  printf("%-20s: [%.2f] M cycles %s, [%.2f] M cycles %s (%.2fx speedup).\n",
         "ST_AREA", aTime, "serial_soa", bTime, "ispc_soa",
         aTime / bTime);
  printf("%-20s: serial [%.2f], ispc [%.2f]\n", "results", area[0], area[1]);

  return 0;
}
コード例 #2
0
ファイル: main.cpp プロジェクト: Amos-zq/ispc
// Deferred-shading benchmark driver.
//
// argv[1]    : input file to load.
// argv[2..4] : read only when exactly four arguments are supplied; they
//              replace the three default entries of test_iterations.
// Renders the scene with ispc::RenderStatic, prints the per-frame time of
// every run plus the best one, and writes the last frame to a PPM file.
int main(int argc, char** argv) {
    if (argc < 2) {
        printf("usage: deferred_shading <input_file (e.g. data/pp1280x720.bin)> [tasks iterations] [serial iterations]\n");
        return 1;
    }

    // Entry 0 is the number of timed runs, entry 2 the number of frames
    // rendered per run; entry 1 is not read in this function.
    static unsigned int test_iterations[] = {5, 3, 500};
    if (argc == 5) {
        int slot = 0;
        while (slot < 3) {
            test_iterations[slot] = atoi(argv[2 + slot]);
            ++slot;
        }
    }

    InputData *input = CreateInputDataFromFile(argv[1]);
    if (!input) {
        printf("Failed to load input file \"%s\"!\n", argv[1]);
        return 1;
    }

    Framebuffer framebuffer(input->header.framebufferWidth,
                            input->header.framebufferHeight);

    int framesPerRun = test_iterations[2];
    double bestMsec = 1e30;
    for (int run = 0; run < test_iterations[0]; ++run) {
        framebuffer.clear();
        reset_and_start_timer();
        for (int frame = 0; frame < framesPerRun; ++frame) {
            ispc::RenderStatic(&input->header, &input->arrays,
                               VISUALIZE_LIGHT_COUNT,
                               framebuffer.r, framebuffer.g, framebuffer.b);
        }
        // Average time of a single frame within this run.
        double perFrameMsec = get_elapsed_msec() / framesPerRun;
        printf("@time of ISPC + TASKS run:\t\t\t[%.3f] msec [%.3f fps]\n",
               perFrameMsec, 1.0e3/perFrameMsec);
        // Keep the fastest run seen so far.
        if (perFrameMsec < bestMsec) {
            bestMsec = perFrameMsec;
        }
    }
    printf("[ispc static + tasks]:\t\t[%.3f] msec to render "
           "%d x %d image\n", bestMsec,
           input->header.framebufferWidth, input->header.framebufferHeight);
    WriteFrame("deferred-ispc-static.ppm", input, framebuffer);

    DeleteInputData(input);

    return 0;
}
コード例 #3
0
// Fills the columns [width_start, width_end) of every row of a
// width x height image with the value returned by mandel() for the
// corresponding sample point, then prints the elapsed cycle count.
//
// x0, y0, x1, y1   : corners of the sampled region; the step per pixel is
//                    (x1 - x0) / width and (y1 - y0) / height.
// width, height    : full image dimensions; width is also the row stride
//                    of output.
// width_start/end  : half-open column range handled by this call.
// maxIterations    : forwarded unchanged to mandel().
// output           : row-major buffer, written at index j * width + i.
void mandelbrot_threads(float x0, float y0, float x1, float y1,
                       int width,int width_start, int width_end, int height, int maxIterations,
                       int output[])
{
    float dx = (x1 - x0) / width;
    float dy = (y1 - y0) / height;
    reset_and_start_timer();
    for (int j = 0; j < height; j++) {
        // y and the row offset depend only on j, so compute them once per
        // row instead of once per pixel.
        float y = y0 + j * dy;
        int rowStart = j * width;
        for (int i = width_start; i < width_end; ++i) {
            float x = x0 + i * dx;
            output[rowStart + i] = mandel(x, y, maxIterations);
        }
    }
    double dt = get_elapsed_mcycles();
    printf("\n\t\t\t Thread took :\t[%.3f] millon cycles",dt);
}
コード例 #4
0
int main(int argc, char ** argv) {
    if (argc < 4) {
        printf("Please input M, N and K\n");
        return 1;
    }
    int row = atoi(argv[1]);
    int col = atoi(argv[2]);
    int num_iterate = atoi(argv[3]);

    GameOfLife* game;
    if (row <= 0 || col <= 0) {
        game = new GameOfLife(6, 4);
        game->specificInit();
    }
    else {
        game = new GameOfLife(row, col);
        game->randomInit();
    }
    if (game->notTooLarge()) {
        game->print();
    }

    // start to record time consumption
    reset_and_start_timer();
    
    game->iterateAll(num_iterate);

   // stop timer and print out total cycles
    double one_round = get_elapsed_mcycles();
    if (game->notTooLarge()) {
        game->print();
    }
    printf("\n-------- Statistic Infomation --------\n\n");
    printf("time consumption:\t\t\t[%.3f] million cycles\n", one_round);

    delete(game);
    return 0;
}
コード例 #5
0
ファイル: main.cpp プロジェクト: UIKit0/ispc
// Deferred-shading benchmark driver.
//
// argv[1] : input file to load (exactly one argument is required).
// Times three renderers in turn -- ispc::RenderStatic, DispatchDynamicCilk
// (only when __cilk is defined) and DispatchDynamicC -- keeping the fastest
// of five runs of five frames each, writes one PPM per renderer and prints
// the speedup of the ISPC variants relative to the serial one.
int main(int argc, char** argv) {
    if (argc != 2) {
        printf("usage: deferred_shading <input_file (e.g. data/pp1280x720.bin)>\n");
        return 1;
    }

    InputData *input = CreateInputDataFromFile(argv[1]);
    if (!input) {
        printf("Failed to load input file \"%s\"!\n", argv[1]);
        return 1;
    }

    Framebuffer framebuffer(input->header.framebufferWidth,
                            input->header.framebufferHeight);

    InitDynamicC(input);
#ifdef __cilk
    InitDynamicCilk(input);
#endif // __cilk

    int framesPerRun = 5;

    // --- ISPC static scheduling + tasks ---
    double bestIspc = 1e30;
    for (int run = 0; run < 5; ++run) {
        framebuffer.clear();
        reset_and_start_timer();
        for (int frame = 0; frame < framesPerRun; ++frame) {
            ispc::RenderStatic(input->header, input->arrays,
                               VISUALIZE_LIGHT_COUNT,
                               framebuffer.r, framebuffer.g, framebuffer.b);
        }
        double perFrame = get_elapsed_mcycles() / framesPerRun;
        if (perFrame < bestIspc) {
            bestIspc = perFrame;
        }
    }
    printf("[ispc static + tasks]:\t\t[%.3f] million cycles to render "
           "%d x %d image\n", bestIspc,
           input->header.framebufferWidth, input->header.framebufferHeight);
    WriteFrame("deferred-ispc-static.ppm", input, framebuffer);

#ifdef __cilk
    // --- ISPC + Cilk dynamic dispatch ---
    double bestCilk = 1e30;
    for (int run = 0; run < 5; ++run) {
        framebuffer.clear();
        reset_and_start_timer();
        for (int frame = 0; frame < framesPerRun; ++frame) {
            DispatchDynamicCilk(input, &framebuffer);
        }
        double perFrame = get_elapsed_mcycles() / framesPerRun;
        if (perFrame < bestCilk) {
            bestCilk = perFrame;
        }
    }
    printf("[ispc + Cilk dynamic]:\t\t[%.3f] million cycles to render image\n", 
           bestCilk);
    WriteFrame("deferred-ispc-dynamic.ppm", input, framebuffer);
#endif // __cilk

    // --- Serial C++ dynamic dispatch (baseline for the speedups) ---
    double bestSerial = 1e30;
    for (int run = 0; run < 5; ++run) {
        framebuffer.clear();
        reset_and_start_timer();
        for (int frame = 0; frame < framesPerRun; ++frame) {
            DispatchDynamicC(input, &framebuffer);
        }
        double perFrame = get_elapsed_mcycles() / framesPerRun;
        if (perFrame < bestSerial) {
            bestSerial = perFrame;
        }
    }
    printf("[C++ serial dynamic, 1 core]:\t[%.3f] million cycles to render image\n", 
           bestSerial);
    WriteFrame("deferred-serial-dynamic.ppm", input, framebuffer);

#ifdef __cilk
    printf("\t\t\t\t(%.2fx speedup from static ISPC, %.2fx from Cilk+ISPC)\n", 
           bestSerial/bestIspc, bestSerial/bestCilk);
#else
    printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", bestSerial/bestIspc);
#endif // __cilk

    DeleteInputData(input);

    return 0;
}
コード例 #6
0
ファイル: ispc_add_1d.c プロジェクト: lashgar/ipmacc
// Vector-add test driver for the IPMACC-generated ISPC kernel.
//
// argv[1..3]: x, y and z dimensions; the vectors hold x*y*z floats.
// Initialises a[i] = i and b[i] = 2*i, launches the generated kernel `iter`
// times while timing every launch, prints the harmonic mean of the launch
// times and finally checks c[i] == a[i] + b[i] for every element.
// Returns 0 on success and -1 on a usage error, an allocation failure or a
// verification mismatch.
int main(int argc, char* argv[])
{
  unsigned int i;
  unsigned int SIZEX = 0;
  unsigned int SIZEY = 0;
  unsigned int SIZEZ = 0;
  if (argc == 4) {
    // %u is the conversion that matches unsigned int (%d expects int*).
    sscanf(argv [1], "%u", &SIZEX);
    sscanf(argv [2], "%u", &SIZEY);
    sscanf(argv [3], "%u", &SIZEZ);
  }else{
    printf("usage: %s xdim ydim zdim\n", argv [0]);
    return -1;
  }

  unsigned int SIZE = SIZEX * SIZEY * SIZEZ;
  // Also catches arguments that failed to parse: the dimensions stay 0.
  assert(SIZE > 0);
  printf("allocation size> %u\n", SIZE);

  float *a = (float*)malloc(sizeof(float) * SIZE);
  float *b = (float*)malloc(sizeof(float) * SIZE);
  float *c = (float*)malloc(sizeof(float) * SIZE);
  if (a == NULL || b == NULL || c == NULL) {
    // Do not touch the buffers if any allocation failed; free(NULL) is a
    // no-op, so releasing all three unconditionally is safe.
    fprintf(stderr, "failed to allocate %u floats per vector\n", SIZE);
    free(a);
    free(b);
    free(c);
    return -1;
  }


    #ifdef __NVCUDA__
  acc_init(acc_device_nvcuda);
    #endif
    #ifdef __NVOPENCL__
  
    #define DEVICE_TYPE acc_device_nvocl 
  printf("compiled for ocl\n");
  acc_init(DEVICE_TYPE);
  acc_list_devices_spec(DEVICE_TYPE);
    #endif


  // The index is unsigned so that the comparison against SIZE is not a
  // signed/unsigned mix and cannot overflow for SIZE > INT_MAX.
  for (i = 0; i < SIZE; ++i) {
    a [i] = (float)i;
    b [i] = (float)2 * i;
    c [i] = 0.0f;
  }


  int k;
  double revsum = 0;   // sum of 1/dt over all launches, for the harmonic mean
  int iter = 30;       // number of timed kernel launches


  ipmacc_prompt((char*)"IPMACC: memory allocation c\n");
// ISPC host and device are the same, skipping memory allocation
  ipmacc_prompt((char*)"IPMACC: memory allocation a\n");
// ISPC host and device are the same, skipping memory allocation
  ipmacc_prompt((char*)"IPMACC: memory allocation b\n");
// ISPC host and device are the same, skipping memory allocation
  ipmacc_prompt((char*)"IPMACC: memory copyin a\n");
// ISPC host and device are the same, skipping copyin
  ipmacc_prompt((char*)"IPMACC: memory copyin b\n");
// ISPC host and device are the same, skipping copyin


  {
    for(k = 0; k < iter; k++)
    {
      reset_and_start_timer();

      /* kernel call statement*/
      {
        // The online processor count is only reported in verbose mode; it
        // is not passed to the kernel launch.
        unsigned int __ispc_n_threads = sysconf(_SC_NPROCESSORS_ONLN); // acc_get_n_cores(acc_device_intelispc);
        if(getenv("IPMACC_VERBOSE")) printf("IPMACC: Launching ISPC kernel> %u threads + SIMD \n", __ispc_n_threads);
        __generated_kernel_launch_0(a,c,b,SIZE);
      }
      /* kernel call statement*/
// ISPC target is synchronized with CPU
// skipping synchronization

      double dt = get_elapsed_msec();
      revsum += 1.0 / dt;
      printf("@time of openacc run:\t\t\t%.3f msec\n", dt);
    }
  }
  ipmacc_prompt((char*)"IPMACC: memory copyout c\n");
// ISPC host and device are the same, skipping copyout


  printf("harmonic mean openacc run> %.3f msec\n", iter / revsum);


  // Verify the result element by element; stop at the first mismatch.
  int status = 0;
  for (i = 0; i < SIZE; ++i) {
    if (c [i] != (a [i] + b [i])) {
      fprintf(stdout, "Error %u %16.10f!=%16.10f \n", i, c [i], a [i] + b [i]);
      status = -1;
      break;
    }
  }

  if (status == 0) {
    fprintf(stdout, "OpenACC vectoradd test was successful!\n");
  }

  // Release the vectors on both the success and the failure path.
  free(a);
  free(b);
  free(c);
  return status;
}
コード例 #7
0
ファイル: stencil.cpp プロジェクト: edf-hpc/verrou
int main(int argc, char *argv[]) {
    static unsigned int test_iterations[] = {1, 1, 1};//the last two numbers must be equal here
    int Nx = 256, Ny = 256, Nz = 256;
    int width = 4;

    if (argc > 1) {
        if (strncmp(argv[1], "--scale=", 8) == 0) {
            RealType scale = atof(argv[1] + 8);
            Nx = Nx * scale;
            Ny = Ny * scale;
            Nz = Nz * scale;
        }
    }
    if ((argc == 4) || (argc == 5)) {
        for (int i = 0; i < 3; i++) {
            test_iterations[i] = atoi(argv[argc - 3 + i]);
        }
    }

    RealType *Aserial[2], *Aispc[2];
    Aserial[0] = new RealType [Nx * Ny * Nz];
    Aserial[1] = new RealType [Nx * Ny * Nz];
    Aispc[0] = new RealType [Nx * Ny * Nz];
    Aispc[1] = new RealType [Nx * Ny * Nz];
    RealType *vsq = new RealType [Nx * Ny * Nz];

    RealType coeff[4] = { 0.5, -.25, .125, -.0625 }; 

    //    InitData(Nx, Ny, Nz, Aispc, vsq);
    //
    // Compute the image using the ispc implementation on one core; report
    // the minimum time of three runs.
    //
    double minTimeISPC = 1e30;
    // for (unsigned int i = 0; i < test_iterations[0]; ++i) {
    //     reset_and_start_timer();
    //     loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width,
    //                       width, Nz - width, Nx, Ny, Nz, coeff, vsq,
    //                       Aispc[0], Aispc[1]);
    //     double dt = get_elapsed_mcycles();
    //     printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt);
    //     minTimeISPC = std::min(minTimeISPC, dt);
    // }

    // printf("[stencil ispc 1 core]:\t\t[%.3f] million cycles\n", minTimeISPC);

    // InitData(Nx, Ny, Nz, Aispc, vsq);

    // //
    // // Compute the image using the ispc implementation with tasks; report
    // // the minimum time of three runs.
    // //
    double minTimeISPCTasks = 1e30;
    // for (unsigned int i = 0; i < test_iterations[1]; ++i) {
    //     reset_and_start_timer();
    //     loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width,
    //                             width, Nz - width, Nx, Ny, Nz, coeff, vsq,
    //                             Aispc[0], Aispc[1]);
    //     double dt = get_elapsed_mcycles();
    //     printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt);
    //     minTimeISPCTasks = std::min(minTimeISPCTasks, dt);
    // }

    //    printf("[stencil ispc + tasks]:\t\t[%.3f] million cycles\n", minTimeISPCTasks);

    InitData(Nx, Ny, Nz, Aserial, vsq);

    // 
    // And run the serial implementation 3 times, again reporting the
    // minimum time.
    //

    double minTimeSerial = 1e30;
    for (unsigned int i = 0; i < test_iterations[2]; ++i) {
        reset_and_start_timer();
        loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width,
                            width, Nz - width, Nx, Ny, Nz, coeff, vsq,
                            Aserial[0], Aserial[1]);
        double dt = get_elapsed_msec();
        printf("@time of serial run:\t\t\t[%.3f] milli secondes\n", dt);
	//    minTimeSerial = std::min(minTimeSerial, dt);
    }


      
    // printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", 
    //        minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCTasks);

    // Check for agreement
    int offset = 0;
    RealType norm=0;
    for (int z = 0; z < Nz; ++z){
      for (int y = 0; y < Ny; ++y){
	for (int x = 0; x < Nx; ++x, ++offset) {
	  RealType value= Aserial[1][offset];
	  norm += value*value;
	}
      }
    }
    std::cout << std::setprecision(16)<< "norm: " << sqrt(norm)<<std::endl;
}