예제 #1
0
/*
 * Lays out the heads-up display for the current frame.
 *
 * Fills a bar across the top of the window and then places, from left to
 * right, the "Score" label, the score value, the "Lives" label and the
 * number of lives. The timer value is placed against the right-hand edge.
 *
 * ship  - object whose `lives` field is displayed
 * font  - texture passed to createTextObject() for every piece of text
 * timer - unsigned value printed on the right of the bar
 *
 * Every text object is created, positioned and freed again straight away
 * (presumably positionTextObject() also draws it - TODO confirm).
 */
void displayHUD(Object * ship, SDL_Texture * font, uint32_t timer)
{
    Object * temp;
    char buffer[BUFFER_SIZE] = {0};
    int previous = 0;
    int digits = 0;
    SDL_Rect bar;

    /* Set the HUD bar */
    bar.x = 0;
    bar.y = 0;
    bar.w = Global->screenWidth;
    bar.h = Global->screenTop;

    setWindowColor(0, 51, 102, 0xFF);
    SDL_RenderFillRect(Global->renderer, &bar);
    setWindowColor(0x0, 0x0, 0x0, 0xFF);

    /* Display score text */
    strncpy(buffer, "Score", BUFFER_SIZE);
    temp = createTextObject(font, buffer, FONT_LARGE, 1.0);

    positionTextObject(temp, 0, 0);

    /* Display score number.
     * The value is formatted first so the layout arithmetic below has no
     * hidden side effects; `temp` still refers to the "Score" label here. */
    digits = snprintf(buffer, BUFFER_SIZE, "%d", score);
    previous = (temp->x + ((countObjects(temp) + 5 - digits) * temp->clip.w * 0.5 * temp->scale));
    freeObjects(temp);
    temp = createTextObject(font, buffer, FONT_LARGE, 1.0);

    positionTextObject(temp, previous, 0);

    /* Display lives text */
    previous += getTextObjectLength(temp, 5);
    freeObjects(temp);
    strncpy(buffer, "Lives", BUFFER_SIZE);
    temp = createTextObject(font, buffer, FONT_LARGE, 1.0);

    positionTextObject(temp, previous, 0);

    /* Display lives number (same scheme as the score, with an offset of 3) */
    digits = snprintf(buffer, BUFFER_SIZE, "%d", ship->lives);
    previous = (temp->x + ((countObjects(temp) + 3 - digits) * temp->clip.w * 0.5 * temp->scale));
    freeObjects(temp);
    temp = createTextObject(font, buffer, FONT_LARGE, 1.0);

    positionTextObject(temp, previous, 0);
    freeObjects(temp);

    /* Display Timer.
     * `timer` is unsigned, so it is printed with an unsigned conversion;
     * "%d" would show large values as negative numbers. */
    snprintf(buffer, BUFFER_SIZE, "%lu", (unsigned long) timer);
    temp = createTextObject(font, buffer, FONT_LARGE, 1.0);

    positionTextObject(temp, ((Global->screenWidth - (countObjects(temp) * temp->clip.w))), 0);
    freeObjects(temp);
}
예제 #2
0
/// Deletes every Drawable reachable from `node`, each one at most once.
///
/// @param node      Subtree to walk; a null node is ignored.
/// @param freedSet  Pointers that have already been deleted. The set is
///                  consulted and extended so an object referenced from
///                  several leaves is not deleted twice.
///
/// The deleted pointers remain stored in each leaf's `objects` container, so
/// they must not be dereferenced after this call.
void KDTree::freeObjects(KDNode *node, std::unordered_set<Drawable *> &freedSet) {
    // Guard against an empty tree or a missing child.
    if (node == nullptr) {
        return;
    }

    if (!node->is_leaf) {
        freeObjects(node->left, freedSet);
        freeObjects(node->right, freedSet);
        return;
    }

    for (Drawable *object : node->objects) {
        // insert() reports whether the pointer was new, which replaces the
        // separate count() lookup.
        if (freedSet.insert(object).second) {
            delete object;
        }
    }
}
예제 #3
0
/*
 * Creates a text object for `text`, positions it using the screen metrics
 * stored in Global together with the text's own length and glyph height,
 * and frees it again.
 *
 * font  - texture handed to createTextObject()
 * text  - string to show
 * type  - object type forwarded to createTextObject()
 * scale - scale forwarded to createTextObject()
 */
void displayTextMiddle(SDL_Texture * font, char * text, objectType type, float scale)
{
    Object * label = createTextObject(font, text, type, scale);

    positionTextObject(
        label,
        /* horizontal position: shifted left by half of the text length */
        (((Global->screenWidth - Global->screenRight) / 2) + Global->screenLeft) - (getTextObjectLength(label, 0) / 2),
        /* vertical position: shifted up by half of the glyph height */
        (((Global->screenHeight - Global->screenBottom) / 2) + Global->screenTop) - (label->clip.h / 2));

    freeObjects(label);
}
예제 #4
0
/*
 * Allocates a MyWindow together with its text-layer container and its
 * underlying window, resets its button configuration, id and timer, and
 * registers the load/unload handlers.
 *
 * Returns -ENOMEM when one of the allocations fails; everything allocated
 * up to that point is released first.
 */
int allocWindow(DictionaryIterator *rdi) {
    Tuple *t;
  
    MyWindow *mw = malloc(sizeof(MyWindow));
    if (mw == NULL) {
      return -ENOMEM;
    }
    mw->myTextLayers = createObjects(myTextLayerDestructor);
    /* Test the container that was just requested: mw itself is known to be
       non-NULL at this point, so re-testing it could never detect a failure. */
    if (mw->myTextLayers == NULL) {
      free(mw);
      return -ENOMEM;
    }
  
    for (int i = 0;i < NUM_BUTTONS; ++i) {
      mw->button_config[i] = 0;
    }
  
    mw->id = 0;
    mw->w = window_create();
    mw->appTimer = NULL;

    if (mw->w == NULL) {
      freeObjects(mw->myTextLayers);
      free(mw);
      return -ENOMEM;
    }  

    /* Let the window callbacks get back to this MyWindow. */
    window_set_user_data(mw->w, mw);
  
    // Set handlers to manage the elements inside the Window
    window_set_window_handlers(mw->w, (WindowHandlers) {
      .load = window_load,
      .unload = window_unload
    });
예제 #5
0
// Reads delimited records from stdin and, for every record that has an id and
// a WKT geometry, writes "<id> TAB minX TAB minY TAB maxX TAB maxY" to stdout.
//
// argv[1], argv[2]: field indices handed to assignIndex().
// Returns 0 on success and -1 when the arguments are missing or rejected.
int main(int argc, char **argv) {
  // Both indices are mandatory; without this check argv[1]/argv[2] could be
  // read past the end of the argument list.
  if (argc < 3) {
    cerr << "Not enough arguments: expected two field indices" << endl;
    return -1;
  }

  if (! assignIndex(atoi(argv[1]), atoi(argv[2])) ) {
    cerr << "Invalid arguments for field indices" << endl;
    return -1;
  }


  // initialize the GEOS objects
  gf = new GeometryFactory(new PrecisionModel(),0);
  wkt_reader= new WKTReader(gf);


  // process input data 
  string input_line;
  vector<string> fields;
  cerr << "Reading input from stdin..." <<endl; 
  id_type id ; 


  while(cin && getline(cin, input_line) && !cin.eof()){
    fields = parse(input_line);

    // Skip records that do not contain both requested fields, so the
    // indexing below stays inside the vector.
    if (fields.size() <= static_cast<size_t>(ID_IDX) ||
        fields.size() <= static_cast<size_t>(GEOM_IDX))
      continue ;

    if (fields[ID_IDX].length() <1 )
      continue ;  // skip lines which has empty id field 
    id = std::strtoul(fields[ID_IDX].c_str(), NULL, 0);

    if (fields[GEOM_IDX].length() <2 )
    {
#ifndef NDEBUG
      cerr << "skipping record [" << id <<"]"<< endl;
#endif
      continue ;  // skip lines which has empty geometry
    }

    Geometry* geom = wkt_reader->read(fields[GEOM_IDX]);
    const Envelope * env = geom->getEnvelopeInternal();
    cout << fields[ID_IDX] << TAB  << env->getMinX() << TAB << env->getMinY() << TAB 
        << env->getMaxX() << TAB << env->getMaxY() << endl;

    // The geometry is only needed for its envelope; release it so memory
    // use does not grow with the number of input records.
    delete geom;
  }

  cout.flush();
  cerr.flush();
  freeObjects();
  return 0; // success
}
예제 #6
0
/*
 * Keyboard callback.
 *
 *   q / Q - calls freeObjects() and terminates the program with status 0
 *   s / S - calls save_image() and requests a redisplay
 *
 * Any other key is ignored. The x and y arguments are not used.
 */
void keyboard(unsigned char key, int x, int y)
{
	if (key == 'q' || key == 'Q') {
		freeObjects();
		exit(0);
	}

	if (key == 's' || key == 'S') {
		save_image();
		glutPostRedisplay();
	}
}
예제 #7
0
// Entry point of the engine.
//
// Initialises the engine, extracts the query parameters from the command
// line and runs mJoinQuery() when the join cardinality is 1 or 2.
// Returns 0 on success and 1 on any error (bad parameters, unsupported
// cardinality, or a negative result from mJoinQuery()).
int main(int argc, char** argv)
{
  init();

  if (!extractParams(argc,argv)) {
    std::cerr <<"ERROR: query parameter extraction error." << std::endl << "Please see documentations, or contact author." << std::endl;
    usage();
    return 1;
  }

  // Only join cardinalities 1 and 2 are handled.
  if (stop.join_cardinality != 1 && stop.join_cardinality != 2) {
    std::cerr <<"ERROR: join cardinality does not match engine capacity." << std::endl ;
    return 1;
  }

  const int load = mJoinQuery();
  if (load < 0) {
    std::cerr <<"Error: ill formatted data. Terminating ....... " << std::endl;
    return 1;
  }
  std::cerr <<"Query Load: [" << load << "]" <<std::endl;

  freeObjects();

  cout.flush();
  cerr.flush();
  return 0;
}
예제 #8
0
/*
 * Creates the global window collection and its root window, then pushes the
 * root window with the given dictionary iterator.
 *
 * Does nothing when a window collection already exists. Always returns 0;
 * problems with the root window are only logged.
 */
int resetWindows(DictionaryIterator *rdi) {
  APP_LOG(APP_LOG_LEVEL_DEBUG, "resetWindows");

  // Doesn't currently work once windows exist.
  if (myWindows != NULL) {
    return 0;
  }

  /* Because of the guard above this is always NULL for now, so the release
     at the end of the function is never reached. */
  objects *previousWindows = myWindows;
  myWindows = NULL;

  APP_LOG(APP_LOG_LEVEL_DEBUG, "About to create windows structure.");
  myWindows = createObjects(MyWindowDestructor);
  APP_LOG(APP_LOG_LEVEL_DEBUG, "objects created. ");

  // Need to create a window to keep the app from
  // exiting, so we might as well make it available.
  int rootHandle = allocWindow(NULL);
  APP_LOG(APP_LOG_LEVEL_DEBUG, "root window handle = %d", rootHandle);
  if (rootHandle != 0) {
    APP_LOG(APP_LOG_LEVEL_ERROR, "Root window handle %d != 0", rootHandle);
  }

  MyWindow *rootWindow = getWindowByHandle(rootHandle);
  if (rootWindow == NULL) {
    APP_LOG(APP_LOG_LEVEL_ERROR, "Root window null");
  }

  pushWindow(rootWindow, rdi);

  if (previousWindows != NULL) {
    freeObjects(previousWindows);
  }

  return 0;
}
예제 #9
0
/*
 * Destructor callback for a MyWindow passed as an untyped pointer.
 *
 * Releases, in this order, the text-layer collection, the pending app timer
 * and the window itself (removed from the window stack before it is
 * destroyed), and finally frees the MyWindow structure.
 */
static void MyWindowDestructor(void *vptr) {
  MyWindow *win = vptr;

  if (win->myTextLayers != NULL) {
    freeObjects(win->myTextLayers);
    win->myTextLayers = NULL;
  }

  if (win->appTimer != NULL) {
    app_timer_cancel(win->appTimer);
    win->appTimer = NULL;
  }

  if (win->w != NULL) {
    APP_LOG(APP_LOG_LEVEL_DEBUG, "About to call window_destroy. mw=%p w=%p", win, win->w);
    window_stack_remove(win->w, false);
    APP_LOG(APP_LOG_LEVEL_DEBUG, "Window removed from stack.");
    window_destroy(win->w);
    APP_LOG(APP_LOG_LEVEL_DEBUG, "Window Destroyed.");
    win->w = NULL;
  }

  free(win);
}
예제 #10
0
// Builds a parser for the given file: validates size, signature and version,
// creates the decoder and reads the directory.
//
// Throws a RawDecoderException (via ThrowRDE) when the file is too small, has
// the wrong signature, is too old, or an IOException occurs while reading
// the header. On such a failure freeObjects() is called before the exception
// leaves the constructor.
X3fParser::X3fParser(FileMap* file) {
  decoder = NULL;
  bytes = NULL;
  mFile = file;
  uint32 size = file->getSize();
  if (size<104+128)
    ThrowRDE("X3F file too small");

  // Pick the byte stream variant according to the host endianness.
  if (getHostEndianness() == little)
    bytes = new ByteStream(file->getData(0), size);
  else
    bytes = new ByteStreamSwap(file->getData(0), size);

  // The two try blocks are nested on purpose: the RawDecoderException raised
  // from the IOException handler must still reach the outer handler so that
  // freeObjects() runs.
  try {
    try {
      // Read signature
      if (bytes->getUInt() != 0x62564f46)
        ThrowRDE("X3F Decoder: Not an X3f file (Signature)");

      uint32 version = bytes->getUInt();
      if (version < 0x00020000)
        ThrowRDE("X3F Decoder: File version too old");

      // Skip identifier + mark bits
      bytes->skipBytes(16+4);

      bytes->setAbsoluteOffset(0);
      decoder = new X3fDecoder(file);
      readDirectory();
    } catch (const IOException &e) {
      ThrowRDE("X3F Decoder: IO Error while reading header: %s", e.what());
    }
  } catch (const RawDecoderException &) {
    freeObjects();
    // Rethrow the original exception object instead of a copy, so nothing is
    // sliced if a derived exception type was thrown.
    throw;
  }
}
예제 #11
0
// Benchmarks the CSR kernels over a range of matrix sizes, non-zeros per row
// and work-group counts, and writes one line per configuration to
// "<clspmvpath>/benchmark/csr.ben":
//
//     <size> <csrnum> <groupnum> <gflops of method 0> <gflops of method 1>
//
// clspmvpath  - directory that contains the "benchmark" folder
// oclfilename - OpenCL source file forwarded to initialization()/spmv_csr_ocl()
// ntimes      - number of timed kernel launches per configuration
// deviceType  - OpenCL device type to run on
//
// Returns without benchmarking when the output file cannot be opened.
void benchmark_csr(char* clspmvpath, char* oclfilename, int ntimes, cl_device_type deviceType)
{
    char outname[1000];
    // snprintf keeps an over-long path from overflowing outname.
    snprintf(outname, sizeof(outname), "%s%s", clspmvpath, "/benchmark/csr.ben");
    FILE* outfile = fopen(outname, "w");
    if (outfile == NULL)
    {
        fprintf(stderr, "benchmark_csr: cannot open %s for writing\n", outname);
        return;
    }

    const int methodnum = 2;
    double floptable[methodnum] = {0.0, 0.0};

    //Get device info
    cl_device_id* devices = NULL;
    cl_context context = NULL;
    cl_command_queue cmdQueue = NULL;
    cl_program program = NULL;

    // Run the setup outside of assert(): with NDEBUG defined the assert
    // expression is not evaluated, which would skip the initialization.
    const bool initOk = (initialization(deviceType, devices, &context, &cmdQueue, &program, oclfilename) == 1);
    assert(initOk);
    (void)initOk;

    cl_int errorCode = CL_SUCCESS;
    //Assuming GPU is at devices[0]

    cl_uint dev_exec_num;

    size_t devicesSize = 0;
    errorCode = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devicesSize ); CHECKERROR;
    devices = new cl_device_id[devicesSize / sizeof(cl_device_id)]; CHECKERROR;
    errorCode = clGetContextInfo(context, CL_CONTEXT_DEVICES, devicesSize, devices, NULL ); CHECKERROR;
    errorCode = clGetDeviceInfo(devices[0], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( dev_exec_num ), &dev_exec_num, NULL ); CHECKERROR;

    // Only the compute-unit count is needed here; spmv_csr_ocl() sets up its
    // own OpenCL objects.
    freeObjects(devices, &context, &cmdQueue, &program);
    printf("\nCompute units %u\n", dev_exec_num);
    unsigned int warp_per_group = CSR_VEC_GROUP_SIZE/WARPSIZE;
    unsigned int max_group_num = dev_exec_num*MAX_WARP_PER_PROC/warp_per_group;

    for (unsigned int size = 1024; size <= 262144; size*=2)
    {
        float* vec = (float*)malloc(sizeof(float)*size);
        float* res = (float*)malloc(sizeof(float)*size);
        initVectorOne<int, float>(vec, size);
        initVectorZero<int, float>(res, size);

        for (unsigned int csrnum = 2; csrnum <= 2048; csrnum *= 4)
        {
            // Stop once the matrix would exceed 64M entries or csrnum would
            // exceed the matrix dimension.
            if (size*csrnum > 67108864)
                break;
            if (csrnum > size)
                break;
            csr_matrix<int, float> csrmat;
            init_csr_mat(csrmat, size, csrnum);

            for (unsigned int groupnum = dev_exec_num; groupnum <= max_group_num; groupnum += dev_exec_num)
            {
                double opttime = 10000.0f;
                double optflop = 0.0f;
                int optmethod = 0;

                spmv_csr_ocl(&csrmat, vec, res, 1, opttime, optflop, optmethod, oclfilename, deviceType, ntimes, floptable, groupnum);

                printf("\n------------------------------------------------------------------------\n");
                printf("CSR Dim %u BN %u GN %u opttime %f ms optflop %f optmethod %d", size, csrnum, groupnum, opttime*1000.0, optflop,  optmethod);
                printf("\n------------------------------------------------------------------------\n");
                fprintf(outfile, "%u %u %u", size, csrnum, groupnum);
                for (int k = 0; k < methodnum; k++)
                    fprintf(outfile, " %f", floptable[k]);
                fprintf(outfile, "\n");
            }

            free_csr_matrix(csrmat);
        }
        free(vec);
        free(res);
    }

    fclose(outfile);
}
예제 #12
0
// Times two CSR SpMV kernels ("gpu_csr_ve_slm_pm_fs" as method 0 and
// "gpu_csr_ve_reduction_fs" as method 1) on the given matrix and vector.
//
// mat         - CSR matrix (row pointers, column ids, values)
// vec         - input vector with mat->matinfo.width elements
// result      - not read or written by the current implementation
// dim2Size    - size of the second NDRange dimension; the measured time is
//               divided by it
// opttime     - out: best time per launch in seconds (reset to 10000 first)
// optflop     - out: GFLOPS of the best method (only written when a method
//               beats the current opttime)
// optmethod   - out: index of the best method
// oclfilename - OpenCL source file forwarded to initialization()
// deviceType  - OpenCL device type
// ntimes      - number of timed launches per method
// floptable   - out: GFLOPS per method; must hold at least two entries
// groupnum    - number of work-groups to launch
void spmv_csr_ocl(csr_matrix<int, float>* mat, float* vec, float* result, int dim2Size, double& opttime, double& optflop, int& optmethod, char* oclfilename, cl_device_type deviceType, int ntimes, double* floptable, int groupnum)
{
    cl_device_id* devices = NULL;
    cl_context context = NULL;
    cl_command_queue cmdQueue = NULL;
    cl_program program = NULL;

    // Run the setup outside of assert(): with NDEBUG defined the assert
    // expression is not evaluated, which would skip the initialization.
    const bool initOk = (initialization(deviceType, devices, &context, &cmdQueue, &program, oclfilename) == 1);
    assert(initOk);
    (void)initOk;

    cl_int errorCode = CL_SUCCESS;

    //Create device memory objects (NULL so the cleanup checks are well defined)
    cl_mem devRowPtr = NULL;
    cl_mem devColId = NULL;
    cl_mem devData = NULL;
    cl_mem devVec = NULL;
    cl_mem devTexVec = NULL;
    cl_mem devRes = NULL;

    //Initialize values
    int nnz = mat->matinfo.nnz;
    int vecsize = mat->matinfo.width;
    int rownum = mat->matinfo.height;
    int rowptrsize = rownum + 1;
    ALLOCATE_GPU_READ(devRowPtr, mat->csr_row_ptr, sizeof(int)*rowptrsize);
    ALLOCATE_GPU_READ(devColId, mat->csr_col_id, sizeof(int)*nnz);
    ALLOCATE_GPU_READ(devData, mat->csr_data, sizeof(float)*nnz);
    ALLOCATE_GPU_READ(devVec, vec, sizeof(float)*vecsize);
    int paddedres = findPaddedSize(rownum, 16);
    devRes = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*paddedres, NULL, &errorCode); CHECKERROR;

    // Copy of the vector laid out as a VEC2DWIDTH-wide 2D image, zero-padded
    // in the last row.
    // NOTE(review): devTexVec is uploaded but never passed to either kernel below.
    const cl_image_format floatFormat =
    {
        CL_R,
        CL_FLOAT,
    };

    int width = VEC2DWIDTH;
    int height = (vecsize + VEC2DWIDTH - 1)/VEC2DWIDTH;
    float* image2dVec = (float*)malloc(sizeof(float)*width*height);
    memset(image2dVec, 0, sizeof(float)*width*height);
    for (int i = 0; i < vecsize; i++)
    {
        image2dVec[i] = vec[i];
    }
    size_t origin[] = {0, 0, 0};
    // Explicit casts: int -> size_t inside a brace initializer is a narrowing conversion.
    size_t vectorSize[] = {static_cast<size_t>(width), static_cast<size_t>(height), 1};
    devTexVec = clCreateImage2D(context, CL_MEM_READ_ONLY, &floatFormat, width, height, 0, NULL, &errorCode); CHECKERROR;
    errorCode = clEnqueueWriteImage(cmdQueue, devTexVec, CL_TRUE, origin, vectorSize, 0, 0, image2dVec, 0, NULL, NULL); CHECKERROR;
    clFinish(cmdQueue);

    opttime = 10000.0f;
    optmethod = 0;
    int dim2 = dim2Size;

    // Host copy of the row pointers, zero-padded to a multiple of the number
    // of warps per group. It is the same for both kernels, so build it once.
    int padrowsize = findPaddedSize(rownum, CSR_VEC_GROUP_SIZE/WARPSIZE);
    int* rowptrpad = (int*)malloc(sizeof(int)*(padrowsize+1));
    memset(rowptrpad, 0, sizeof(int)*(padrowsize+1));
    for (int i = 0; i <= mat->matinfo.height; i++)
        rowptrpad[i] = mat->csr_row_ptr[i];

    // Both methods take the same arguments and are timed in the same way;
    // only the kernel name differs. The array index is the method id.
    const char* const kernelNames[] = {"gpu_csr_ve_slm_pm_fs", "gpu_csr_ve_reduction_fs"};
    const int kernelCount = 2;

    for (int methodid = 0; methodid < kernelCount; methodid++)
    {
        cl_mem devRowPtrPad = NULL;
        ALLOCATE_GPU_READ(devRowPtrPad, rowptrpad, sizeof(int)*(padrowsize+1));
        clFinish(cmdQueue);

        printf("\nRow Num %d padded size %d\n", rownum, padrowsize);
        cl_uint work_dim = 2;
        size_t blocksize[] = {CSR_VEC_GROUP_SIZE, 1};

        cl_kernel csrKernel = NULL;
        csrKernel = clCreateKernel(program, kernelNames[methodid], &errorCode); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devRowPtrPad); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devColId); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 2, sizeof(cl_mem), &devData); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 3, sizeof(cl_mem), &devVec); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 4, sizeof(cl_mem), &devRes); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 5, sizeof(int), &rownum); CHECKERROR;

        size_t globalsize[] = {static_cast<size_t>(groupnum * CSR_VEC_GROUP_SIZE), static_cast<size_t>(dim2)};

        // Three untimed launches before the measurement.
        for (int k = 0; k < 3; k++)
        {
            errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
        }
        clFinish(cmdQueue);

        double teststart = timestamp();
        for (int i = 0; i < ntimes; i++)
        {
            errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
        }
        clFinish(cmdQueue);
        double testend = timestamp();
        double time_in_sec = (testend - teststart)/(double)dim2;
        // Two floating point operations (multiply and add) per non-zero.
        double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
        printf("\nCSR vector SLM row ptr padded mat strided rows fixed size:%d cpu time %lf ms GFLOPS %lf code %d \n\n", groupnum * CSR_VEC_GROUP_SIZE,   time_in_sec / (double) ntimes * 1000, gflops, methodid);

        double onetime = time_in_sec / (double) ntimes;
        floptable[methodid] = gflops;
        if (onetime < opttime)
        {
            opttime = onetime;
            optmethod = methodid;
            optflop = gflops;
        }

        if (devRowPtrPad)
            clReleaseMemObject(devRowPtrPad);
        if (csrKernel)
            clReleaseKernel(csrKernel);
    }

    free(rowptrpad);

    //Clean up
    if (image2dVec)
        free(image2dVec);

    if (devRowPtr)
        clReleaseMemObject(devRowPtr);
    if (devColId)
        clReleaseMemObject(devColId);
    if (devData)
        clReleaseMemObject(devData);
    if (devVec)
        clReleaseMemObject(devVec);
    if (devTexVec)
        clReleaseMemObject(devTexVec);
    if (devRes)
        clReleaseMemObject(devRes);

    freeObjects(devices, &context, &cmdQueue, &program);

}
예제 #13
0
int main(int argc, char **argv) {
  double min_x;
  double max_x;
  double min_y;
  double max_y;

  if (argc < 3) {
     cerr << "Not enough arguments:" << argv[0] << " [geomidx] [ratio] " << endl;
     return -1;
  }

  GEOM_IDX = atoi(argv[1]) -1;
  if (GEOM_IDX < 0) {
    cerr << "Invalid arguments for field indices" << endl;
    return -1;
  }

  ratio = strtod(argv[2], NULL);
   
  // initlize the GEOS ibjects
  gf = new GeometryFactory(new PrecisionModel(),0);
  wkt_reader= new WKTReader(gf);


  // process input data 
  map<int,Geometry*> geom_polygons;
  string input_line;
  vector<string> fields;
  cerr << "Reading input from stdin..." <<endl; 
  id_type id ; 
  Geometry* geom; 
  const Envelope * env;


  long count = 1;
  while(cin && getline(cin, input_line) && !cin.eof()){
    fields = parse(input_line);
    //if (fields[ID_IDX].length() <1 )
    //  continue ;  // skip lines which has empty id field 
    // id = std::strtoul(fields[ID_IDX].c_str(), NULL, 0);
    if (fields[GEOM_IDX].length() <2 )
    {
#ifndef NDEBUG
      cerr << "skipping record [" << id <<"]"<< endl;
#endif
      continue ;  // skip lines which has empty geometry
    }
    // try {
    geom = wkt_reader->read(fields[GEOM_IDX]);
    env = geom->getEnvelopeInternal();
    if ( (double) rand() / (double) (RAND_MAX) < ratio) {
        cout << count++ << TAB  << env->getMinX() << TAB << env->getMinY() << TAB 
          << env->getMaxX() << TAB << env->getMaxY() << endl;
    }
    delete geom;
  }

  cout.flush();
  cerr.flush();
  freeObjects();
  return 0; // success
}
예제 #14
0
// Times the sliced-ELL SpMV kernels on the given matrix and vector.
// "gpu_sell_warp" (method 0) runs when the slice height equals WARPSIZE and
// "gpu_sell_group" (method 1) when it equals SELL_GROUP_SIZE.
//
// mat         - sliced ELL matrix
// vec         - input vector with mat->matinfo.width elements
// result      - initial contents uploaded to the device result buffer
//               (mat->matinfo.height elements)
// dim2Size    - size of the second NDRange dimension; the measured time is
//               divided by it
// opttime     - in/out: best time per launch in seconds; it is only lowered
//               here, so the caller has to initialise it
// optflop     - out: GFLOPS of the method that improved opttime
// optmethod   - out: index of the method that improved opttime
// oclfilename - OpenCL source file forwarded to initialization()
// deviceType  - OpenCL device type
// ntimes      - number of timed launches per method
// floptable   - out: GFLOPS per executed method, indexed by method id
void spmv_sell_ocl(sell_matrix<int, float>* mat, float* vec, float* result, int dim2Size, double& opttime, double& optflop, int& optmethod, char* oclfilename, cl_device_type deviceType, int ntimes, double* floptable)
{
    cl_device_id* devices = NULL;
    cl_context context = NULL;
    cl_command_queue cmdQueue = NULL;
    cl_program program = NULL;

    // Run the setup outside of assert(): with NDEBUG defined the assert
    // expression is not evaluated, which would skip the initialization.
    const bool initOk = (initialization(deviceType, devices, &context, &cmdQueue, &program, oclfilename) == 1);
    assert(initOk);
    (void)initOk;

    cl_int errorCode = CL_SUCCESS;

    //Create device memory objects (NULL so the cleanup checks are well defined)
    cl_mem devSlicePtr = NULL;
    cl_mem devColid = NULL;
    cl_mem devData = NULL;
    cl_mem devVec = NULL;
    cl_mem devRes = NULL;
    cl_mem devTexVec = NULL;

    //Initialize values
    int nnz = mat->matinfo.nnz;
    int rownum = mat->matinfo.height;
    int vecsize = mat->matinfo.width;
    int sliceheight = mat->sell_slice_height;
    int slicenum = mat->sell_slice_num;
    int datasize = mat->sell_slice_ptr[slicenum];
    ALLOCATE_GPU_READ(devSlicePtr, mat->sell_slice_ptr, sizeof(int)*(slicenum + 1));
    ALLOCATE_GPU_READ(devColid, mat->sell_col_id, sizeof(int)*datasize);
    ALLOCATE_GPU_READ(devData, mat->sell_data, sizeof(float)*datasize);
    ALLOCATE_GPU_READ(devVec, vec, sizeof(float)*vecsize);
    int paddedres = findPaddedSize(rownum, 512);
    devRes = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*paddedres, NULL, &errorCode); CHECKERROR;
    errorCode = clEnqueueWriteBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, result, 0, NULL, NULL); CHECKERROR;

    // Copy of the vector laid out as a VEC2DWIDTH-wide 2D image, zero-padded
    // in the last row.
    // NOTE(review): devTexVec is uploaded but never passed to either kernel below.
    const cl_image_format floatFormat =
    {
        CL_R,
        CL_FLOAT,
    };

    int width = VEC2DWIDTH;
    int height = (vecsize + VEC2DWIDTH - 1)/VEC2DWIDTH;
    float* image2dVec = (float*)malloc(sizeof(float)*width*height);
    memset(image2dVec, 0, sizeof(float)*width*height);
    for (int i = 0; i < vecsize; i++)
    {
        image2dVec[i] = vec[i];
    }
    size_t origin[] = {0, 0, 0};
    // Explicit casts: int -> size_t inside a brace initializer is a narrowing conversion.
    size_t vectorSize[] = {static_cast<size_t>(width), static_cast<size_t>(height), 1};
    devTexVec = clCreateImage2D(context, CL_MEM_READ_ONLY, &floatFormat, width, height, 0, NULL, &errorCode); CHECKERROR;
    errorCode = clEnqueueWriteImage(cmdQueue, devTexVec, CL_TRUE, origin, vectorSize, 0, 0, image2dVec, 0, NULL, NULL); CHECKERROR;
    clFinish(cmdQueue);

    int dim2 = dim2Size;

    // Method 0: work items cover the rows, rounded up to whole groups.
    if (sliceheight == WARPSIZE)
    {
        int methodid = 0;
        cl_uint work_dim = 2;
        size_t blocksize[] = {SELL_GROUP_SIZE, 1};
        int gsize = ((rownum + SELL_GROUP_SIZE - 1)/SELL_GROUP_SIZE)*SELL_GROUP_SIZE;
        size_t globalsize[] = {static_cast<size_t>(gsize), static_cast<size_t>(dim2)};

        cl_kernel csrKernel = NULL;
        csrKernel = clCreateKernel(program, "gpu_sell_warp", &errorCode); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devSlicePtr); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devColid); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 2, sizeof(cl_mem), &devData); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 3, sizeof(cl_mem), &devVec); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 4, sizeof(cl_mem), &devRes); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 5, sizeof(int),    &slicenum); CHECKERROR;

        // Three untimed launches before the measurement.
        for (int k = 0; k < 3; k++)
        {
            errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
        }
        clFinish(cmdQueue);

        double teststart = timestamp();
        for (int i = 0; i < ntimes; i++)
        {
            errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
        }
        clFinish(cmdQueue);
        double testend = timestamp();
        double time_in_sec = (testend - teststart)/(double)dim2;
        // Two floating point operations (multiply and add) per non-zero.
        double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
        printf("\nSELL cpu warp time %lf ms GFLOPS %lf code %d \n\n",   time_in_sec / (double) ntimes * 1000, gflops, methodid);

        if (csrKernel)
            clReleaseKernel(csrKernel);

        double onetime = time_in_sec / (double) ntimes;
        floptable[methodid] = gflops;
        if (onetime < opttime)
        {
            opttime = onetime;
            optmethod = methodid;
            optflop = gflops;
        }
    }

    // Method 1: one work-group per slice.
    if (sliceheight == SELL_GROUP_SIZE)
    {
        int methodid = 1;
        cl_uint work_dim = 2;
        size_t blocksize[] = {SELL_GROUP_SIZE, 1};
        int gsize = slicenum * SELL_GROUP_SIZE;
        size_t globalsize[] = {static_cast<size_t>(gsize), static_cast<size_t>(dim2)};

        cl_kernel csrKernel = NULL;
        csrKernel = clCreateKernel(program, "gpu_sell_group", &errorCode); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devSlicePtr); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devColid); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 2, sizeof(cl_mem), &devData); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 3, sizeof(cl_mem), &devVec); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 4, sizeof(cl_mem), &devRes); CHECKERROR;
        errorCode = clSetKernelArg(csrKernel, 5, sizeof(int),    &slicenum); CHECKERROR;

        // Three untimed launches before the measurement.
        for (int k = 0; k < 3; k++)
        {
            errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
        }
        clFinish(cmdQueue);

        double teststart = timestamp();
        for (int i = 0; i < ntimes; i++)
        {
            errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
        }
        clFinish(cmdQueue);
        double testend = timestamp();
        double time_in_sec = (testend - teststart)/(double)dim2;
        double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
        printf("\nSELL cpu group time %lf ms GFLOPS %lf code %d \n\n",   time_in_sec / (double) ntimes * 1000, gflops, methodid);

        if (csrKernel)
            clReleaseKernel(csrKernel);

        double onetime = time_in_sec / (double) ntimes;
        floptable[methodid] = gflops;
        if (onetime < opttime)
        {
            opttime = onetime;
            optmethod = methodid;
            optflop = gflops;
        }
    }

    //Clean up
    if (image2dVec)
        free(image2dVec);

    if (devSlicePtr)
        clReleaseMemObject(devSlicePtr);
    if (devColid)
        clReleaseMemObject(devColid);
    if (devData)
        clReleaseMemObject(devData);
    if (devVec)
        clReleaseMemObject(devVec);
    if (devTexVec)
        clReleaseMemObject(devTexVec);
    if (devRes)
        clReleaseMemObject(devRes);


    freeObjects(devices, &context, &cmdQueue, &program);
}
예제 #15
0
/*
 * Releases the text-layer collection of the given window and clears the
 * pointer. Always returns 0. The rdi argument is not used.
 */
int clearWindow(MyWindow *mw, DictionaryIterator *rdi) {
  /* Same guard as the other callers of freeObjects(): after a first call
     the pointer is NULL, and a repeated call must not hand NULL on. */
  if (mw->myTextLayers) {
    freeObjects(mw->myTextLayers);
    mw->myTextLayers = NULL;
  }
  return 0;
}
예제 #16
0
/* Releases the global window collection, if there is one, and clears the pointer. */
void deinit_windows() {
  if (myWindows == NULL) {
    return;
  }

  freeObjects(myWindows);
  myWindows = NULL;
}
예제 #17
0
/*
 * Purges the system: calls freeObjects() and then records SUCCEEDED in
 * SysStatus.
 */
void Sys_Purge()
{
    freeObjects();

    SysStatus = SUCCEEDED;
}
예제 #18
0
// Releases whatever the parser still holds by delegating to freeObjects().
X3fParser::~X3fParser() {
  freeObjects();
}
예제 #19
0
/// Deletes all objects stored in the tree, starting the walk at the root.
/// A fresh set is passed along so that each object is deleted only once.
void KDTree::freeAllObj()
{
    std::unordered_set<Drawable *> alreadyFreed;

    freeObjects(root, alreadyFreed);
}
예제 #20
0
/**
 * Benchmarks four OpenCL kernel variants of blocked-ELL (B4ELL) sparse
 * matrix-vector multiplication and reports the fastest one.
 *
 * Variants, identified by the method id that is printed and stored in optmethod:
 *     0   gpu_bell<bh><bw>          input vector bound as a buffer
 *     1   gpu_bell<bh><bw>_mad      input vector bound as a buffer
 *     100 gpu_bell<bh><bw>_tx       input vector bound as a 2D image
 *     101 gpu_bell<bh><bw>_mad_tx   input vector bound as a 2D image
 *
 * For every variant the kernel is launched once and its output is checked with
 * two_vec_compare() against coores, then launched three more times untimed,
 * and finally launched ntimes times between two timestamp() calls.
 *
 * @param mat         Matrix in B4ELL layout (column ids, data, alignment and block sizes are read).
 * @param vec         Input vector; mat->matinfo.width floats are read.
 * @param result      Initial contents uploaded to the device result buffer
 *                    (mat->matinfo.height floats). It is not written by this function.
 * @param dim2Size    Size of the second NDRange dimension; the elapsed time is also divided by it.
 * @param opttime     Out: smallest per-launch time over all variants (presumably seconds,
 *                    depending on timestamp()); stays at 10000 if no variant beats that.
 * @param optmethod   Out: method id of the fastest variant (0 if none beat the initial opttime).
 * @param oclfilename Forwarded to initialization().
 * @param deviceType  Forwarded to initialization().
 * @param coores      Reference result handed to two_vec_compare().
 * @param ntimes      Number of timed launches per variant.
 * @param bw          Added to the second digit of the kernel name and printed as block width.
 * @param bh          Added to the first digit of the kernel name and printed as block height.
 */
void spmv_b4ell_ocl(b4ell_matrix<int, float>* mat, float* vec, float* result, int dim2Size, double& opttime, int& optmethod, char* oclfilename, cl_device_type deviceType, float* coores, int ntimes, int bw, int bh)
{
    // Reset the outputs first so that every exit path leaves them defined.
    opttime = 10000.0f;
    optmethod = 0;

    cl_device_id* devices = NULL;
    cl_context context = NULL;
    cl_command_queue cmdQueue = NULL;
    cl_program program = NULL;

    // The call must live outside assert(): with NDEBUG defined the whole assert
    // expression is removed and the OpenCL objects would never be created.
    const int initStatus = initialization(deviceType, devices, &context, &cmdQueue, &program, oclfilename);
    assert(initStatus == 1);
    if (initStatus != 1)
    {
	fprintf(stderr, "spmv_b4ell_ocl: OpenCL initialization failed\n");
	return;
    }

    cl_int errorCode = CL_SUCCESS;

    //Create device memory objects
    cl_mem devColid;
    cl_mem devData;
    cl_mem devVec;
    cl_mem devRes;
    cl_mem devTexVec;

    //Initialize values
    int col_align = mat->b4ell_height_aligned;
    int data_align = mat->b4ell_float4_aligned;
    int nnz = mat->matinfo.nnz;
    int rownum = mat->matinfo.height;
    int blockrownum = mat->b4ell_row_num;
    int vecsize = mat->matinfo.width;
    int b4ellnum = mat->b4ell_block_num;
    int bwidth = mat->b4ell_bwidth;
    int bheight = mat->b4ell_bheight;
    int width4num = bwidth / 4;

    // Zero-padded copy of the input vector for the buffer-based kernels.
    int padveclen = findPaddedSize(vecsize, 8);
    float* paddedvec = (float*)malloc(sizeof(float)*padveclen);
    memset(paddedvec, 0, sizeof(float)*padveclen);
    memcpy(paddedvec, vec, sizeof(float)*vecsize);
    ALLOCATE_GPU_READ(devColid, mat->b4ell_col_id, sizeof(int)*col_align*b4ellnum);
    ALLOCATE_GPU_READ(devData, mat->b4ell_data, sizeof(float)*data_align*bheight*width4num*b4ellnum);
    ALLOCATE_GPU_READ(devVec, paddedvec, sizeof(float)*padveclen);
    int paddedres = findPaddedSize(rownum, 512);
    devRes = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*paddedres, NULL, &errorCode); CHECKERROR;
    errorCode = clEnqueueWriteBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, result, 0, NULL, NULL); CHECKERROR;
    const cl_image_format floatFormat =
    {
	CL_RGBA,
	CL_FLOAT,
    };

    // Image copy of the input vector for the *_tx kernels: four floats per RGBA
    // texel, so the row count is rounded up to a multiple of 4 and the image
    // itself has height/4 rows.
    int width = VEC2DWIDTH;
    int height = (vecsize + VEC2DWIDTH - 1)/VEC2DWIDTH;
    if (height % 4 != 0)
	height += (4 - (height % 4));
    float* image2dVec = (float*)malloc(sizeof(float)*width*height);
    memset(image2dVec, 0, sizeof(float)*width*height);
    for (int i = 0; i < vecsize; i++)
    {
	image2dVec[i] = vec[i];
    }
    size_t origin[] = {0, 0, 0};
    size_t vectorSize[] = {(size_t)width, (size_t)(height/4), 1};
    devTexVec = clCreateImage2D(context, CL_MEM_READ_ONLY, &floatFormat, width, height/4, 0, NULL, &errorCode); CHECKERROR;
    errorCode = clEnqueueWriteImage(cmdQueue, devTexVec, CL_TRUE, origin, vectorSize, 0, 0, image2dVec, 0, NULL, NULL); CHECKERROR;
    clFinish(cmdQueue);

    //printf("\nvec length %d padded length %d", mat->matinfo.width, padveclength);

    int dim2 = dim2Size;

    // Variant 0: plain kernel, vector read from a buffer.
    {
	int methodid = 0;
	cl_uint work_dim = 2;
	size_t blocksize[] = {BELL_GROUP_SIZE, 1};
	int gsize = ((blockrownum + BELL_GROUP_SIZE - 1)/BELL_GROUP_SIZE)*BELL_GROUP_SIZE;
	size_t globalsize[] = {(size_t)gsize, (size_t)dim2};
	int data_align4 = data_align / 4;
	// The two '0' characters at index 8 and 9 become the block height / width digits.
	char kernelname[100] = "gpu_bell00";
	kernelname[8] += bh;
	kernelname[9] += bw;

	cl_kernel csrKernel = NULL;
	csrKernel = clCreateKernel(program, kernelname, &errorCode); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devColid); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devData); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 2, sizeof(int),    &data_align4); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 3, sizeof(int),    &col_align); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 4, sizeof(int),    &b4ellnum); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 5, sizeof(cl_mem), &devVec); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 6, sizeof(cl_mem), &devRes); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 7, sizeof(int),    &blockrownum); CHECKERROR;

	// Correctness run: reset the result buffer, launch once, compare with the reference.
	errorCode = clEnqueueWriteBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, result, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	float* tmpresult = (float*)malloc(sizeof(float)*rownum);
	errorCode = clEnqueueReadBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, tmpresult, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	two_vec_compare(coores, tmpresult, rownum);
	free(tmpresult);

	// Untimed launches before the measurement.
	for (int k = 0; k < 3; k++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);

	double teststart = timestamp();
	for (int i = 0; i < ntimes; i++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);
	double testend = timestamp();
	double time_in_sec = (testend - teststart)/(double)dim2;
	double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
	printf("\nBELL %dx%d block cpu time %lf ms GFLOPS %lf code %d \n\n", bh, bw,  time_in_sec / (double) ntimes * 1000, gflops, methodid);

	if (csrKernel)
	    clReleaseKernel(csrKernel);

	double onetime = time_in_sec / (double) ntimes;
	if (onetime < opttime)
	{
	    opttime = onetime;
	    optmethod = methodid;
	}

    }
    // Variant 1: "_mad" kernel, vector read from a buffer.
    {
	int methodid = 1;
	cl_uint work_dim = 2;
	size_t blocksize[] = {BELL_GROUP_SIZE, 1};
	int gsize = ((blockrownum + BELL_GROUP_SIZE - 1)/BELL_GROUP_SIZE)*BELL_GROUP_SIZE;
	size_t globalsize[] = {(size_t)gsize, (size_t)dim2};
	int data_align4 = data_align / 4;
	char kernelname[100] = "gpu_bell00_mad";
	kernelname[8] += bh;
	kernelname[9] += bw;

	cl_kernel csrKernel = NULL;
	csrKernel = clCreateKernel(program, kernelname, &errorCode); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devColid); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devData); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 2, sizeof(int),    &data_align4); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 3, sizeof(int),    &col_align); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 4, sizeof(int),    &b4ellnum); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 5, sizeof(cl_mem), &devVec); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 6, sizeof(cl_mem), &devRes); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 7, sizeof(int),    &blockrownum); CHECKERROR;

	errorCode = clEnqueueWriteBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, result, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	float* tmpresult = (float*)malloc(sizeof(float)*rownum);
	errorCode = clEnqueueReadBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, tmpresult, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	two_vec_compare(coores, tmpresult, rownum);
	free(tmpresult);

	for (int k = 0; k < 3; k++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);

	double teststart = timestamp();
	for (int i = 0; i < ntimes; i++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);
	double testend = timestamp();
	double time_in_sec = (testend - teststart)/(double)dim2;
	double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
	printf("\nBELL %dx%d block mad cpu time %lf ms GFLOPS %lf code %d \n\n", bh, bw,  time_in_sec / (double) ntimes * 1000, gflops, methodid);

	if (csrKernel)
	    clReleaseKernel(csrKernel);

	double onetime = time_in_sec / (double) ntimes;
	if (onetime < opttime)
	{
	    opttime = onetime;
	    optmethod = methodid;
	}

    }
    // Variant 100: "_tx" kernel, vector read from the 2D image.
    {
	int methodid = 100;
	cl_uint work_dim = 2;
	size_t blocksize[] = {BELL_GROUP_SIZE, 1};
	int gsize = ((blockrownum + BELL_GROUP_SIZE - 1)/BELL_GROUP_SIZE)*BELL_GROUP_SIZE;
	size_t globalsize[] = {(size_t)gsize, (size_t)dim2};
	int data_align4 = data_align / 4;
	char kernelname[100] = "gpu_bell00_tx";
	kernelname[8] += bh;
	kernelname[9] += bw;

	cl_kernel csrKernel = NULL;
	csrKernel = clCreateKernel(program, kernelname, &errorCode); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devColid); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devData); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 2, sizeof(int),    &data_align4); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 3, sizeof(int),    &col_align); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 4, sizeof(int),    &b4ellnum); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 5, sizeof(cl_mem), &devTexVec); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 6, sizeof(cl_mem), &devRes); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 7, sizeof(int),    &blockrownum); CHECKERROR;

	errorCode = clEnqueueWriteBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, result, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	float* tmpresult = (float*)malloc(sizeof(float)*rownum);
	errorCode = clEnqueueReadBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, tmpresult, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	two_vec_compare(coores, tmpresult, rownum);
	free(tmpresult);

	for (int k = 0; k < 3; k++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);

	double teststart = timestamp();
	for (int i = 0; i < ntimes; i++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);
	double testend = timestamp();
	double time_in_sec = (testend - teststart)/(double)dim2;
	double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
	printf("\nBELL %dx%d block tx cpu time %lf ms GFLOPS %lf code %d \n\n", bh, bw,  time_in_sec / (double) ntimes * 1000, gflops, methodid);

	if (csrKernel)
	    clReleaseKernel(csrKernel);

	double onetime = time_in_sec / (double) ntimes;
	if (onetime < opttime)
	{
	    opttime = onetime;
	    optmethod = methodid;
	}

    }
    // Variant 101: "_mad_tx" kernel, vector read from the 2D image.
    {
	int methodid = 101;
	cl_uint work_dim = 2;
	size_t blocksize[] = {BELL_GROUP_SIZE, 1};
	int gsize = ((blockrownum + BELL_GROUP_SIZE - 1)/BELL_GROUP_SIZE)*BELL_GROUP_SIZE;
	size_t globalsize[] = {(size_t)gsize, (size_t)dim2};
	int data_align4 = data_align / 4;
	char kernelname[100] = "gpu_bell00_mad_tx";
	kernelname[8] += bh;
	kernelname[9] += bw;

	cl_kernel csrKernel = NULL;
	csrKernel = clCreateKernel(program, kernelname, &errorCode); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devColid); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devData); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 2, sizeof(int),    &data_align4); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 3, sizeof(int),    &col_align); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 4, sizeof(int),    &b4ellnum); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 5, sizeof(cl_mem), &devTexVec); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 6, sizeof(cl_mem), &devRes); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 7, sizeof(int),    &blockrownum); CHECKERROR;

	errorCode = clEnqueueWriteBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, result, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	float* tmpresult = (float*)malloc(sizeof(float)*rownum);
	errorCode = clEnqueueReadBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, tmpresult, 0, NULL, NULL); CHECKERROR;
	clFinish(cmdQueue);
	two_vec_compare(coores, tmpresult, rownum);
	free(tmpresult);

	for (int k = 0; k < 3; k++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);

	double teststart = timestamp();
	for (int i = 0; i < ntimes; i++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);
	double testend = timestamp();
	double time_in_sec = (testend - teststart)/(double)dim2;
	double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
	printf("\nBELL %dx%d block mad tx cpu time %lf ms GFLOPS %lf code %d \n\n", bh, bw,  time_in_sec / (double) ntimes * 1000, gflops, methodid);

	if (csrKernel)
	    clReleaseKernel(csrKernel);

	double onetime = time_in_sec / (double) ntimes;
	if (onetime < opttime)
	{
	    opttime = onetime;
	    optmethod = methodid;
	}

    }

    //Clean up
    if (image2dVec)
	free(image2dVec);

    if (devColid)
	clReleaseMemObject(devColid);
    if (devData)
	clReleaseMemObject(devData);
    if (devVec)
	clReleaseMemObject(devVec);
    if (devTexVec)
	clReleaseMemObject(devTexVec);
    if (devRes)
	clReleaseMemObject(devRes);

    // Freed only after devVec is released, in case ALLOCATE_GPU_READ keeps
    // referring to the host pointer while the buffer is alive.
    if (paddedvec)
	free(paddedvec);

    freeObjects(devices, &context, &cmdQueue, &program);
}
예제 #21
0
int main(int argc, char **argv) {

  if (argc != 6 && argc != 5) {
     cerr << "ERROR: Not enough arguments. Usage: " << argv[0]
            << " [geomid1] [geomid2] [partition_file] [prefixpath1] [prefixpath2]" << endl;
     return -1;
  }
  //int uid_idx  = args_info.uid_arg;
  filename = argv[3];
  GEOM_IDX = 2;
  JOIN_IDX = -1;

  char* stdinfilename = getenv("mapreduce_map_input_file");
  char* prefix1 = argv[4];
  char* prefix2 = NULL;
  if (argc == 6) {
     prefix2 = argv[5];
  }

  if ( strstr(stdinfilename, prefix1) == NULL) {
     JOIN_IDX = 2;
     GEOM_IDX = atoi(argv[2]);
  } else {
     JOIN_IDX = 1;
     GEOM_IDX = atoi(argv[1]);
  }
 


 // cerr << "JOIN_IDX: " << JOIN_IDX << " Geom: " << GEOM_IDX <<endl;

  if (JOIN_IDX < 0) {
        cerr << "Invalid join index" << endl;
        return -1;
   }

  gf = new GeometryFactory(new PrecisionModel(),0);
  wkt_reader= new WKTReader(gf);


  // process input data 
  // map<int,Geometry*> geom_polygons;
  string input_line;
  vector<string> fields;
  id_type id = 0; 
  Geometry* geom ; 

  genTiles();

  bool ret = buildIndex();
  if (ret == false) {
    cerr << "ERROR: Index building on tile structure has failed ." << std::endl;
    return 1 ;
  }
  else 
#ifndef NDEBUG  
    cerr << "GRIDIndex Generated successfully." << endl;
#endif


  cerr << "Reading input from stdin..." <<endl; 
  while(cin && getline(cin, input_line) && !cin.eof()){
    fields = parse(input_line);
    if (fields[GEOM_IDX].length() <2 )
    {
#ifndef NDEBUG
      cerr << "skipping record [" << id <<"]"<< endl;
#endif
      continue ;  // skip lines which has empty geometry
    }
    // try {
    geom = wkt_reader->read(fields[GEOM_IDX]);
    //}
    /*catch (...)
      {
      cerr << "WARNING: Record [id = " <<i << "] is not well formatted "<<endl;
      cerr << input_line << endl;
      continue ;
      }*/
//     cout << input_line << endl;
     doQuery(geom);
     emitHits(geom, input_line);
     delete geom;
  }

 // cerr << "Number of tiles: " << geom_tiles.size() << endl;
  
  // build spatial index for input polygons 




  cout.flush();
  cerr.flush();
  freeObjects();
  return 0; // success
}
예제 #22
0
/**
 * Benchmarks the two-stage COO sparse matrix-vector multiplication on OpenCL
 * (kernels "gpu_coo_s1" followed by "gpu_coo_s2") and reports its timing.
 *
 * The COO arrays are copied into host buffers padded to COO_ALIGNMENT entries;
 * padding entries repeat the last row/column index with a data value of 0.
 * The first kernel is launched three times untimed, then both kernels are
 * launched ntimes times between two timestamp() calls.
 *
 * @param mat         Matrix in COO layout (coo_row_id, coo_col_id, coo_data, matinfo are read).
 * @param vec         Input vector; mat->matinfo.width floats are read.
 * @param result      mat->matinfo.height entries are set to 0 and uploaded as the initial
 *                    device result buffer. The computed product is not copied back.
 * @param dim2Size    Size of the second NDRange dimension; the elapsed time is also divided by it.
 * @param opttime     Out: per-iteration time (presumably seconds, depending on timestamp());
 *                    stays at 10000 if the measurement is not below that.
 * @param optflop     Out: GFLOPS of the measurement; only written when opttime is updated.
 * @param optmethod   Out: method id, always 0 here.
 * @param oclfilename Forwarded to initialization().
 * @param deviceType  Forwarded to initialization().
 * @param ntimes      Number of timed iterations.
 * @param floptable   Out: floptable[0] receives the measured GFLOPS.
 * @param maxgroupnum Upper bound on the number of work groups; also the element count of the
 *                    two temporary device buffers.
 */
void spmv_coo_ocl(coo_matrix<int, float>* mat, float* vec, float* result, int dim2Size, double& opttime, double& optflop, int& optmethod, char* oclfilename, cl_device_type deviceType, int ntimes, double* floptable, int maxgroupnum)
{
    // Reset the outputs first so that every exit path leaves them defined.
    opttime = 10000.0f;
    optmethod = 0;

    for (int i = 0; i < mat->matinfo.height; i++)
	result[i] = 0.0f;

    int nnz = mat->matinfo.nnz;

    // With no non-zeros or no work groups the work partitioning below would
    // divide by zero (work_size and num_iters both become 0).
    if (nnz <= 0 || maxgroupnum <= 0)
    {
	fprintf(stderr, "spmv_coo_ocl: nothing to run (nnz %d, maxgroupnum %d)\n", nnz, maxgroupnum);
	return;
    }

    cl_device_id* devices = NULL;
    cl_context context = NULL;
    cl_command_queue cmdQueue = NULL;
    cl_program program = NULL;

    // The call must live outside assert(): with NDEBUG defined the whole assert
    // expression is removed and the OpenCL objects would never be created.
    const int initStatus = initialization(deviceType, devices, &context, &cmdQueue, &program, oclfilename);
    assert(initStatus == 1);
    if (initStatus != 1)
    {
	fprintf(stderr, "spmv_coo_ocl: OpenCL initialization failed\n");
	return;
    }

    cl_int errorCode = CL_SUCCESS;

    //Create device memory objects
    cl_mem devRowid;
    cl_mem devColid;
    cl_mem devData;
    cl_mem devVec;
    cl_mem devRes;
    cl_mem devTexVec;
    cl_mem devTmpRow;
    cl_mem devTmpData;

    //Initialize values
    int rownum = mat->matinfo.height;
    int vecsize = mat->matinfo.width;

    // Work partitioning: num_units groups of COO_GROUP_SIZE entries cover the
    // non-zeros; at most maxgroupnum groups are launched, each iterating
    // num_iters times over its share.
    int num_units = nnz / COO_GROUP_SIZE;
    if (nnz % COO_GROUP_SIZE != 0)
	num_units++;
    int group_num = (num_units < maxgroupnum) ? num_units : maxgroupnum;
    int work_size = group_num * COO_GROUP_SIZE;
    int num_iters = nnz / work_size;
    if (nnz % work_size != 0)
	num_iters++;
    int process_size = num_iters * COO_GROUP_SIZE;
    int active_warp = num_units / num_iters;
    if (num_units % num_iters != 0)
	active_warp++;

    // Padded host copies of the COO arrays.
    int paddedNNZ = findPaddedSize(nnz, COO_ALIGNMENT);
    int* paddedRow = (int*)malloc(sizeof(int)*paddedNNZ);
    int* paddedCol = (int*)malloc(sizeof(int)*paddedNNZ);
    float* paddedData = (float*)malloc(sizeof(float)*paddedNNZ);
    memcpy(paddedRow, mat->coo_row_id, sizeof(int)*nnz);
    memcpy(paddedCol, mat->coo_col_id, sizeof(int)*nnz);
    memcpy(paddedData, mat->coo_data, sizeof(float)*nnz);
    for (int i = nnz; i < paddedNNZ; i++)
    {
	paddedRow[i] = mat->coo_row_id[nnz - 1];
	paddedCol[i] = mat->coo_col_id[nnz - 1];
	paddedData[i] = 0.0f;
    }


    ALLOCATE_GPU_READ(devRowid, paddedRow, sizeof(int)*paddedNNZ);
    ALLOCATE_GPU_READ(devColid, paddedCol, sizeof(int)*paddedNNZ);
    ALLOCATE_GPU_READ(devData, paddedData, sizeof(float)*paddedNNZ);
    ALLOCATE_GPU_READ(devVec, vec, sizeof(float)*vecsize);
    int paddedres = findPaddedSize(rownum, 512);
    devRes = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*paddedres, NULL, &errorCode); CHECKERROR;
    errorCode = clEnqueueWriteBuffer(cmdQueue, devRes, CL_TRUE, 0, sizeof(float)*rownum, result, 0, NULL, NULL); CHECKERROR;
    devTmpRow = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*maxgroupnum, NULL, &errorCode); CHECKERROR;
    devTmpData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*maxgroupnum, NULL, &errorCode); CHECKERROR;

    const cl_image_format floatFormat =
    {
	CL_R,
	CL_FLOAT,
    };

    // Image copy of the input vector, one float per texel.
    // It is created and filled here, but not bound to either kernel below.
    int width = VEC2DWIDTH;
    int height = (vecsize + VEC2DWIDTH - 1)/VEC2DWIDTH;
    float* image2dVec = (float*)malloc(sizeof(float)*width*height);
    memset(image2dVec, 0, sizeof(float)*width*height);
    for (int i = 0; i < vecsize; i++)
    {
	image2dVec[i] = vec[i];
    }
    size_t origin[] = {0, 0, 0};
    size_t vectorSize[] = {(size_t)width, (size_t)height, 1};
    devTexVec = clCreateImage2D(context, CL_MEM_READ_ONLY, &floatFormat, width, height, 0, NULL, &errorCode); CHECKERROR;
    errorCode = clEnqueueWriteImage(cmdQueue, devTexVec, CL_TRUE, origin, vectorSize, 0, 0, image2dVec, 0, NULL, NULL); CHECKERROR;
    clFinish(cmdQueue);


    int dim2 = dim2Size;

    {
	int methodid = 0;
	cl_uint work_dim = 2;
	size_t blocksize[] = {COO_GROUP_SIZE, 1};
	int gsize = group_num * COO_GROUP_SIZE;
	size_t globalsize[] = {(size_t)gsize, (size_t)dim2};

	// Stage 1 kernel.
	cl_kernel csrKernel = NULL;
	csrKernel = clCreateKernel(program, "gpu_coo_s1", &errorCode); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 0, sizeof(cl_mem), &devRowid); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 1, sizeof(cl_mem), &devColid); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 2, sizeof(cl_mem), &devData); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 3, sizeof(int),    &process_size); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 4, sizeof(int),    &paddedNNZ); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 5, sizeof(cl_mem), &devVec); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 6, sizeof(cl_mem), &devRes); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 7, sizeof(cl_mem), &devTmpRow); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel, 8, sizeof(cl_mem), &devTmpData); CHECKERROR;

	printf("process size %d nnz %d gsize %d active_warp %d\n", process_size, paddedNNZ, gsize, active_warp);

	size_t blocksize2[] = {COO_GROUP_SIZE * 2, 1};
	size_t globalsize2[] = {COO_GROUP_SIZE * 2, (size_t)dim2};


	// Stage 2 kernel: consumes the temporary row/data buffers written by stage 1.
	cl_kernel csrKernel2 = NULL;
	csrKernel2 = clCreateKernel(program, "gpu_coo_s2", &errorCode); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel2, 0, sizeof(cl_mem), &devTmpRow); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel2, 1, sizeof(cl_mem), &devTmpData); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel2, 2, sizeof(int), &active_warp); CHECKERROR;
	errorCode = clSetKernelArg(csrKernel2, 3, sizeof(cl_mem), &devRes); CHECKERROR;

	// Untimed launches before the measurement (stage 1 only).
	for (int k = 0; k < 3; k++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);


	//int* tmpRow = (int*)malloc(sizeof(int)*maxgroupnum);
	//float* tmpData = (float*)malloc(sizeof(float)*maxgroupnum);


	double teststart = timestamp();
	for (int i = 0; i < ntimes; i++)
	{
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel, work_dim, NULL, globalsize, blocksize, 0, NULL, NULL); CHECKERROR;
	    errorCode = clEnqueueNDRangeKernel(cmdQueue, csrKernel2, work_dim, NULL, globalsize2, blocksize2, 0, NULL, NULL); CHECKERROR;
	}
	clFinish(cmdQueue);

	double testend = timestamp();
	double time_in_sec = (testend - teststart)/(double)dim2;
	double gflops = (double)nnz*2/(time_in_sec/(double)ntimes)/(double)1e9;
	printf("\nCOO cpu time %lf ms GFLOPS %lf code %d \n\n",   time_in_sec / (double) ntimes * 1000, gflops, methodid);

	if (csrKernel)
	    clReleaseKernel(csrKernel);
	if (csrKernel2)
	    clReleaseKernel(csrKernel2);

	double onetime = time_in_sec / (double) ntimes;
	floptable[methodid] = gflops;
	if (onetime < opttime)
	{
	    opttime = onetime;
	    optmethod = methodid;
	    optflop = gflops;
	}
	//for (int i = 0; i < active_warp; i++)
	//printf("Row %d Data %f\n", tmpRow[i], tmpData[i]);
    }


    //Clean up
    if (paddedRow)
	free(paddedRow);
    if (paddedCol)
	free(paddedCol);
    if (paddedData)
	free(paddedData);
    if (image2dVec)
	free(image2dVec);

    if (devRowid)
	clReleaseMemObject(devRowid);
    if (devColid)
	clReleaseMemObject(devColid);
    if (devData)
	clReleaseMemObject(devData);
    if (devVec)
	clReleaseMemObject(devVec);
    if (devTexVec)
	clReleaseMemObject(devTexVec);
    if (devRes)
	clReleaseMemObject(devRes);
    if (devTmpRow)
	clReleaseMemObject(devTmpRow);
    if (devTmpData)
	clReleaseMemObject(devTmpData);

    freeObjects(devices, &context, &cmdQueue, &program);
}