Example #1
0
int main(int argc, char** argv) 
{

   int step,burst;

   int nparticle = 8192; /* MUST be a nice power of two for simplicity */
   int nstep = 500;
   int nburst = 20; /* MUST divide the value of nstep without remainder */
   int nthread = 64; /* chosen for ATI Radeon HD 5870 */

   float dt = 0.0001;
   float eps = 0.0001;
   cl_float4* pos1 = (cl_float4*)clmalloc(stdgpu,nparticle*sizeof(cl_float4),0);
   cl_float4* pos2 = (cl_float4*)clmalloc(stdgpu,nparticle*sizeof(cl_float4),0);
   cl_float4* vel = (cl_float4*)clmalloc(stdgpu,nparticle*sizeof(cl_float4),0);

   nbody_init(nparticle,pos1,vel);

   void* h = clopen(stdgpu,"nbody_kern.cl",CLLD_NOW);
   cl_kernel krn = clsym(stdgpu,h,"nbody_kern",CLLD_NOW);

   clndrange_t ndr = clndrange_init1d(0,nparticle,nthread);

   clarg_set(stdgpu,krn,0,dt);
   clarg_set(stdgpu,krn,1,eps);
   clarg_set_global(stdgpu,krn,4,vel);
   clarg_set_local(stdgpu,krn,5,nthread*sizeof(cl_float4));

	clmsync(stdgpu,0,pos1,CL_MEM_DEVICE|CL_EVENT_NOWAIT);
	clmsync(stdgpu,0,vel,CL_MEM_DEVICE|CL_EVENT_NOWAIT);

   for(step=0; step<nstep; step+=nburst) {

      for(burst=0; burst<nburst; burst+=2) {

         clarg_set_global(stdgpu,krn,2,pos1);
         clarg_set_global(stdgpu,krn,3,pos2);
         clfork(stdgpu,0,krn,&ndr,CL_EVENT_NOWAIT);

         clarg_set_global(stdgpu,krn,2,pos2);
         clarg_set_global(stdgpu,krn,3,pos1);
         clfork(stdgpu,0,krn,&ndr,CL_EVENT_NOWAIT);
      
      }

      clmsync(stdgpu,0,pos1,CL_MEM_HOST|CL_EVENT_NOWAIT);

      clwait(stdgpu,0,CL_KERNEL_EVENT|CL_MEM_EVENT);

   }

   nbody_output(nparticle,pos1,vel);

   clclose(stdgpu,h);

   clfree(pos1);
   clfree(pos2);
   clfree(vel);  
}
BasketGeometricOpenCLOption::~BasketGeometricOpenCLOption()
{
    free(start_prices);
    free(asset_volatilities);
    free(correlations);

    if(cl_start_prices) {
        clfree(cl_start_prices);
    }
    if(cl_asset_volatilities) {
        clfree(cl_asset_volatilities);
    }
    if(cl_correlations) {
        clfree(cl_correlations);
    }
}
Example #3
0
File: uhid.c Project: MarginC/kame
int
uhidclose(dev_t dev, int flag, int mode, usb_proc_ptr p)
{
	struct uhid_softc *sc;

	USB_GET_SC(uhid, UHIDUNIT(dev), sc);

	DPRINTF(("uhidclose: sc=%p\n", sc));

	/* Disable interrupts. */
	usbd_abort_pipe(sc->sc_intrpipe);
	usbd_close_pipe(sc->sc_intrpipe);
	sc->sc_intrpipe = 0;

	ndflush(&sc->sc_q, sc->sc_q.c_cc);
	clfree(&sc->sc_q);

	free(sc->sc_ibuf, M_USBDEV);
	free(sc->sc_obuf, M_USBDEV);

	sc->sc_state &= ~UHID_OPEN;

	sc->sc_async = 0;

	return (0);
}
void BasketGeometricOpenCLOption::cleanup()
{
    if(cl_start_prices) {
        clfree(cl_start_prices);
    }
    if(cl_asset_volatilities) {
        clfree(cl_asset_volatilities);
    }
    if(cl_correlations) {
        clfree(cl_correlations);
    }

    cl_start_prices = NULL;
    cl_asset_volatilities = NULL;
    cl_correlations = NULL;
}
Example #5
0
/*
 * Tests memory allocation on the GPU by reading raster data in and,
 * after transfering it to the device, read it back and creating an
 * output raster that should be identical to the input one.
 *
 * int nrows .............. the number of rows in the DEM.
 * int ncols .............. the number of columns in the DEM.
 * int infd ............... file descriptor to read from the DEM.
 * int outfd .............. file descriptor to write to an output raster.
 * RASTER_MAP_TYPE rtype .. DEM element data type.
 *
 */
void test_cl (const int nrows, const int ncols,
              const int infd, const int outfd,
              RASTER_MAP_TYPE rtype)
{
    int row, col;

    CONTEXT * ctx = init_context ( );

    /* load raster DEM data */
    FCELL *dem_device = load_raster (ctx, nrows, ncols, infd, rtype);

    G_message ("Transfering raster data to the device ...");

    clmsync (ctx, 0, dem_device, CL_MEM_DEVICE|CL_EVENT_WAIT);

    G_message ("Transfering data from the device to the host ...");

    clmsync (ctx, 0, dem_device, CL_MEM_HOST|CL_EVENT_WAIT);

    /* save raster data */
    save_raster (dem_device, nrows, ncols, outfd);

    /* free allocated resources */
    clfree (dem_device);
}
Example #6
0
int
ugen_do_close(struct ugen_softc *sc, int endpt, int flag)
{
	struct ugen_endpoint *sce;
	int dir, i;

#ifdef DIAGNOSTIC
	if (!sc->sc_is_open[endpt]) {
		printf("ugenclose: not open\n");
		return (EINVAL);
	}
#endif

	if (endpt == USB_CONTROL_ENDPOINT) {
		DPRINTFN(5, ("ugenclose: close control\n"));
		sc->sc_is_open[endpt] = 0;
		return (0);
	}

	for (dir = OUT; dir <= IN; dir++) {
		if (!(flag & (dir == OUT ? FWRITE : FREAD)))
			continue;
		sce = &sc->sc_endpoints[endpt][dir];
		if (sce == NULL || sce->pipeh == NULL)
			continue;
		DPRINTFN(5, ("ugenclose: endpt=%d dir=%d sce=%p\n",
			     endpt, dir, sce));

		usbd_close_pipe(sce->pipeh);
		sce->pipeh = NULL;

		switch (sce->edesc->bmAttributes & UE_XFERTYPE) {
		case UE_INTERRUPT:
			ndflush(&sce->q, sce->q.c_cc);
			clfree(&sce->q);
			break;
		case UE_ISOCHRONOUS:
			for (i = 0; i < UGEN_NISOREQS; ++i)
				usbd_free_xfer(sce->isoreqs[i].xfer);

		default:
			break;
		}

		if (sce->ibuf != NULL) {
			free(sce->ibuf, M_USBDEV, 0);
			sce->ibuf = NULL;
		}
	}
	sc->sc_is_open[endpt] = 0;

	return (0);
}
void BasketGeometricOpenCLOption::setup_inputs()
{
    if(cl_start_prices) {
        clfree(cl_start_prices);
    }
    if(cl_asset_volatilities) {
        clfree(cl_asset_volatilities);
    }
    if(cl_correlations) {
        clfree(cl_correlations);
    }

    cl_start_prices = opencl_memcpy<cl_float, float>(context, number_of_assets, start_prices);
    cl_asset_volatilities = opencl_memcpy<cl_float, float>(context, number_of_assets, asset_volatilities);
    cl_correlations = opencl_memcpy<cl_float, float>(context, number_of_assets*number_of_assets, correlations);

    assert(cl_start_prices != NULL);
    assert(cl_asset_volatilities != NULL);
    assert(cl_correlations != NULL);

    clmsync(context, device_number, cl_start_prices, CL_MEM_DEVICE|CL_EVENT_NOWAIT);
    clmsync(context, device_number, cl_asset_volatilities, CL_MEM_DEVICE|CL_EVENT_NOWAIT);
    clmsync(context, device_number, cl_correlations, CL_MEM_DEVICE|CL_EVENT_NOWAIT);
}
Example #8
0
int
uhidclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct uhid_softc *sc;

	sc = uhid_cd.cd_devs[UHIDUNIT(dev)];

	DPRINTF(("uhidclose: sc=%p\n", sc));

	clfree(&sc->sc_q);
	free(sc->sc_obuf, M_USBDEV);
	sc->sc_async = NULL;
	uhidev_close(&sc->sc_hdev);

	return (0);
}
Example #9
0
void cexit() {
	printf(ANSI_COLOR_BLUE"Saliendo!" ANSI_COLOR_RESET "\n");
	clfree();
	handOff(0);
	exit(0);
}
Example #10
0
File: nD.cpp Project: nickoppen/nD
int main (int argc, char * argv[])
{

   char kernFile[] = "../src/nD.cl";	// the default location of the cl file - can be over ridden by the first command line arguement
   char debugFile[] = "./debug";		// the debug file location
   char msg[255];						// space for the debug message where a program variable is to be written to the debug file
   char * clFile;						// the cl file string actually used (either argv or kernFile)
   void * openHandle;					// the return value to clOpen
   int	bytesPerCore = 16;				// how many bytes we want each core to process
   int workItems = 32; 					// the total number workItems (threads sent to the Epiphany)
   int	i;								// loop counter
   cl_uchar * wrkArea1D;					// the pointer to the malloc'd space (bytesPerCore * workItems)
   cl_uchar * wrkArea2D;

//	These variables will be useful when I get getDeviceInfo and getBuildInfo working
//   unsigned int space;					// the space required for clGetInfo style calls
//   cl_build_status bOk;					// the return value from clGetProgramBuildInfo
//   size_t computeUnits;					// return from GetDeviceInfo
//   char strInfo[20];

   FILE * pFile;

   if(argc == 2)
	   clFile = argv[1];
   else
	   clFile = kernFile;

   pFile = fopen(clFile, "r");
   if (pFile == NULL)
   {
	   printf("Opening the Kernel file: %s produced an error(%d). Make sure that the source code variable kern has a valid path to the cl code and that the code is readable.\n", clFile, errno);
	   exit(0);
   }
   else
	   fclose(pFile);	// only open the file to check that it is there and readable


   debugReset(debugFile);
   debugdebug(debugFile, (char*)"How many devices do we have?\n");


   sprintf(msg, "About to malloc wrkArea1D: %d\n", workItems * bytesPerCore);
   debugdebug(debugFile, msg);
   wrkArea1D = (cl_uchar*) clmalloc(stdacc, workItems * bytesPerCore, 0);
   wrkArea2D = (cl_uchar*) clmalloc(stdacc, workItems * bytesPerCore, 0);

   for (i=0; i < workItems * bytesPerCore; i++)
	   wrkArea2D[i] = wrkArea1D[i] = 0;

   sprintf(msg, "Well malloc worked! Opening kernel file:%s\n", clFile);
   debugdebug(debugFile, msg);
   openHandle = clopen(stdacc, clFile, CLLD_NOW);
   // open the standard accellerator context (i.e. the Epiphany chip, reading in the .cl file and compiling it immediately

   clndrange_t ndr1D = clndrange_init1d(NULL,							// global offset (always zero)
		   	   	   	   	   	   	   ((size_t)workItems),					// total number of threads (get_global_id will return 0 to workItems
		   	   	   	   	   	   	   ((size_t)bytesPerCore));				// How many bytes do we tell the kernel to process via get_local_size(0)
   exKernel(openHandle, &ndr1D, (char*)"k_init1D", workItems, bytesPerCore, wrkArea1D, debugFile);

   clndrange_t ndr2D = clndrange_init2d(NULL,							// global offset (always zero)
		   	   	   	   	   	   	   ((size_t)workItems),					// total number of threads (get_global_id will return 0 to workItems
		   	   	   	   	   	   	   ((size_t)(bytesPerCore/4)),			// How many bytes do we tell the kernel to process via get_local_size(0)
		   	   	   	   	   	   	   NULL,								// another useless global offset
		   	   	   	   	   	   	   ((size_t)workItems),					// a value that does not seem to do anything useful
		   	   	   	   	   	   	   ((size_t)(bytesPerCore/4)));			// How many rows to process per call returned by get_local_size(1)

   exKernel(openHandle, &ndr2D, (char*)"k_init2D", workItems, bytesPerCore, wrkArea2D, debugFile);


   // ============================================================================================================
   // show the results
   // ============================================================================================================

   printf("The 1D data:\n");
   for(i=0; i < workItems * bytesPerCore; i++)
   {
	   printf("%u\t", wrkArea1D[i]);
	   if(((i+1) % bytesPerCore) == 0)
		   printf("\n");
   }

   printf("The 2D data:\n");
   for(i=0; i < workItems * bytesPerCore; i++)
   {
	   printf("%u\t", wrkArea2D[i]);
	   if(((i+1) % bytesPerCore) == 0)
		   printf("\n");
   }

   clfree(wrkArea1D);
   clfree(wrkArea2D);

   return 0;
}
Example #11
0
int main()
{
   cl_uint n = 64;

#if(1)

	/* use default contexts, if no GPU use CPU */
   CLCONTEXT* cp = (stdgpu)? stdgpu : stdcpu;

   unsigned int devnum = 0;

   void* clh = clopen(cp,"matvecmult.cl",CLLD_NOW);
   cl_kernel krn = clsym(cp,clh,"matvecmult_kern",0);

   /* allocate OpenCL device-sharable memory */
   cl_float* aa = (float*)clmalloc(cp,n*n*sizeof(cl_float),0);
   cl_float* b = (float*)clmalloc(cp,n*sizeof(cl_float),0);
   cl_float* c = (float*)clmalloc(cp,n*sizeof(cl_float),0);

   clndrange_t ndr = clndrange_init1d( 0, n, 64);

   /* initialize vectors a[] and b[], zero c[] */
   int i,j; 
   for(i=0;i<n;i++) for(j=0;j<n;j++) aa[i*n+j] = 1.1f*i*j;
   for(i=0;i<n;i++) b[i] = 2.2f*i;
   for(i=0;i<n;i++) c[i] = 0.0f;

   /* define the computational domain and workgroup size */
   //clndrange_t ndr = clndrange_init1d( 0, n, 64);

   /* non-blocking sync vectors a and b to device memory (copy to GPU)*/
   clmsync(cp,devnum,aa,CL_MEM_DEVICE|CL_EVENT_NOWAIT);
   clmsync(cp,devnum,b,CL_MEM_DEVICE|CL_EVENT_NOWAIT);

   /* set the kernel arguments */
   clarg_set(cp,krn,0,n);
   clarg_set_global(cp,krn,1,aa);
   clarg_set_global(cp,krn,2,b);
   clarg_set_global(cp,krn,3,c);

   /* non-blocking fork of the OpenCL kernel to execute on the GPU */
   clfork(cp,devnum,krn,&ndr,CL_EVENT_NOWAIT);

   /* non-blocking sync vector c to host memory (copy back to host) */
   clmsync(cp,0,c,CL_MEM_HOST|CL_EVENT_NOWAIT);

   /* force execution of operations in command queue (non-blocking call) */
   clflush(cp,devnum,0);

   /* block on completion of operations in command queue */
   clwait(cp,devnum,CL_ALL_EVENT);

   for(i=0;i<n;i++) printf("%d %f %f\n",i,b[i],c[i]);

   clfree(aa);
   clfree(b);
   clfree(c);

   clclose(cp,clh);

#endif

}
Example #12
0
int
ugenopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct ugen_softc *sc;
	int unit = UGENUNIT(dev);
	int endpt = UGENENDPOINT(dev);
	usb_endpoint_descriptor_t *edesc;
	struct ugen_endpoint *sce;
	int dir, isize;
	usbd_status err;
	struct usbd_xfer *xfer;
	void *buf;
	int i, j;

	if (unit >= ugen_cd.cd_ndevs)
		return (ENXIO);
	sc = ugen_cd.cd_devs[unit];
	if (sc == NULL)
		return (ENXIO);

	DPRINTFN(5, ("ugenopen: flag=%d, mode=%d, unit=%d endpt=%d\n",
		     flag, mode, unit, endpt));

	if (sc == NULL || usbd_is_dying(sc->sc_udev))
		return (ENXIO);

	if (sc->sc_is_open[endpt])
		return (EBUSY);

	if (endpt == USB_CONTROL_ENDPOINT) {
		sc->sc_is_open[USB_CONTROL_ENDPOINT] = 1;
		return (0);
	}

	/* Make sure there are pipes for all directions. */
	for (dir = OUT; dir <= IN; dir++) {
		if (flag & (dir == OUT ? FWRITE : FREAD)) {
			sce = &sc->sc_endpoints[endpt][dir];
			if (sce == 0 || sce->edesc == 0)
				return (ENXIO);
		}
	}

	/* Actually open the pipes. */
	/* XXX Should back out properly if it fails. */
	for (dir = OUT; dir <= IN; dir++) {
		if (!(flag & (dir == OUT ? FWRITE : FREAD)))
			continue;
		sce = &sc->sc_endpoints[endpt][dir];
		sce->state = 0;
		sce->timeout = USBD_NO_TIMEOUT;
		DPRINTFN(5, ("ugenopen: sc=%p, endpt=%d, dir=%d, sce=%p\n",
			     sc, endpt, dir, sce));
		edesc = sce->edesc;
		switch (edesc->bmAttributes & UE_XFERTYPE) {
		case UE_INTERRUPT:
			if (dir == OUT) {
				err = usbd_open_pipe(sce->iface,
				    edesc->bEndpointAddress, 0, &sce->pipeh);
				if (err)
					return (EIO);
				break;
			}
			isize = UGETW(edesc->wMaxPacketSize);
			if (isize == 0)	/* shouldn't happen */
				return (EINVAL);
			sce->ibuf = malloc(isize, M_USBDEV, M_WAITOK);
			DPRINTFN(5, ("ugenopen: intr endpt=%d,isize=%d\n",
				     endpt, isize));
			clalloc(&sce->q, UGEN_IBSIZE, 0);
			err = usbd_open_pipe_intr(sce->iface,
				  edesc->bEndpointAddress,
				  USBD_SHORT_XFER_OK, &sce->pipeh, sce,
				  sce->ibuf, isize, ugenintr,
				  USBD_DEFAULT_INTERVAL);
			if (err) {
				free(sce->ibuf, M_USBDEV, 0);
				clfree(&sce->q);
				return (EIO);
			}
			DPRINTFN(5, ("ugenopen: interrupt open done\n"));
			break;
		case UE_BULK:
			err = usbd_open_pipe(sce->iface,
				  edesc->bEndpointAddress, 0, &sce->pipeh);
			if (err)
				return (EIO);
			break;
		case UE_ISOCHRONOUS:
			if (dir == OUT)
				return (EINVAL);
			isize = UGETW(edesc->wMaxPacketSize);
			if (isize == 0)	/* shouldn't happen */
				return (EINVAL);
			sce->ibuf = malloc(isize * UGEN_NISOFRAMES,
				M_USBDEV, M_WAITOK);
			sce->cur = sce->fill = sce->ibuf;
			sce->limit = sce->ibuf + isize * UGEN_NISOFRAMES;
			DPRINTFN(5, ("ugenopen: isoc endpt=%d, isize=%d\n",
				     endpt, isize));
			err = usbd_open_pipe(sce->iface,
				  edesc->bEndpointAddress, 0, &sce->pipeh);
			if (err) {
				free(sce->ibuf, M_USBDEV, 0);
				return (EIO);
			}
			for(i = 0; i < UGEN_NISOREQS; ++i) {
				sce->isoreqs[i].sce = sce;
				xfer = usbd_alloc_xfer(sc->sc_udev);
				if (xfer == 0)
					goto bad;
				sce->isoreqs[i].xfer = xfer;
				buf = usbd_alloc_buffer
					(xfer, isize * UGEN_NISORFRMS);
				if (buf == 0) {
					i++;
					goto bad;
				}
				sce->isoreqs[i].dmabuf = buf;
				for(j = 0; j < UGEN_NISORFRMS; ++j)
					sce->isoreqs[i].sizes[j] = isize;
				usbd_setup_isoc_xfer
					(xfer, sce->pipeh, &sce->isoreqs[i],
					 sce->isoreqs[i].sizes,
					 UGEN_NISORFRMS, USBD_NO_COPY,
					 ugen_isoc_rintr);
				(void)usbd_transfer(xfer);
			}
			DPRINTFN(5, ("ugenopen: isoc open done\n"));
			break;
		bad:
			while (--i >= 0) /* implicit buffer free */
				usbd_free_xfer(sce->isoreqs[i].xfer);
			return (ENOMEM);
		case UE_CONTROL:
			sce->timeout = USBD_DEFAULT_TIMEOUT;
			return (EINVAL);
		}
	}
	sc->sc_is_open[endpt] = 1;
	return (0);
}
int main()
{
   cl_uint n = 1024;

	/* use default contexts, if no GPU use CPU */
   CLCONTEXT* cp = (stdgpu)? stdgpu : stdcpu;

   unsigned int devnum = 0;

#ifdef __FreeBSD__
   void* clh = clopen(cp,"matvecmult_special.cl",CLLD_NOW);
   cl_kernel krn = clsym(cp,clh,"matvecmult_special_kern",0);
#else
   cl_kernel krn = clsym(cp,0,"matvecmult_special_kern",0);
#endif

   /* allocate OpenCL device-sharable memory */
   cl_float* aa = (float*)clmalloc(cp,n*n*sizeof(cl_float),0);
   cl_float* b = (float*)clmalloc(cp,n*sizeof(cl_float),0);
   cl_float* c = (float*)clmalloc(cp,n*sizeof(cl_float),0);

   /* initialize vectors a[] and b[], zero c[] */
   int i,j; 
   for(i=0;i<n;i++) for(j=0;j<n;j++) aa[i*n+j] = 1.1f*i*j;
   for(i=0;i<n;i++) b[i] = 2.2f*i;
   for(i=0;i<n;i++) c[i] = 0.0f;


	/***
	 *** Create a image2d allocation to be used as a read-only table.
	 *** The table will consist of a 24x24 array of float coefficients.
	 *** The clmctl() call is used to set the type and shape of the table.
	 *** Note that we will only use the first component of the float4 elements.
	 ***/	
	cl_float4* table 
		= (cl_float4*)clmalloc(cp,24*24*sizeof(cl_float4),CL_MEM_DETACHED);
	clmctl(table,CL_MCTL_SET_IMAGE2D,24,24,0);
	clmattach(cp,table);

	/* initialize the table to some contrived values */
	for(i=0;i<24;i++) for(j=0;j<24;j++) table[i*24+j].x = 0.125f*(i-j);


   /* define the computational domain and workgroup size */
   clndrange_t ndr = clndrange_init1d( 0, n, 64);

   /* non-blocking sync vectors a and b to device memory (copy to GPU)*/
   clmsync(cp,devnum,aa,CL_MEM_DEVICE|CL_EVENT_NOWAIT);
   clmsync(cp,devnum,b,CL_MEM_DEVICE|CL_EVENT_NOWAIT);
   clmsync(cp,devnum,table,CL_MEM_DEVICE|CL_EVENT_NOWAIT);

   /* set the kernel arguments */
   clarg_set(cp,krn,0,n);
   clarg_set_global(cp,krn,1,aa);
   clarg_set_global(cp,krn,2,b);
   clarg_set_global(cp,krn,3,c);
   clarg_set_global(cp,krn,4,table);

   /* non-blocking fork of the OpenCL kernel to execute on the GPU */
   clfork(cp,devnum,krn,&ndr,CL_EVENT_NOWAIT);

   /* non-blocking sync vector c to host memory (copy back to host) */
   clmsync(cp,0,c,CL_MEM_HOST|CL_EVENT_NOWAIT);

   /* block on completion of operations in command queue */
   clwait(cp,devnum,CL_ALL_EVENT);

   for(i=0;i<n;i++) printf("%d %f %f\n",i,b[i],c[i]);

   clfree(aa);
   clfree(b);
   clfree(c);

#ifdef __FreeBSD__
	clclose(cp,clh);
#endif

}
int main()
{
   cl_uint n = 1024;

	/* use default contexts, if no ACCELERATOR use CPU */
   CLCONTEXT* cp = (stdacc)? stdacc : stdcpu;

   unsigned int devnum = 0;

	/******************************************************************
	 *** this example requires the .cl file to be available at run-time
	 *** and shows how to pass compiler options to the OCL compiler
	 ******************************************************************/

   void* clh = clopen(cp,"outerprod.cl",CLLD_NOBUILD);
   clbuild(cp,clh,"-D COEF=2", 0);
   cl_kernel krn = clsym(cp,clh,"outerprod_kern",0);

	if (!krn) { fprintf(stderr,"error: no OpenCL kernel\n"); exit(-1); }

   /* allocate OpenCL device-sharable memory */
   cl_float* a = (float*)clmalloc(cp,n*sizeof(cl_float),0);
   cl_float* b = (float*)clmalloc(cp,n*sizeof(cl_float),0);
   cl_float* c = (float*)clmalloc(cp,n*sizeof(cl_float),0);

   /* initialize vectors a[] and b[], zero c[] */
   int i; 
   for(i=0;i<n;i++) a[i] = 1.1f*i;
   for(i=0;i<n;i++) b[i] = 2.2f*i;
   for(i=0;i<n;i++) c[i] = 0.0f;

   /* non-blocking sync vectors a and b to device memory (copy to GPU)*/
   clmsync(cp,devnum,a,CL_MEM_DEVICE|CL_EVENT_WAIT);
   clmsync(cp,devnum,b,CL_MEM_DEVICE|CL_EVENT_WAIT);

   /* define the computational domain and workgroup size */
   clndrange_t ndr = clndrange_init1d( 0, n, 16);

   /* set the kernel arguments */
   clarg_set_global(cp,krn,0,a);
   clarg_set_global(cp,krn,1,b);
   clarg_set_global(cp,krn,2,c);

   /* non-blocking fork of the OpenCL kernel to execute on the GPU */
   clfork(cp,devnum,krn,&ndr,CL_EVENT_NOWAIT);

   /* block on completion of operations in command queue */
   clwait(cp,devnum,CL_ALL_EVENT);

   /* non-blocking sync vector c to host memory (copy back to host) */
   clmsync(cp,0,c,CL_MEM_HOST|CL_EVENT_NOWAIT);

   /* block on completion of operations in command queue */
   clwait(cp,devnum,CL_ALL_EVENT);

   for(i=0;i<n;i++) printf("%d %f %f %f\n",i,a[i],b[i],c[i]);

   clfree(a);
   clfree(b);
   clfree(c);

   clclose(cp,clh);
}
Example #15
0
/*
 * Calculates the path loss matrix for many transmitters and saves the
 * output to one common raster map.
 *
 * int nrows .............. the number of rows in the DEM.
 * int ncols .............. the number of columns in the DEM.
 * int tx_coord_col ....... transmitter column coordinate (in raster cells).
 * int tx_coord_row ....... transmitter row coordinate (in raster cells).
 * int tx_elevation ....... transmitter height above sea level.
 * int anthena_height ..... height of transmitter anthena above ground.
 * float rx_height ........ receiver height above ground.
 * float frequency ........ transmitter frequency in Mhz.
 * int radius ............. calculation radius around to the transmitter
 *                          (in raster cells).
 * int infd ............... file descriptor to read from the DEM.
 * int outfd .............. file descriptor to write resulting raster.
 * RASTER_MAP_TYPE rtype .. DEM element data type.
 *
 */
void hata_cl (const int nrows, const int ncols,
              const int tx_coord_col, const int tx_coord_row,
              const int tx_elevation, const int tx_anthena_height,
              const float rx_height, const float frequency,
              const int radius,
              const int infd, const int outfd, 
              RASTER_MAP_TYPE rtype)
{
    int i;

    /* number of processing tiles needed around each transmitter */
    int ntile = (radius*2) / _HATA_WITEM_PER_DIM_ + 1;

    G_message ("Receiver height above ground is %5.2f mts", rx_height);
    G_message ("Transmitter frequency is %7.2f Mhz", frequency);
    G_message ("Calculation radius is %d raster cells", radius);

    CONTEXT * ctx = init_context ( );

    /* initialize raster data */
    void *dem_in = load_raster (ctx, nrows, ncols, infd, rtype);
    void *dem_out = null_raster (ctx, nrows, ncols, rtype, NULL);

    /* load, compile and link the OpenCL kernel file */
    void* h = clopen (ctx, NULL, CLLD_NOW);
    /* select a kernel function from the loaded file */
    cl_kernel krn = clsym (ctx, h, "hata_urban_kern", CLLD_NOW);

    /* defines a 2D matrix of work groups & work items */
    size_t global_wsize = ntile * _HATA_WITEM_PER_DIM_;
    size_t local_wsize = _HATA_WITEM_PER_DIM_;

    if ((global_wsize % local_wsize) != 0)
        G_fatal_error ("Work group dimensions are incorrect.");
    else
        G_message ("Creating %d work-group(s) of [%d x %d] threads each", 
                   global_wsize/local_wsize,
                   local_wsize, local_wsize);

    clndrange_t ndr = clndrange_init2d (0,global_wsize,local_wsize, 
                                        0,global_wsize,local_wsize);

    // correction factor ahr
    float ahr = (1.1f*log10(frequency) - 0.7f)*rx_height - 
                (1.56f*log10(frequency) - 0.8f);

    /* set kernel parameters and local memory size */
    clarg_set (ctx, krn, 0, ncols);
    clarg_set (ctx, krn, 3, rx_height);
    clarg_set (ctx, krn, 4, frequency);
    clarg_set (ctx, krn, 5, ahr);

    size_t lmem_size = _HATA_WITEM_PER_DIM_*_HATA_WITEM_PER_DIM_*
                       sizeof(cl_float2);

    if (lmem_size > _HATA_MAX_LOCAL_MEM_)
        G_fatal_error ("Allocated local memory (%d) exceeds %d bytes.", lmem_size,
                                                                        _HATA_MAX_LOCAL_MEM_);

    clarg_set_local (ctx, krn, 8, lmem_size);

    /* transfer data to the device */
    clmsync (ctx, 0, dem_in, CL_MEM_DEVICE|CL_EVENT_NOWAIT);
    clmsync (ctx, 0, dem_out, CL_MEM_DEVICE|CL_EVENT_NOWAIT);

    /* set remaining kernel parameters */
    clarg_set_global (ctx, krn, 6, dem_in);
    clarg_set_global (ctx, krn, 7, dem_out);

    G_message ("Simulating ...");

    /* test transmitters

         ASNEBE | 1468 | 1200
         ASKZK  | 1395 | 1180
         ASPAR  | 1400 | 1245
         ASMARG | 1460 | 1203
         ASTARA | 1422 | 1235
         ASKZ   | 1396 | 1181
         ASENTP | 1387 | 1185
         ASISKP | 1399 | 1189
         ASEME  | 1448 | 1262
         ASOSTR | 1498 | 1253
         ASMELT | 1429 | 1196
         ASMART | 1446 | 1213
         ASLOM  | 1425 | 1226
         ASAZU  | 1421 | 1235
         ASONCE | 1433 | 1208
         ASMAR  | 1404 | 1150
         ASTEP  | 1450 | 1235
         ASAVLJ | 1417 | 1182
         ASTOZI | 1436 | 1198
         ASTRAL | 1422 | 1227
         ASTEG  | 1404 | 1193
    */
    const int ntest_tx = 21;
    cl_int2 *test_tx = malloc (ntest_tx * sizeof(cl_int2));

    test_tx[0] = (cl_int2) {1468,1200};
    test_tx[1] = (cl_int2) {1395,1180};
    test_tx[2] = (cl_int2) {1400,1245};
    test_tx[3] = (cl_int2) {1460,1203};
    test_tx[4] = (cl_int2) {1422,1235};
    test_tx[5] = (cl_int2) {1396,1181};
    test_tx[6] = (cl_int2) {1387,1185};
    test_tx[7] = (cl_int2) {1399,1189};
    test_tx[8] = (cl_int2) {1448,1262};
    test_tx[9] = (cl_int2) {1498,1253};
    test_tx[10] = (cl_int2) {1429,1196};
    test_tx[11] = (cl_int2) {1446,1213};
    test_tx[12] = (cl_int2) {1425,1226};
    test_tx[13] = (cl_int2) {1421,1235};
    test_tx[14] = (cl_int2) {1433,1208};
    test_tx[15] = (cl_int2) {1404,1150};
    test_tx[16] = (cl_int2) {1450,1235};
    test_tx[17] = (cl_int2) {1417,1182};
    test_tx[18] = (cl_int2) {1436,1198};
    test_tx[19] = (cl_int2) {1422,1227};
    test_tx[20] = (cl_int2) {1404,1193};

    /* for each transmitter */
    for (i = 0; i < ntest_tx; i++)
    {
        /* display completion percentage */
        G_percent (i, ntest_tx, 1);

        /* Transmitter coordinates */
        cl_int2 tx_coord = test_tx[i];

        //G_message ("The transmitter is located at %d, %d", tx_coord.x,
        //                                                   tx_coord.y);
        /* Transmitter heights */
        //G_message ("Transmitter height above sea level is %d mts", tx_elevation);
        //G_message ("Transmitter anthena height above ground is %d mts", tx_anthena_height);

        /* transmitter data */ 
        cl_int4 tx_data = (cl_int4) {tx_coord.x, tx_coord.y,
                                     tx_elevation, tx_anthena_height};

        /* tile offset within the raster */
        cl_int2 tx_offset = (cl_int2) {tx_coord.x - radius,
                                       tx_coord.y - radius};

        /* set (more) remaining kernel parameters */
        clarg_set (ctx, krn, 1, tx_data);
        clarg_set (ctx, krn, 2, tx_offset);

        /* start kernel execution */
        clfork (ctx, 0, krn, &ndr, CL_EVENT_NOWAIT);

        clwait (ctx, 0, CL_KERNEL_EVENT|CL_MEM_EVENT|CL_EVENT_RELEASE);
    }

    /* sync memory */
    clmsync (ctx, 0, dem_out, CL_MEM_HOST|CL_EVENT_WAIT);

    /* write the calculation output */
    save_raster (dem_out, nrows, ncols, outfd);

    /* free allocated resources */
    free (test_tx);
    clclose (ctx, h);
    clfree (dem_out);
    clfree (dem_in);
}
Example #16
0
/*
 * Calculates an interference matrix resulting from the tx power of all
 * transmitters combined with their respective path losses and the thermal
 * noise. The output is saved in one raster map.
 *
 * int nrows .............. the number of rows in the DEM.
 * int ncols .............. the number of columns in the DEM.
 * int tx_coord_col ....... transmitter column coordinate (in raster cells).
 * int tx_coord_row ....... transmitter row coordinate (in raster cells).
 * int tx_elevation ....... transmitter height above sea level.
 * int anthena_height ..... height of transmitter anthena above ground.
 * float rx_height ........ receiver height above ground.
 * float frequency ........ transmitter frequency in Mhz.
 * int radius ............. calculation radius around to the transmitter
 *                          (in raster cells).
 * int infd ............... file descriptor to read from the DEM.
 * int outfd .............. file descriptor to write the interference
 *                          data to.
 * RASTER_MAP_TYPE rtype .. DEM element data type.
 *
 */
void hata_interference_cl (const int nrows, const int ncols,
                           const int tx_coord_col, const int tx_coord_row,
                           const int tx_elevation, const int tx_anthena_height,
                           const float rx_height, const float frequency,
                           const int radius,
                           const int infd, const int outfd, 
                           RASTER_MAP_TYPE rtype)
{
    int i;

    /* number of processing tiles needed around each transmitter */
    int ntile = (radius*2) / _HATA_WITEM_PER_DIM_ + 1;

    G_message ("Receiver height above ground is %5.2f mts", rx_height);
    G_message ("Transmitter frequency is %7.2f Mhz", frequency);
    G_message ("Calculation radius is %d raster cells", radius);

    CONTEXT * ctx = init_context ( );

    /* initialize raster data */
    void *dem_in = load_raster (ctx, nrows, ncols, infd, rtype);

    /* interference matrix */
    void *qrm_out = alloc_raster (ctx, nrows, ncols, rtype, 
                                  (FCELL)_HATA_THERMAL_NOISE_, NULL);

    /* load, compile and link the OpenCL kernel file */
    void* h = clopen (ctx, NULL, CLLD_NOW);

    /* select a kernel function from the loaded file */
    cl_kernel krn = clsym (ctx, h, "hata_urban_interference", CLLD_NOW);

    /* defines a 2D matrix of work groups & work items */
    size_t global_wsize = ntile * _HATA_WITEM_PER_DIM_;
    size_t local_wsize = _HATA_WITEM_PER_DIM_;

    if ((global_wsize % local_wsize) != 0)
        G_fatal_error ("Work group dimensions are incorrect.");
    else
        G_message ("Creating %d work-group(s) of [%d x %d] threads each", 
                   global_wsize/local_wsize,
                   local_wsize, local_wsize);

    clndrange_t ndr = clndrange_init2d (0,global_wsize,local_wsize, 
                                        0,global_wsize,local_wsize);

    // correction factor ahr
    float ahr = (1.1f*log10(frequency) - 0.7f)*rx_height - 
                (1.56f*log10(frequency) - 0.8f);

    /* set kernel parameters and local memory size */
    clarg_set (ctx, krn, 0, ncols);
    clarg_set (ctx, krn, 3, rx_height);
    clarg_set (ctx, krn, 4, frequency);
    clarg_set (ctx, krn, 5, ahr);

    size_t lmem_size = _HATA_WITEM_PER_DIM_*_HATA_WITEM_PER_DIM_*
                       sizeof(cl_float2);

    if (lmem_size > _HATA_MAX_LOCAL_MEM_)
        G_fatal_error ("Allocated local memory (%d) exceeds %d bytes.", lmem_size,
                                                                        _HATA_MAX_LOCAL_MEM_);

    clarg_set_local (ctx, krn, 8, lmem_size);

    /* transfer data to the device */
    clmsync (ctx, 0, dem_in, CL_MEM_DEVICE|CL_EVENT_NOWAIT);
    clmsync (ctx, 0, qrm_out, CL_MEM_DEVICE|CL_EVENT_WAIT);

    /* set remaining kernel parameters */
    clarg_set_global (ctx, krn, 6, dem_in);
    clarg_set_global (ctx, krn, 7, qrm_out);

    G_message ("Simulating ...");

    /**
     * Test transmitters 
     * 
     * ASNEBE  | 1468 | 1200
     * ASKZK   | 1395 | 1180
     * ALEK    | 1408 | 1211
     * ANDREJ  | 1431 | 1212
     * AMALENA | 1462 | 1272
     * APOLJE  | 1483 | 1227
     * AMONS   | 1381 | 1230
     * ANTENA  | 1397 | 1238
     * ATOMAC  | 1443 | 1194
     * ATUNEL  | 1424 | 1239
     * AKOSET  | 1398 | 1206
     * ASPAR   | 1400 | 1245
     * ACHEMO  | 1431 | 1226
     * ALIVAD  | 1423 | 1248
     * ASMARG  | 1460 | 1203
     * ABTC    | 1451 | 1215
     * AZIMA   | 1471 | 1236
     * ASTARA  | 1422 | 1235
     * AVEGAS  | 1434 | 1257
     * APOVSE  | 1436 | 1232
     * ABTCST  | 1449 | 1213
     * AMOBI   | 1429 | 1222
     * APODUT  | 1378 | 1202
     * AOBZEL  | 1444 | 1222
     * AGRIC   | 1385 | 1207
     * ADELO   | 1421 | 1222
     * AJEZA   | 1459 | 1182
     * ASKZ    | 1396 | 1181
     * ATLK    | 1422 | 1225
     * AGR     | 1424 | 1220
     * ADNEVN  | 1426 | 1232
     * AMHOTE  | 1409 | 1212
     * AVIC    | 1414 | 1236
     * AJOZEF  | 1429 | 1234
     * ATVSLO  | 1425 | 1228
     * ACRNUC  | 1445 | 1172
     * ANGEL   | 1508 | 1219
     * AKOSEG  | 1401 | 1210
     * AMSC    | 1429 | 1222
     * AGRAM   | 1421 | 1195
     * ARAKTK  | 1436 | 1249
     * AIMKO   | 1445 | 1179
     * AKOVIN  | 1412 | 1214
     * ADOLGI  | 1388 | 1250
     * APETER  | 1431 | 1231
     * ASENTP  | 1387 | 1185
     * AOBI    | 1443 | 1268
     * AMZS    | 1427 | 1203
     * AVICTK  | 1403 | 1240
     * AZUR    | 1397 | 1238
     * AKAMNA  | 1388 | 1200
     * APOLCE  | 1482 | 1228
     * AKASEL  | 1497 | 1232
     * ASISKP  | 1399 | 1189
     * AKOZAR  | 1373 | 1247
     * ASEME   | 1448 | 1262
     * AURSKA  | 1424 | 1220
     * ABEZIT  | 1427 | 1199
     * AVEROV  | 1416 | 1201
     * ATEGR   | 1419 | 1209
     * ATRUB   | 1423 | 1231
     * AKOLIN  | 1436 | 1221
     * AVELAN  | 1439 | 1218
     * ABEZI   | 1431 | 1216
     * AGAMGD  | 1418 | 1148
     * AGURS   | 1431 | 1237
     * AGPG    | 1419 | 1237
     * AKOSME  | 1397 | 1210
     * AZALOG  | 1500 | 1225
     * ASOSTR  | 1498 | 1253
     * AROZNA  | 1404 | 1235
     * AVRHO   | 1380 | 1242
     * ARAK    | 1430 | 1244
     * AVILA   | 1404 | 1230
     * AKOLEZ  | 1413 | 1241
     * AJEZKZ  | 1427 | 1185
     * AVIZMA  | 1386 | 1173
     * AKOTNI  | 1429 | 1226
     * AMOSTE  | 1443 | 1227
     * ATOPNI  | 1425 | 1212
     * AGZ     | 1429 | 1209
     * APRZAC  | 1386 | 1192
     * ALITIJ  | 1455 | 1235
     * ATEHNO  | 1387 | 1233
     * ASMELT  | 1429 | 1196
     * ACRGMA  | 1439 | 1166
     * AFORD   | 1439 | 1215
     * ADRAVC  | 1396 | 1197
     * ABROD   | 1388 | 1168
     * ATABOR  | 1429 | 1228
     * ATRNOV  | 1421 | 1242
     * AGOLOV  | 1467 | 1263
     * ARESEV  | 1438 | 1228
     * ABTCMI  | 1450 | 1216
     * ASMART  | 1446 | 1213
     * APROGA  | 1382 | 1251
     * ABRDO   | 1382 | 1235
     * AKONEX  | 1387 | 1237
     * AZITO   | 1456 | 1213
     * ASLOM   | 1425 | 1226
     * APLAMA  | 1448 | 1202
     * AGRAD   | 1470 | 1218
     * AFUZIN  | 1463 | 1228
     * ASAZU   | 1421 | 1235
     * ATOTRA  | 1463 | 1233
     * AHALA   | 1414 | 1221
     * ASONCE  | 1433 | 1208
     * ALMLEK  | 1422 | 1202
     * ACERNE  | 1415 | 1212
     * ADRAV   | 1396 | 1201
     * ALGRAD  | 1424 | 1234
     * ALPP    | 1406 | 1205
     * ANAMA   | 1420 | 1231
     * ATIVO   | 1418 | 1226
     * AZAPUZ  | 1392 | 1196
     * ASMAR   | 1404 | 1150
     * ACANK   | 1416 | 1233
     * ATACGD  | 1390 | 1156
     * APOPTV  | 1435 | 1211
     * AFRANK  | 1417 | 1220
     * ASTEP   | 1450 | 1235
     * AEJATA  | 1445 | 1218
     * AZADOB  | 1482 | 1206
     * AELNA   | 1436 | 1177
     * AZELEZ  | 1428 | 1218
     * AMURGL  | 1415 | 1246
     * ACIGO   | 1423 | 1225
     * AIJS    | 1407 | 1241
     * AELOK   | 1459 | 1225
     * ATACBR  | 1385 | 1160
     * AJELSA  | 1416 | 1259
     * AVRHOV  | 1394 | 1242
     * ABIZOV  | 1472 | 1247
     * ACITYP  | 1454 | 1214
     * ASAVLJ  | 1417 | 1182
     * AKLINI  | 1434 | 1228
     * AKODEL  | 1439 | 1236
     * ASTOZI  | 1436 | 1198
     * AKORA   | 1422 | 1226
     * AHLEV   | 1388 | 1163
     * ATOPLA  | 1451 | 1224
     * AMALEN  | 1461 | 1273
     * AVECNA  | 1391 | 1222
     * ANADGO  | 1462 | 1178
     * ABOKAL  | 1379 | 1231
     * AVARNO  | 1404 | 1250
     * ASTRAL  | 1422 | 1227
     * AVEVCE  | 1488 | 1234
     * ARNC    | 1423 | 1225
     * AGEOPL  | 1406 | 1198
     * AMOBLI  | 1415 | 1200
     * AVLADA  | 1415 | 1234
     * ASTEG   | 1404 | 1193
     * AKOSEZ  | 1392 | 1207
     */

    const int ntest_tx = 154;
    cl_int2 *test_tx = malloc (ntest_tx * sizeof(cl_int2));

    test_tx[0] = (cl_int2) {1468,1200};
    test_tx[1] = (cl_int2) {1395,1180};
    test_tx[2] = (cl_int2) {1408,1211};
    test_tx[3] = (cl_int2) {1431,1212};
    test_tx[4] = (cl_int2) {1462,1272};
    test_tx[5] = (cl_int2) {1483,1227};
    test_tx[6] = (cl_int2) {1381,1230};
    test_tx[7] = (cl_int2) {1397,1238};
    test_tx[8] = (cl_int2) {1443,1194};
    test_tx[9] = (cl_int2) {1424,1239};
    test_tx[10] = (cl_int2) {1398,1206};
    test_tx[11] = (cl_int2) {1400,1245};
    test_tx[12] = (cl_int2) {1431,1226};
    test_tx[13] = (cl_int2) {1423,1248};
    test_tx[14] = (cl_int2) {1460,1203};
    test_tx[15] = (cl_int2) {1451,1215};
    test_tx[16] = (cl_int2) {1471,1236};
    test_tx[17] = (cl_int2) {1422,1235};
    test_tx[18] = (cl_int2) {1434,1257};
    test_tx[19] = (cl_int2) {1436,1232};
    test_tx[20] = (cl_int2) {1449,1213};
    test_tx[21] = (cl_int2) {1429,1222};
    test_tx[22] = (cl_int2) {1378,1202};
    test_tx[23] = (cl_int2) {1444,1222};
    test_tx[24] = (cl_int2) {1385,1207};
    test_tx[25] = (cl_int2) {1421,1222};
    test_tx[26] = (cl_int2) {1459,1182};
    test_tx[27] = (cl_int2) {1396,1181};
    test_tx[28] = (cl_int2) {1422,1225};
    test_tx[29] = (cl_int2) {1424,1220};
    test_tx[30] = (cl_int2) {1426,1232};
    test_tx[31] = (cl_int2) {1409,1212};
    test_tx[32] = (cl_int2) {1414,1236};
    test_tx[33] = (cl_int2) {1429,1234};
    test_tx[34] = (cl_int2) {1425,1228};
    test_tx[35] = (cl_int2) {1445,1172};
    test_tx[36] = (cl_int2) {1508,1219};
    test_tx[37] = (cl_int2) {1401,1210};
    test_tx[38] = (cl_int2) {1429,1222};
    test_tx[39] = (cl_int2) {1421,1195};
    test_tx[40] = (cl_int2) {1436,1249};
    test_tx[41] = (cl_int2) {1445,1179};
    test_tx[42] = (cl_int2) {1412,1214};
    test_tx[43] = (cl_int2) {1388,1250};
    test_tx[44] = (cl_int2) {1431,1231};
    test_tx[45] = (cl_int2) {1387,1185};
    test_tx[46] = (cl_int2) {1443,1268};
    test_tx[47] = (cl_int2) {1427,1203};
    test_tx[48] = (cl_int2) {1403,1240};
    test_tx[49] = (cl_int2) {1397,1238};
    test_tx[50] = (cl_int2) {1388,1200};
    test_tx[51] = (cl_int2) {1482,1228};
    test_tx[52] = (cl_int2) {1497,1232};
    test_tx[53] = (cl_int2) {1399,1189};
    test_tx[54] = (cl_int2) {1373,1247};
    test_tx[55] = (cl_int2) {1448,1262};
    test_tx[56] = (cl_int2) {1424,1220};
    test_tx[57] = (cl_int2) {1427,1199};
    test_tx[58] = (cl_int2) {1416,1201};
    test_tx[59] = (cl_int2) {1419,1209};
    test_tx[60] = (cl_int2) {1423,1231};
    test_tx[61] = (cl_int2) {1436,1221};
    test_tx[62] = (cl_int2) {1439,1218};
    test_tx[63] = (cl_int2) {1431,1216};
    test_tx[64] = (cl_int2) {1418,1148};
    test_tx[65] = (cl_int2) {1431,1237};
    test_tx[66] = (cl_int2) {1419,1237};
    test_tx[67] = (cl_int2) {1397,1210};
    test_tx[68] = (cl_int2) {1500,1225};
    test_tx[69] = (cl_int2) {1498,1253};
    test_tx[70] = (cl_int2) {1404,1235};
    test_tx[71] = (cl_int2) {1380,1242};
    test_tx[72] = (cl_int2) {1430,1244};
    test_tx[73] = (cl_int2) {1404,1230};
    test_tx[74] = (cl_int2) {1413,1241};
    test_tx[75] = (cl_int2) {1427,1185};
    test_tx[76] = (cl_int2) {1386,1173};
    test_tx[77] = (cl_int2) {1429,1226};
    test_tx[78] = (cl_int2) {1443,1227};
    test_tx[79] = (cl_int2) {1425,1212};
    test_tx[80] = (cl_int2) {1429,1209};
    test_tx[81] = (cl_int2) {1386,1192};
    test_tx[82] = (cl_int2) {1455,1235};
    test_tx[83] = (cl_int2) {1387,1233};
    test_tx[84] = (cl_int2) {1429,1196};
    test_tx[85] = (cl_int2) {1439,1166};
    test_tx[86] = (cl_int2) {1439,1215};
    test_tx[87] = (cl_int2) {1396,1197};
    test_tx[88] = (cl_int2) {1388,1168};
    test_tx[89] = (cl_int2) {1429,1228};
    test_tx[90] = (cl_int2) {1421,1242};
    test_tx[91] = (cl_int2) {1467,1263};
    test_tx[92] = (cl_int2) {1438,1228};
    test_tx[93] = (cl_int2) {1450,1216};
    test_tx[94] = (cl_int2) {1446,1213};
    test_tx[95] = (cl_int2) {1382,1251};
    test_tx[96] = (cl_int2) {1382,1235};
    test_tx[97] = (cl_int2) {1387,1237};
    test_tx[98] = (cl_int2) {1456,1213};
    test_tx[99] = (cl_int2) {1425,1226};
    test_tx[100] = (cl_int2) {1448,1202};
    test_tx[101] = (cl_int2) {1470,1218};
    test_tx[102] = (cl_int2) {1463,1228};
    test_tx[103] = (cl_int2) {1421,1235};
    test_tx[104] = (cl_int2) {1463,1233};
    test_tx[105] = (cl_int2) {1414,1221};
    test_tx[106] = (cl_int2) {1433,1208};
    test_tx[107] = (cl_int2) {1422,1202};
    test_tx[108] = (cl_int2) {1415,1212};
    test_tx[109] = (cl_int2) {1396,1201};
    test_tx[110] = (cl_int2) {1424,1234};
    test_tx[111] = (cl_int2) {1406,1205};
    test_tx[112] = (cl_int2) {1420,1231};
    test_tx[113] = (cl_int2) {1418,1226};
    test_tx[114] = (cl_int2) {1392,1196};
    test_tx[115] = (cl_int2) {1404,1150};
    test_tx[116] = (cl_int2) {1416,1233};
    test_tx[117] = (cl_int2) {1390,1156};
    test_tx[118] = (cl_int2) {1435,1211};
    test_tx[119] = (cl_int2) {1417,1220};
    test_tx[120] = (cl_int2) {1450,1235};
    test_tx[121] = (cl_int2) {1445,1218};
    test_tx[122] = (cl_int2) {1482,1206};
    test_tx[123] = (cl_int2) {1436,1177};
    test_tx[124] = (cl_int2) {1428,1218};
    test_tx[125] = (cl_int2) {1415,1246};
    test_tx[126] = (cl_int2) {1423,1225};
    test_tx[127] = (cl_int2) {1407,1241};
    test_tx[128] = (cl_int2) {1459,1225};
    test_tx[129] = (cl_int2) {1385,1160};
    test_tx[130] = (cl_int2) {1416,1259};
    test_tx[131] = (cl_int2) {1394,1242};
    test_tx[132] = (cl_int2) {1472,1247};
    test_tx[133] = (cl_int2) {1454,1214};
    test_tx[134] = (cl_int2) {1417,1182};
    test_tx[135] = (cl_int2) {1434,1228};
    test_tx[136] = (cl_int2) {1439,1236};
    test_tx[137] = (cl_int2) {1436,1198};
    test_tx[138] = (cl_int2) {1422,1226};
    test_tx[139] = (cl_int2) {1388,1163};
    test_tx[140] = (cl_int2) {1451,1224};
    test_tx[141] = (cl_int2) {1461,1273};
    test_tx[142] = (cl_int2) {1391,1222};
    test_tx[143] = (cl_int2) {1462,1178};
    test_tx[144] = (cl_int2) {1379,1231};
    test_tx[145] = (cl_int2) {1404,1250};
    test_tx[146] = (cl_int2) {1422,1227};
    test_tx[147] = (cl_int2) {1488,1234};
    test_tx[148] = (cl_int2) {1423,1225};
    test_tx[149] = (cl_int2) {1406,1198};
    test_tx[150] = (cl_int2) {1415,1200};
    test_tx[151] = (cl_int2) {1415,1234};
    test_tx[152] = (cl_int2) {1404,1193};
    test_tx[153] = (cl_int2) {1392,1207};

    /* for each transmitter */
    for (i = 0; i < ntest_tx; i++)
    {
        /* display completion percentage */
        G_percent (i, ntest_tx, 1);

        /* Transmitter coordinates */
        cl_int2 tx_coord = test_tx[i];

        G_message ("The transmitter is located at %d, %d", tx_coord.x,
                                                           tx_coord.y);
        /* Transmitter heights */
        //G_message ("Transmitter height above sea level is %d mts", tx_elevation);
        //G_message ("Transmitter anthena height above ground is %d mts", tx_anthena_height);

        /* transmitter data */ 
        cl_int4 tx_data = (cl_int4) {tx_coord.x, tx_coord.y,
                                     tx_elevation, tx_anthena_height};

        /* tile offset within the raster */
        cl_int2 tx_offset = (cl_int2) {tx_coord.x - radius,
                                       tx_coord.y - radius};

        /* set (more) remaining kernel parameters */
        clarg_set (ctx, krn, 1, tx_data);
        clarg_set (ctx, krn, 2, tx_offset);

        /* start kernel execution */
        clfork (ctx, 0, krn, &ndr, CL_EVENT_NOWAIT);

        clwait (ctx, 0, CL_KERNEL_EVENT|CL_MEM_EVENT|CL_EVENT_RELEASE);
    }

    /* sync memory */
    clmsync (ctx, 0, qrm_out, CL_MEM_HOST|CL_EVENT_WAIT);

    /* write the calculation output */
    save_raster (qrm_out, nrows, ncols, outfd);

    /* free allocated resources */
    free (test_tx);
    clclose (ctx, h);
    clfree (qrm_out);
    clfree (dem_in);

}