예제 #1
0
/**
 * @brief Sets up the "SH way following" boid model.
 *
 * Fills the timing-label table, copies the simulation parameters, caches the
 * OpenCL context/queue/devices obtained from clHelper, creates the buffers,
 * loads the goal data, and finally loads both OpenCL programs and the kernels.
 *
 * @param clHlpr OpenCL helper, forwarded to the BoidModel base constructor
 * @param pos    initial values handed to createBuffer()
 * @param vel    initial values handed to createBuffer()
 * @param goal   values handed to createBuffer() and to loadData()
 * @param color  values handed to createBuffer()
 * @param simP   simulation parameters; the pointee is copied into simParams
 */
BoidModelSHWay1::BoidModelSHWay1(CLHelper* clHlpr, std::vector<Vec4> pos, std::vector<Vec4> vel, std::vector<Vec4> goal, std::vector<Vec4> color, simParams_t* simP) : BoidModel(clHlpr)
{
	// Ten label slots: two fixed headings, the remaining eight start out empty.
	simTimeDisc = std::vector<const char*>(10, "");
	simTimeDisc[0] = "Boid Model SH way following";
	simTimeDisc[1] = "OpenCL Simulation Times:";

	// Keep a private copy of the parameters; the body count drives `num`.
	simParams = *simP;
	num = simParams.numBodies;

	// Cache the OpenCL handles exposed by the helper.
	context = clHelper->getContext();
	queue   = clHelper->getCmdQueue();
	devices = clHelper->getDevices();

	// Device-side storage first, then the goal data.
	createBuffer(pos, vel, goal, color);
	loadData(goal);

	// Both programs must be loaded before loadKernel() is called.
	programBoid    = loadProgram(kernel_path + "BoidModelSHWay1_kernel_v1.cl");
	programBitonic = loadProgram(kernel_path + "bitonic_sort.cl");
	loadKernel();

	log("setup complete - simulation is runable");
}
예제 #2
0
void FDMHeatWidget::initializeCL()
{
    // Configurar OpenCL con soporte de OpenGL
    if(!setupOpenCLGL(clContext, clQueue, clDevice)) {
        qDebug() << "FDMHeatWidget::initializeCL: Error al configurar OpenCL.";
        return;
    }

    if(!loadKernel(clContext, &renderKernel, clDevice, "../src/systemToImage.cl", "systemToImage")) {
        qDebug() << "FDMHeatWidget::initializeCL: Error al cargar kernel.";
        return;
    }

    // Mapear la memoria la textura en OpenCL
    // Desde OpenCL solo vamos a escribir en la textura.
    cl_int error;
    textureMem= clCreateFromGLTexture2D(clContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texture, &error);
    if(checkError(error, "clCreateFromGLTexture2D"))
        return;

    // Cargar paleta y subirla a la GPU
    QImage palette("palette.png");
    if(palette.isNull())
        return;
    paletteMem= clCreateBuffer(clContext, CL_MEM_READ_ONLY, palette.byteCount(), NULL, &error);
    error |= clEnqueueWriteBuffer(clQueue, paletteMem, CL_FALSE, 0, palette.byteCount(), palette.bits(), 0, NULL, NULL);
    if(checkError(error, "clCreateBuffer"))
        return;

    // Liberamos el semaforo
    clConfigReady.release();
}
예제 #3
0
파일: Kernels.cpp 프로젝트: assutech/isis3
 /**
  * @brief Interrogate a PvlKeyword for the location of kernel file names
  *
  * When the first value of @a key is not "TABLE" (case-insensitive), the
  * keyword itself is passed to loadKernel(). Otherwise the objects of @a pvl
  * are scanned for the first object named "TABLE" that has a "Name" keyword
  * whose first value equals @a tblname; that object's "Kernels" keyword is
  * passed to loadKernel(). If no such object exists nothing is loaded.
  *
  * @param key PvlKeyword containing SPICE kernel names
  * @param tblname Name of Table where the SPICE blob is located in the label
  * @param pvl Pvl label to search for the SPICE Table Object Blob
  */
 void Kernels::loadKernelFromTable(PvlKeyword &key,
                                        const std::string &tblname, Pvl &pvl) {
   // Direct case: the keyword already carries the kernel names.
   if (iString::UpCase(key[0]) != "TABLE") {
     loadKernel(key);
     return;
   }

   // Indirect case: look for the matching Table object in the label.
   for (PvlObject::PvlObjectIterator tbl = pvl.BeginObject();
        tbl != pvl.EndObject(); ++tbl) {
     if (iString::UpCase(tbl->Name()) != "TABLE") continue;
     if (!tbl->HasKeyword("Name")) continue;
     if (!iString::Equal(tbl->FindKeyword("Name")[0], tblname)) continue;

     // Only the first matching table is used.
     loadKernel(tbl->FindKeyword("Kernels"));
     return;
   }
 }
예제 #4
0
/**
 * Returns the next node of the path.
 *
 * The candidates come from loadKernel(); the first one is returned, or NULL
 * when there are none. With several candidates no further selection is made
 * yet, the first one is still the result.
 */
Node *Path::getNextNode()
{
	const std::vector<Node *> options = loadKernel();

	// Zero candidates -> nothing to return; one or many -> first candidate.
	return options.empty() ? NULL : options.front();
}
예제 #5
0
/**
 * Reads the OpenCL source file, builds the program from it and loads a kernel.
 *
 * @param fileName   path of the source file to read
 * @param kernelName name passed to loadKernel() after the program is created
 * @param filterSize forwarded unchanged to createProgram()
 *
 * If the file cannot be opened or its size cannot be determined, an empty
 * source string is passed to createProgram() (same as the original behaviour
 * for an unopenable file).
 */
void Convolution3DCLBuffer::createProgramAndLoadKernel(const std::string& fileName, const std::string& kernelName, size_t const* filterSize)
{
	std::string content;
	std::ifstream in(fileName, std::ios::in);
	if(in)
	{
		in.seekg(0, std::ios::end);
		// tellg() reports -1 on failure; never feed that to resize().
		const std::streamoff length = in.tellg();
		if(length > 0)
		{
			content.resize(static_cast<std::size_t>(length));
			in.seekg(0, std::ios::beg);
			in.read(&content[0], static_cast<std::streamsize>(content.size()));
			// The stream is opened in text mode, so fewer characters than the
			// file size may be delivered; drop the unread tail instead of
			// handing trailing NUL bytes to the compiler.
			content.resize(static_cast<std::size_t>(in.gcount()));
		}
		in.close();
	}

	createProgram(content, filterSize);
	loadKernel(kernelName);
}
예제 #6
0
파일: boot.c 프로젝트: GemBit/PICOS
/*
 * Boot entry: records the disk geometry and MBR location in bootData, probes
 * memory, loads the kernel, switches the video mode and finally hands control
 * to the kernel. The order of the calls matters (see the comments below).
 */
void boot(void) {
    bootData.totalSectors = diskParameter.totalSectors;
    bootData.sectorSize = diskParameter.sectorSize;
    bootData.mbr = (MBR *) codeStart;

    /* Memory detection should come first; its result is used to size buffer allocations */
    detectMemory();

    /* Load the kernel */
    loadKernel();

    /* Switching to the high-resolution mode should come last, because the earlier steps print text */
    setVbeMode(getPreferredResolution());

    executeKernel();
}
예제 #7
0
/**
 * Initializes the core server.
 *
 * Cores other than core0 only set up their channels. Core0 additionally
 * loads the kernel, runs discovery, sets up its channels and then boots all
 * cores; the first step that does not report Success ends the sequence and
 * its result is returned.
 *
 * @return Result of the last step that was executed.
 */
CoreServer::Result CoreServer::initialize()
{
    // Only core0 is responsible for starting the other coreservers
    const bool isBootCore = (m_info.coreId == 0);
    if (!isBootCore)
    {
        return setupChannels();
    }

    Result status = loadKernel();
    if (status != Success)
    {
        return status;
    }

    status = discover();
    if (status != Success)
    {
        return status;
    }

    status = setupChannels();
    if (status != Success)
    {
        return status;
    }

    return bootAll();
}
예제 #8
0
/**
 * @brief Initializes OpenCL for the widget and prepares the "vboproc" kernel.
 *
 * Sets up the OpenCL context/queue/device, loads the kernel, wraps the
 * particle VBO as an OpenCL buffer and binds the four kernel arguments
 * (buffer, vertex count, cube limits, timestep). Every failure is reported
 * through qDebug() and aborts the initialization.
 */
void GLWidget::initializeCL()
{
    qDebug() << "Initializing OpenCL";
    if (!setupOpenCLGL(clContext, clQueue, clDevice)) {
        qDebug() << "OpenCL initialization error";
        return;
    }

    cl_int error;

    // NOTE(review): the result of loadKernel() is not checked here; presumably a
    // failed load surfaces as a clSetKernelArg error below - confirm.
    loadKernel(clContext, &clKernel, clDevice, "../src/vboproc.cl", "vboproc");

    // Create the OpenCL buffer from the OpenGL buffer
    qDebug() << "Creando OpenCL buffer.";
    clvbo = clCreateFromGLBuffer(clContext, CL_MEM_READ_WRITE, particlesVBO->bufferId(), &error);
    if (checkError(error, "clCreateFromGLBuffer")) {
        qDebug() << "OpenCL initialization error";
        return;
    }

    // Set the kernel parameters; the kernel is enqueued elsewhere
    qDebug() << "Seteo los parametros del kernel.";
    error  = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&clvbo);
    error |= clSetKernelArg(clKernel, 1, sizeof(cl_int), (void*)&vertexNumber);
    // cl_float3 occupies four floats (16 bytes), so the host array needs a
    // fourth padding element; a float[3] would be read 4 bytes past its end.
    const float cubeLims[4]= {cubeLimits.x(), cubeLimits.y(), cubeLimits.z(), 0.0f};
    error |= clSetKernelArg(clKernel, 2, sizeof(cl_float3), (void*)&cubeLims);
    error |= clSetKernelArg(clKernel, 3, sizeof(cl_float), (void*)&timestep);
    if(checkError(error, "clSetKernelArg")) {
        qDebug() << "OpenCL initialization error";
        return;
    }

    // NOTE(review): the original comment said the program reference is
    // decremented once the kernel exists; no such release happens in this function.
    qDebug() << "OpenCL initialized successfully";

}
예제 #9
0
파일: main_gs.c 프로젝트: MichaelQuell/PSNM
/*
 * Driver of the OpenCL simulation.
 *
 * Reads the run parameters through parainit(), enumerates the OpenCL
 * platforms/devices, creates a context and queue on platform 0 / device 0,
 * allocates the device arrays, loads the five kernels, computes the
 * frequencies and initial data, and then runs the splitting time loop
 * (nonlinear a, nonlinear b, FFT + linear part + inverse FFT, nonlinear b,
 * nonlinear a). Output is written every `plotgap` steps; the per-output maxima
 * returned by writeimage() are collected in umax/vmax and written at the end.
 *
 * Returns 0 on success; exits with status 1 when a required setup step fails.
 */
int main(void) {
// time measuring
	struct timeval tvs;

// variables
	int 	Nx=1024;
	int		Ny=1024;
	int 	plotnum=0;
	int	  	Tmax=2;
	int 	plottime=0;
	int	  	plotgap=1;
	double	Lx=1.0;
	double 	Ly=1.0;
	double	dt=0.0;
	double	A=0.0;
	double	B=0.0;
	double	Du=0.0;
	double	Dv=0.0;
// splitting coefficients
	double	a=0.5;
	double 	b=0.5;
	double 	c=1.0;
// loop counters
	int i=0;
	int j=0;
	int n=0;

	double*umax=NULL;
	double*vmax=NULL;
	parainit(&Nx,&Ny,&Tmax,&plotgap,&Lx,&Ly,&dt,&Du,&Dv,&A,&B);
// plotgap is used as a divisor below and as the output stride in the time loop
	if(plotgap<1){printf("plotgap must be at least 1, got %d\n",plotgap); exit(1); }
	plottime=plotgap;
	vmax=(double*)malloc((Tmax/plotgap+1)*sizeof(double));
	umax=(double*)malloc((Tmax/plotgap+1)*sizeof(double));
	if(umax==NULL||vmax==NULL){printf("could not allocate umax/vmax\n"); exit(1); }
// openCL variables
	cl_platform_id *platform_id = NULL;
	cl_kernel frequencies = NULL, initialdata = NULL, linearpart=NULL;
	cl_kernel nonlinearpart_a=NULL, nonlinearpart_b=NULL;
	cl_int ret;
	cl_uint num_platforms = 0;
// Detect how many platforms there are.
	ret = clGetPlatformIDs(0, NULL, &num_platforms);
// Without a platform every later index (platform 0 / device 0) would be out of bounds.
	if(ret!=CL_SUCCESS || num_platforms==0){printf("clGetPlatformIDs ret:%d\n",ret); exit(1); }
// Allocate enough space for the number of platforms.
	platform_id = (cl_platform_id*) malloc(num_platforms*sizeof(cl_platform_id));
	if(platform_id==NULL){printf("could not allocate platform list\n"); exit(1); }
// Store the platforms
	ret = clGetPlatformIDs(num_platforms, platform_id, NULL);
	printf("Found %d platform(s)!\n",num_platforms);
	cl_uint *num_devices;
	num_devices=(cl_uint*) malloc(num_platforms*sizeof(cl_uint));
	cl_device_id **device_id = NULL;
	device_id =(cl_device_id**) malloc(num_platforms*sizeof(cl_device_id*));
	if(num_devices==NULL||device_id==NULL){printf("could not allocate device lists\n"); exit(1); }
// Detect number of devices in the platforms
	for(i=0;i<num_platforms;i++){
		char buf[65536];
		size_t size;
		ret = clGetPlatformInfo(platform_id[i],CL_PLATFORM_VERSION,sizeof(buf),buf,&size);
		printf("%s\n",buf);
// The count belongs to platform i: write it to slot i (not always slot 0),
// and start from 0 so a failed query leaves a defined value.
		num_devices[i]=0;
		ret = clGetDeviceIDs(platform_id[i],CL_DEVICE_TYPE_ALL,0,NULL,&num_devices[i]);
		printf("Found %d device(s) on platform %d!\n", num_devices[i],i);
		ret = clGetPlatformInfo(platform_id[i],CL_PLATFORM_NAME,sizeof(buf),buf,&size);
		printf("%s ",buf);
// Store numDevices from platform
		device_id[i]=NULL;
		if(num_devices[i]>0){
// Elements are cl_device_id, so size the array by the element type.
			device_id[i]=(cl_device_id*) malloc(num_devices[i]*sizeof(cl_device_id));
			if(device_id[i]==NULL){printf("could not allocate device list\n"); exit(1); }
			ret = clGetDeviceIDs(platform_id[i],CL_DEVICE_TYPE_ALL,num_devices[i],device_id[i],NULL);
		}
		for(j=0;j<num_devices[i];j++){
			ret = clGetDeviceInfo(device_id[i][j],CL_DEVICE_NAME,sizeof(buf),buf,&size);
			printf("%s (%d,%d)\n",buf,i,j);
		}
	}
// create context and command_queue
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
// Which platform and device do I choose?
	int	chooseplatform=0;
	int	choosedevice=0;
	printf("Choose platform %d and device %d!\n",chooseplatform,choosedevice);
// A platform without devices makes clCreateContext fail, which is caught here.
	context = clCreateContext( NULL, num_devices[chooseplatform], device_id[chooseplatform], NULL, NULL, &ret);
	if(ret!=CL_SUCCESS){printf("createContext ret:%d\n",ret); exit(1); }
	command_queue = clCreateCommandQueue(context, device_id[chooseplatform][choosedevice], 0, &ret);
	if(ret!=CL_SUCCESS){printf("createCommandQueue ret:%d\n",ret); exit(1); }

// OpenCL arrays
	cl_mem cl_u = NULL,cl_v = NULL;
	cl_mem cl_uhat = NULL, cl_vhat = NULL;
	cl_mem cl_kx = NULL, cl_ky = NULL;

// FFT
	clfftPlanHandle planHandle;
	cl_mem tmpBuffer = NULL;
	fftinit(&planHandle,&context, &command_queue, &tmpBuffer, Nx, Ny);

// allocate gpu memory (the 2*Nx*Ny arrays hold two doubles per grid point)
	cl_u=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx* Ny* sizeof(double), NULL, &ret);
	cl_v=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx* Ny* sizeof(double), NULL, &ret);
	cl_uhat=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx * Ny* sizeof(double), NULL, &ret);
	cl_vhat=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx * Ny* sizeof(double), NULL, &ret);
	cl_kx = clCreateBuffer(context, CL_MEM_READ_WRITE, Nx * sizeof(double), NULL, &ret);
	cl_ky = clCreateBuffer(context, CL_MEM_READ_WRITE, Ny * sizeof(double), NULL, &ret);
// clCreateBuffer returns NULL on failure; ret only remembers the last call.
	if(cl_u==NULL||cl_v==NULL||cl_uhat==NULL||cl_vhat==NULL||cl_kx==NULL||cl_ky==NULL){
		printf("clCreateBuffer failed\n"); exit(1);
	}

	printf("allocated space\n");
// load the kernels
	loadKernel(&frequencies,&context,&device_id[chooseplatform][choosedevice],"frequencies");
	loadKernel(&initialdata,&context,&device_id[chooseplatform][choosedevice],"initialdata");
	loadKernel(&linearpart,&context,&device_id[chooseplatform][choosedevice],"linearpart");
	loadKernel(&nonlinearpart_a,&context,&device_id[chooseplatform][choosedevice],"nonlinearpart_a");
	loadKernel(&nonlinearpart_b,&context,&device_id[chooseplatform][choosedevice],"nonlinearpart_b");

	size_t global_work_size[1] = {Nx*Ny};
	size_t global_work_size_X[1] = {Nx};
	size_t global_work_size_Y[1] = {Ny};
// frequencies: the same kernel is run once for x (kx, Lx, Nx) and once for y (ky, Ly, Ny)
	ret = clSetKernelArg(frequencies, 0, sizeof(cl_mem),(void *)&cl_kx);
	ret = clSetKernelArg(frequencies, 1, sizeof(double),(void* )&Lx);
	ret = clSetKernelArg(frequencies, 2, sizeof(int),(void* )&Nx);
	ret = clEnqueueNDRangeKernel(command_queue, frequencies, 1, NULL, global_work_size_X, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);
	ret = clSetKernelArg(frequencies, 0, sizeof(cl_mem),(void *)&cl_ky);
	ret = clSetKernelArg(frequencies, 1, sizeof(double),(void* )&Ly);
	ret = clSetKernelArg(frequencies, 2, sizeof(int),(void* )&Ny);
	ret = clEnqueueNDRangeKernel(command_queue, frequencies, 1, NULL, global_work_size_Y, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);
//printCL(&cl_kx,&command_queue,Nx,1);
//printCL(&cl_ky,&command_queue,1,Ny);
// initial data
	ret = clSetKernelArg(initialdata, 0, sizeof(cl_mem),(void *)&cl_u);
	ret = clSetKernelArg(initialdata, 1, sizeof(cl_mem),(void* )&cl_v);
	ret = clSetKernelArg(initialdata, 2, sizeof(int),(void* )&Nx);
	ret = clSetKernelArg(initialdata, 3, sizeof(int),(void* )&Ny);
	ret = clSetKernelArg(initialdata, 4, sizeof(double),(void* )&Lx);
	ret = clSetKernelArg(initialdata, 5, sizeof(double),(void* )&Ly);
	ret = clEnqueueNDRangeKernel(command_queue, initialdata, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);
// make output
	writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
	writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
	umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
	vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
	printf("Got initial data, starting timestepping\n");
	mtime_s(&tvs);

	for(n=0;n<=Tmax;n++){
// nonlinearpart_a
		ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
		ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
		ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
		ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
		ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
		ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
		ret = clFinish(command_queue);

// nonlinearpart_b
		ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
		ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
		ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
		ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
		ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
		ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
		ret = clFinish(command_queue);

// linear part: forward FFT, linear kernel on the transformed arrays, inverse FFT
		fft2dfor(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
		fft2dfor(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//printf("A%f,B%f\n",A,B);
		ret = clSetKernelArg(linearpart, 0, sizeof(cl_mem),(void *)&cl_uhat);
		ret = clSetKernelArg(linearpart, 1, sizeof(cl_mem),(void *)&cl_vhat);
		ret = clSetKernelArg(linearpart, 2, sizeof(cl_mem),(void* )&cl_kx);
		ret = clSetKernelArg(linearpart, 3, sizeof(cl_mem),(void* )&cl_ky);
		ret = clSetKernelArg(linearpart, 4, sizeof(double),(void* )&Du);
		ret = clSetKernelArg(linearpart, 5, sizeof(double),(void* )&Dv);
		ret = clSetKernelArg(linearpart, 6, sizeof(double),(void* )&A);
		ret = clSetKernelArg(linearpart, 7, sizeof(double),(void* )&B);
		ret = clSetKernelArg(linearpart, 8, sizeof(double),(void* )&dt);
		ret = clSetKernelArg(linearpart, 9, sizeof(double),(void* )&c);
		ret = clSetKernelArg(linearpart, 10, sizeof(int),(void* )&Nx);
		ret = clSetKernelArg(linearpart, 11, sizeof(int),(void* )&Ny);
		ret = clEnqueueNDRangeKernel(command_queue, linearpart, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
		ret = clFinish(command_queue);

		fft2dback(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
		fft2dback(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);

// nonlinearpart_b
		ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
		ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
		ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
		ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
		ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
		ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
		ret = clFinish(command_queue);
// nonlinearpart_a
		ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
		ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
		ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
		ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
		ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
		ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
		ret = clFinish(command_queue);
// done
		if(n==plottime){
// The maxima printed here are those of the previous output slot; the new
// slot is filled right after plotnum is advanced.
			printf("time:%f, step:%d,%d,umax:%f,vmax:%f\n",n*dt,n,plotnum,umax[plotnum],vmax[plotnum]);
			plottime=plottime+plotgap;
			plotnum=plotnum+1;
			writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
			writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
			umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
			vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
		}
	}//end timestepping

	printf("Finished time stepping\n");
	mtime_e(&tvs,"Programm took:");
	writearray(umax,(Tmax/plotgap)+1,"u");
	writearray(vmax,(Tmax/plotgap)+1,"v");
	free(umax);
	free(vmax);

	clReleaseMemObject(cl_u);
	clReleaseMemObject(cl_v);
	clReleaseMemObject(cl_uhat);
	clReleaseMemObject(cl_vhat);
	clReleaseMemObject(cl_kx);
	clReleaseMemObject(cl_ky);

	ret = clReleaseKernel(initialdata);
	ret = clReleaseKernel(frequencies);
	ret = clReleaseKernel(linearpart);
	ret = clReleaseKernel(nonlinearpart_a);
	ret = clReleaseKernel(nonlinearpart_b);

	fftdestroy(&planHandle, &tmpBuffer);

	ret = clReleaseCommandQueue(command_queue);
	ret = clReleaseContext(context);

// device_id[i] may be NULL for a platform without devices; free(NULL) is a no-op
	for(i=0;i<num_platforms;i++){free(device_id[i]);}
	free(device_id);
	free(platform_id);
	free(num_devices);
	printf("Program execution complete\n");

	return 0;
}