Exemple #1
0
unsigned int kernel_launch (cl_kernel kernel, cl_context context, cl_command_queue cmd_queue, const char ** p, const char ** t, const struct gapmis_params * in, float * scores)
{
        int error=1;
	unsigned int	pats = get_number_of_sequences (p);
	unsigned int	txts = get_number_of_sequences (t);

	unsigned int	maxTxtLen = get_max_length (txts, t);
	unsigned int	maxPatLen = get_max_length (pats, p);
	unsigned int	pBlockSize = get_pblock_size (txts,32); 
	unsigned int 	hproVecLen = pats * pBlockSize * (maxTxtLen + 1);
	unsigned int 	dproVecLen = pats * pBlockSize * (maxPatLen + 1);
	
	unsigned int * txtsVec = calloc(maxTxtLen*pBlockSize, sizeof(unsigned int));
	unsigned int * patsVec = calloc(maxPatLen*pats, sizeof(unsigned int));
	         int * argsVec = malloc(sizeof(int)*(pats+7));		
		 int * txtsLenVec = calloc(pBlockSize,sizeof(int));
	       float * pensVec = malloc(sizeof(float)*2);
	         int * hproVec = calloc(hproVecLen,sizeof(int));
	         int * dproVec = calloc(dproVecLen,sizeof(int));

	cl_int err;	

	if(patsVec==NULL   || txtsVec==NULL      || argsVec==NULL || 
           pensVec == NULL || txtsLenVec == NULL || hproVec==NULL || 
	   dproVec==NULL)
	{	
	 	errno = MALLOC;
      		return ( 0 );
	}


	fill_txtsVec (txts, pBlockSize, t, txtsVec, in->scoring_matrix);
	fill_patsVec (pats, maxPatLen, p, patsVec, in->scoring_matrix);
	fill_argsVec (pats, txts, p, in->max_gap, pBlockSize, maxPatLen, maxTxtLen, argsVec);
	fill_txtsLenVec (txts, t, txtsLenVec);

	pensVec[0] = - in -> gap_open_pen;
	pensVec[1] = - in -> gap_extend_pen;	
	
	/* GPU malloc */
	cl_mem txtsVec_device = malloc_device (context, (maxTxtLen*pBlockSize)*sizeof(unsigned int), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}
	
	/* copy from CPU to GPU mem */
	init_device_mem_uint (context, cmd_queue, txtsVec_device, txtsVec, maxTxtLen*pBlockSize, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}
	
	cl_mem patsVec_device = malloc_device (context, (maxPatLen*pats)*sizeof(unsigned int), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	init_device_mem_uint (context, cmd_queue, patsVec_device, patsVec,maxPatLen*pats, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	cl_mem argsVec_device = malloc_device (context, (pats+7)*sizeof(int), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	init_device_mem_int (context, cmd_queue, argsVec_device, argsVec, pats+7, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	cl_mem txtsLenVec_device = malloc_device (context, pBlockSize*sizeof(int), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	init_device_mem_int (context, cmd_queue, txtsLenVec_device, txtsLenVec, pBlockSize, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	cl_mem pensVec_device = malloc_device (context, 2*sizeof(float), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	init_device_mem_float (context, cmd_queue, pensVec_device, pensVec, 2, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	cl_mem hproVec_device = malloc_device (context, hproVecLen*sizeof(int), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	init_device_mem_int (context, cmd_queue, hproVec_device, hproVec, hproVecLen, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	cl_mem dproVec_device = malloc_device (context, dproVecLen*sizeof(int), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	init_device_mem_int (context, cmd_queue, dproVec_device, dproVec, dproVecLen, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	cl_mem scrsVec_device = malloc_device (context, (pats*pBlockSize)*sizeof(float), &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	err = clFinish(cmd_queue);
	if(err != CL_SUCCESS)
	{
	 	errno = GPUERROR;
      		return ( 0 );	
	}

	/* connect the input arguments of the kernel with the corresponding mem */
	set_kernel_arguments (kernel, cmd_queue, patsVec_device, txtsVec_device, argsVec_device, txtsLenVec_device, pensVec_device, hproVec_device, dproVec_device, scrsVec_device);

	/* synchronisation */
	err = clFinish(cmd_queue);
	if(err != CL_SUCCESS)
	{
	 	errno = GPUERROR;
      		return ( 0 );	
	}	


	/* WorkSizeGlobal is the total number of threads of the device*/
	size_t WorkSizeGlobal[] = {pBlockSize * pats};
	/* WorkSizeLocal is the number of threads per group*/
	size_t WorkSizeLocal[] = {pBlockSize};

	/* kernel enters the command queue using WorkSizeGlobal and WorkSizeLocal */
	err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, WorkSizeGlobal, WorkSizeLocal, 0, NULL, NULL);
	if(error)
	{
	 	errno = KERNEL;
      		return ( 0 );	
	}

	/* finalise the kernel */
	err = clFinish(cmd_queue);
	if(err != CL_SUCCESS)
	{
	 	errno = GPUERROR;
      		return ( 0 );	
	}	

	/* return the results from the GPU to the CPU */
	read_device_mem_float (cmd_queue, pats*pBlockSize, scores, scrsVec_device, &error);
	if(error)
	{
	 	errno = GPUMALLOC;
      		return ( 0 );	
	}

	/* deallocation */
	free (txtsVec);
	free (patsVec);
	free (argsVec);
	free (txtsLenVec);
	free (pensVec);
	free (hproVec);
	free (dproVec);

	clReleaseMemObject(patsVec_device);
	clReleaseMemObject(txtsVec_device);
	clReleaseMemObject(argsVec_device);
	clReleaseMemObject(txtsLenVec_device);
	clReleaseMemObject(pensVec_device);
	clReleaseMemObject(hproVec_device);
	clReleaseMemObject(dproVec_device);
	clReleaseMemObject(scrsVec_device);

	return ( 1 );
}
Exemple #2
0
unsigned int kernel_launch ( cl_kernel kernel, cl_context context, cl_command_queue cmd_queue, unsigned int n, int * a, int * b, int * c )
{
        int error = 1;

	int * aVec = calloc( n , sizeof ( int ) );
	int * bVec = calloc( n , sizeof ( int ) );
        int * cVec = calloc( n , sizeof ( int ) ); 

	cl_int err;	

	if( aVec == NULL   || bVec == NULL      || cVec == NULL )
	{	
      		return ( 0 );
	}

        /* Here it is not needed */
	fill_aVec ( n, a, aVec );
	fill_bVec ( n, b, bVec );

	cl_mem aVec_device = malloc_device (context, n * sizeof( int ), &error);
	if(error)
	{
      		return ( 0 );	
	}
	
	init_device_mem_int (context, cmd_queue, aVec_device, aVec, n, &error);
	if(error)
	{
      		return ( 0 );	
	}
	
	cl_mem bVec_device = malloc_device (context, n * sizeof( int ), &error);
	if(error)
	{
      		return ( 0 );	
	}

	init_device_mem_int (context, cmd_queue, bVec_device, bVec, n, &error);
	if(error)
	{
      		return ( 0 );	
	}

	cl_mem cVec_device = malloc_device (context, n * sizeof( int ), &error);
	if(error)
	{
      		return ( 0 );	
	}

	init_device_mem_int (context, cmd_queue, cVec_device, cVec, n, &error);
	if(error)
	{
      		return ( 0 );	
	}

	err = clFinish( cmd_queue );
	if( err != CL_SUCCESS )
	{
      		return ( 0 );	
	}

	set_kernel_arguments ( kernel, cmd_queue, aVec_device, bVec_device, cVec_device );

	err = clFinish( cmd_queue );
	if( err != CL_SUCCESS )
	{
      		return ( 0 );	
	}	

	size_t WorkSizeGlobal[] = {n};
	size_t WorkSizeLocal[]  = {1};

	err = clEnqueueNDRangeKernel( cmd_queue, kernel, 1, NULL, WorkSizeGlobal, WorkSizeLocal, 0, NULL, NULL);
	if( err != CL_SUCCESS )
	{
      		return ( 0 );	
	}

	err=clFinish( cmd_queue );
	if( err != CL_SUCCESS )
	{
      		return ( 0 );	
	}	

	read_device_mem_int (cmd_queue, n, c, cVec_device, &error);
	if( error )
	{
      		return ( 0 );	
	}

	/*Here c should contain the result */
	free (aVec);
	free (bVec);
	free (cVec);

	clReleaseMemObject(aVec_device);
	clReleaseMemObject(bVec_device);
	clReleaseMemObject(cVec_device);

	return ( 1 );
}