unsigned int kernel_launch (cl_kernel kernel, cl_context context, cl_command_queue cmd_queue, const char ** p, const char ** t, const struct gapmis_params * in, float * scores) { int error=1; unsigned int pats = get_number_of_sequences (p); unsigned int txts = get_number_of_sequences (t); unsigned int maxTxtLen = get_max_length (txts, t); unsigned int maxPatLen = get_max_length (pats, p); unsigned int pBlockSize = get_pblock_size (txts,32); unsigned int hproVecLen = pats * pBlockSize * (maxTxtLen + 1); unsigned int dproVecLen = pats * pBlockSize * (maxPatLen + 1); unsigned int * txtsVec = calloc(maxTxtLen*pBlockSize, sizeof(unsigned int)); unsigned int * patsVec = calloc(maxPatLen*pats, sizeof(unsigned int)); int * argsVec = malloc(sizeof(int)*(pats+7)); int * txtsLenVec = calloc(pBlockSize,sizeof(int)); float * pensVec = malloc(sizeof(float)*2); int * hproVec = calloc(hproVecLen,sizeof(int)); int * dproVec = calloc(dproVecLen,sizeof(int)); cl_int err; if(patsVec==NULL || txtsVec==NULL || argsVec==NULL || pensVec == NULL || txtsLenVec == NULL || hproVec==NULL || dproVec==NULL) { errno = MALLOC; return ( 0 ); } fill_txtsVec (txts, pBlockSize, t, txtsVec, in->scoring_matrix); fill_patsVec (pats, maxPatLen, p, patsVec, in->scoring_matrix); fill_argsVec (pats, txts, p, in->max_gap, pBlockSize, maxPatLen, maxTxtLen, argsVec); fill_txtsLenVec (txts, t, txtsLenVec); pensVec[0] = - in -> gap_open_pen; pensVec[1] = - in -> gap_extend_pen; /* GPU malloc */ cl_mem txtsVec_device = malloc_device (context, (maxTxtLen*pBlockSize)*sizeof(unsigned int), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } /* copy from CPU to GPU mem */ init_device_mem_uint (context, cmd_queue, txtsVec_device, txtsVec, maxTxtLen*pBlockSize, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } cl_mem patsVec_device = malloc_device (context, (maxPatLen*pats)*sizeof(unsigned int), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } init_device_mem_uint (context, cmd_queue, patsVec_device, patsVec,maxPatLen*pats, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } cl_mem argsVec_device = malloc_device (context, (pats+7)*sizeof(int), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } init_device_mem_int (context, cmd_queue, argsVec_device, argsVec, pats+7, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } cl_mem txtsLenVec_device = malloc_device (context, pBlockSize*sizeof(int), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } init_device_mem_int (context, cmd_queue, txtsLenVec_device, txtsLenVec, pBlockSize, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } cl_mem pensVec_device = malloc_device (context, 2*sizeof(float), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } init_device_mem_float (context, cmd_queue, pensVec_device, pensVec, 2, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } cl_mem hproVec_device = malloc_device (context, hproVecLen*sizeof(int), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } init_device_mem_int (context, cmd_queue, hproVec_device, hproVec, hproVecLen, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } cl_mem dproVec_device = malloc_device (context, dproVecLen*sizeof(int), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } init_device_mem_int (context, cmd_queue, dproVec_device, dproVec, dproVecLen, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } cl_mem scrsVec_device = malloc_device (context, (pats*pBlockSize)*sizeof(float), &error); if(error) { errno = GPUMALLOC; return ( 0 ); } err = clFinish(cmd_queue); if(err != CL_SUCCESS) { errno = GPUERROR; return ( 0 ); } /* connect the input arguments of the kernel with the corresponding mem */ set_kernel_arguments (kernel, cmd_queue, patsVec_device, txtsVec_device, argsVec_device, txtsLenVec_device, pensVec_device, hproVec_device, dproVec_device, scrsVec_device); /* synchronisation */ err = clFinish(cmd_queue); if(err != CL_SUCCESS) { errno = GPUERROR; return ( 0 ); } /* WorkSizeGlobal is the total number of threads of the device*/ size_t WorkSizeGlobal[] = {pBlockSize * pats}; /* WorkSizeLocal is the number of threads per group*/ size_t WorkSizeLocal[] = {pBlockSize}; /* kernel enters the command queue using WorkSizeGlobal and WorkSizeLocal */ err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, WorkSizeGlobal, WorkSizeLocal, 0, NULL, NULL); if(error) { errno = KERNEL; return ( 0 ); } /* finalise the kernel */ err = clFinish(cmd_queue); if(err != CL_SUCCESS) { errno = GPUERROR; return ( 0 ); } /* return the results from the GPU to the CPU */ read_device_mem_float (cmd_queue, pats*pBlockSize, scores, scrsVec_device, &error); if(error) { errno = GPUMALLOC; return ( 0 ); } /* deallocation */ free (txtsVec); free (patsVec); free (argsVec); free (txtsLenVec); free (pensVec); free (hproVec); free (dproVec); clReleaseMemObject(patsVec_device); clReleaseMemObject(txtsVec_device); clReleaseMemObject(argsVec_device); clReleaseMemObject(txtsLenVec_device); clReleaseMemObject(pensVec_device); clReleaseMemObject(hproVec_device); clReleaseMemObject(dproVec_device); clReleaseMemObject(scrsVec_device); return ( 1 ); }
unsigned int kernel_launch ( cl_kernel kernel, cl_context context, cl_command_queue cmd_queue, unsigned int n, int * a, int * b, int * c ) { int error = 1; int * aVec = calloc( n , sizeof ( int ) ); int * bVec = calloc( n , sizeof ( int ) ); int * cVec = calloc( n , sizeof ( int ) ); cl_int err; if( aVec == NULL || bVec == NULL || cVec == NULL ) { return ( 0 ); } /* Here it is not needed */ fill_aVec ( n, a, aVec ); fill_bVec ( n, b, bVec ); cl_mem aVec_device = malloc_device (context, n * sizeof( int ), &error); if(error) { return ( 0 ); } init_device_mem_int (context, cmd_queue, aVec_device, aVec, n, &error); if(error) { return ( 0 ); } cl_mem bVec_device = malloc_device (context, n * sizeof( int ), &error); if(error) { return ( 0 ); } init_device_mem_int (context, cmd_queue, bVec_device, bVec, n, &error); if(error) { return ( 0 ); } cl_mem cVec_device = malloc_device (context, n * sizeof( int ), &error); if(error) { return ( 0 ); } init_device_mem_int (context, cmd_queue, cVec_device, cVec, n, &error); if(error) { return ( 0 ); } err = clFinish( cmd_queue ); if( err != CL_SUCCESS ) { return ( 0 ); } set_kernel_arguments ( kernel, cmd_queue, aVec_device, bVec_device, cVec_device ); err = clFinish( cmd_queue ); if( err != CL_SUCCESS ) { return ( 0 ); } size_t WorkSizeGlobal[] = {n}; size_t WorkSizeLocal[] = {1}; err = clEnqueueNDRangeKernel( cmd_queue, kernel, 1, NULL, WorkSizeGlobal, WorkSizeLocal, 0, NULL, NULL); if( err != CL_SUCCESS ) { return ( 0 ); } err=clFinish( cmd_queue ); if( err != CL_SUCCESS ) { return ( 0 ); } read_device_mem_int (cmd_queue, n, c, cVec_device, &error); if( error ) { return ( 0 ); } /*Here c should contain the result */ free (aVec); free (bVec); free (cVec); clReleaseMemObject(aVec_device); clReleaseMemObject(bVec_device); clReleaseMemObject(cVec_device); return ( 1 ); }