示例#1
0
/*
 * Exported replacement for cudaConfigureCall.
 *
 * Forwards the launch configuration to gmm_cudaConfigureCall() when the
 * `initialized` flag is set. Otherwise it logs a warning and forwards the
 * same arguments to nv_cudaConfigureCall().
 *
 * All four arguments are passed through unchanged; the return value is
 * whatever the selected callee returns.
 */
GMM_EXPORT
cudaError_t cudaConfigureCall(
		dim3 gridDim,
		dim3 blockDim,
		size_t sharedMem,
		cudaStream_t stream)
{
	/* Not initialized yet: warn and take the nv_ path. */
	if (!initialized) {
		gprint(WARN, "cudaConfigureCall called outside GMM\n");
		return nv_cudaConfigureCall(gridDim, blockDim, sharedMem, stream);
	}

	return gmm_cudaConfigureCall(gridDim, blockDim, sharedMem, stream);
}
示例#2
0
/*
 * Interposed cudaConfigureCall that redirects every launch configuration
 * onto the shared stream `mystream`.
 *
 * On first use it resolves the next definitions of cudaStreamCreate and
 * cudaConfigureCall with dlsym(RTLD_NEXT, ...) and creates `mystream` if it
 * is still unset. `nv_cudaStreamCreate` and `mystream` are declared outside
 * this function.
 *
 * Parameters:
 *   gridDim, blockDim, sharedMem - forwarded unchanged.
 *   stream - intentionally ignored; `mystream` is always used instead.
 *
 * Returns:
 *   cudaErrorSharedObjectSymbolNotFound if either symbol cannot be resolved,
 *   the error from cudaStreamCreate if the shared stream cannot be created,
 *   otherwise the result of the forwarded cudaConfigureCall.
 */
cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream)
{
	static cudaError_t (*nv_cudaConfigureCall)(dim3, dim3, size_t, cudaStream_t) = NULL;
	cudaError_t ret;

	/* The caller's stream is deliberately unused (see note before the call). */
	(void)stream;

	if (!nv_cudaStreamCreate) {
		nv_cudaStreamCreate = dlsym(RTLD_NEXT, "cudaStreamCreate");
		if (!nv_cudaStreamCreate) {
			fprintf(stderr, "failed to find symbol cudaStreamCreate : %s\n", dlerror());
			return cudaErrorSharedObjectSymbolNotFound;
		}
	}

	if (!mystream) {
		ret = nv_cudaStreamCreate(&mystream);
		/*
		 * Do not silently continue when the shared stream could not be
		 * created: report the failure to the caller instead of configuring
		 * the launch on an unset stream handle.
		 */
		if (ret != cudaSuccess) {
			fprintf(stderr, "failed to create stream for cudaConfigureCall: error %d\n", (int)ret);
			return ret;
		}
	}

	if (!nv_cudaConfigureCall) {
		nv_cudaConfigureCall = dlsym(RTLD_NEXT, "cudaConfigureCall");
		if (!nv_cudaConfigureCall) {
			fprintf(stderr, "failed to find symbol cudaConfigureCall: %s\n", dlerror());
			return cudaErrorSharedObjectSymbolNotFound;
		}
	}

	/*
	 * Always configure the launch on the shared stream. The original author
	 * noted a segmentation fault when the call was made conditional on
	 * `stream == 0` (forwarding the caller's stream otherwise), so that
	 * variant is not used.
	 */
	ret = nv_cudaConfigureCall(gridDim, blockDim, sharedMem, mystream);

	return ret;
}
示例#3
0
/*
 * Interposed cudaConfigureCall that traces the call.
 *
 * Resolves the next definition of cudaConfigureCall via dlsym(RTLD_NEXT, ...)
 * on first use (cached in a function-local static), prints a wall-clock
 * timestamp to stdout before and after forwarding, and returns the forwarded
 * call's result.
 *
 * Returns cudaErrorSharedObjectSymbolNotFound if the symbol lookup fails.
 */
cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream)
{
	static cudaError_t (*nv_cudaConfigureCall)(dim3, dim3, size_t, cudaStream_t) = NULL;
	struct timeval now;
	double stamp;
	cudaError_t status;

	/* Lazy one-time lookup of the underlying implementation. */
	if (nv_cudaConfigureCall == NULL) {
		nv_cudaConfigureCall = dlsym(RTLD_NEXT, "cudaConfigureCall");
		if (nv_cudaConfigureCall == NULL) {
			fprintf(stderr, "failed to find symbol cudaConfigureCall: %s\n", dlerror());
			return cudaErrorSharedObjectSymbolNotFound;
		}
	}

	/* Timestamp (seconds, with fractional microseconds) before forwarding. */
	gettimeofday(&now, NULL);
	stamp = now.tv_sec + now.tv_usec / 1000000.0;
	printf("[gvm] %lf intercepting cudaConfigureCall\n", stamp);

	status = nv_cudaConfigureCall(gridDim, blockDim, sharedMem, stream);

	/* Timestamp after the forwarded call has returned. */
	gettimeofday(&now, NULL);
	stamp = now.tv_sec + now.tv_usec / 1000000.0;
	printf("[gvm] %lf intercepted cudaConfigureCall\n", stamp);

	return status;
}