/*
 * cudaLaunch -- wrapper that forwards to the real cudaLaunch entry point,
 * optionally bracketing the call with the Extrae enter/exit probes.
 *
 * p1 is handed unchanged to Extrae_cudaLaunch_Enter (when tracing) and to
 * real_cudaLaunch.
 *
 * Returns whatever real_cudaLaunch returns. If real_cudaLaunch is NULL the
 * wrapper cannot forward the call: it prints a diagnostic on stderr and
 * terminates the process with a non-zero exit status.
 */
cudaError_t cudaLaunch (const char *p1)
{
	cudaError_t res;

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME": THREAD %d cudaLaunch is at %p\n", THREADID, real_cudaLaunch);
	fprintf (stderr, PACKAGE_NAME": THREAD %d cudaLaunch params %p\n", THREADID, p1);
#endif

	/* Without the real symbol there is nothing to forward to. This is a
	   fatal condition, so report failure (not success) to the parent. */
	if (real_cudaLaunch == NULL)
	{
		fprintf (stderr, "Unable to find cudaLaunch in DSOs! Dying...\n");
		exit (1);
	}

	/* The tracing predicate is evaluated exactly once per call. */
	if (mpitrace_on && Extrae_get_trace_CUDA())
	{
		Extrae_cudaLaunch_Enter (p1);
		res = real_cudaLaunch (p1);
		Extrae_cudaLaunch_Exit ();
	}
	else
	{
		/* Tracing disabled: plain pass-through. */
		res = real_cudaLaunch (p1);
	}

	return res;
}
Beispiel #2
0
/*
 * cudaLaunch -- wrapper that emits kernel argument / execution events around
 * a call to real_cudaLaunch.
 *
 * entry is compared against the address field of each cuda_lookup_table
 * slot to identify the kernel being launched; the table is scanned until a
 * slot whose address is NULL.
 *
 * For a known kernel, one event is emitted per parameter (buffer or scalar,
 * chosen by ldParam->is_pointer, with values read from temp_arguments),
 * then kernel_executed_event, the real launch, and kernel_finished_event.
 *
 * Returns the value returned by real_cudaLaunch(entry). When the kernel
 * cannot be identified the call is forwarded to real_cudaLaunch without
 * emitting any event.
 */
cudaError_t cudaLaunch (const void *entry) {
    struct ld_kernel_s *ldKernel;
    struct kernel_lookup_s *kernel;
    cudaError_t err;
    int slot;
    int i;

    /* Linear search; stops either on a match or on the NULL terminator. */
    for (slot = 0; cuda_lookup_table[slot].address != NULL; slot++) {
        if (cuda_lookup_table[slot].address == entry) {
            break;
        }
    }
    if (!cuda_lookup_table[slot].address) {
        /* Unknown kernel: launch it untouched. */
        warning("Couldn't find kernel @%p\n", entry);
        return real_cudaLaunch(entry);
    }

    kernel = &cuda_lookup_table[slot];
    ldKernel = find_kernel_entry(kernel);
    assert(ldKernel);
    if (!ldKernel) {
        /* Reached only when assert() is compiled out (NDEBUG): avoid
           dereferencing NULL below and fall back to a plain launch. */
        warning("Couldn't find kernel entry @%p\n", entry);
        return real_cudaLaunch(entry);
    }

    dbg_notify_event();

    for (i = 0; i < ldKernel->nb_params; i++) {
        struct ld_kern_param_s *ldParam = &ldKernel->params[i];
        const void **current_arg = (const void **) temp_arguments[i].arg;

        if (ldParam->is_pointer) {
            size_t offset = 0;
            struct ld_mem_s *ldBuffer = find_off_mem_entry(*current_arg, &offset);

            if (!ldBuffer) {
                error("in kernel %s, arg #%d, invalid bufer %s@%p", ldKernel->name, i, ldParam->name, *current_arg);
                assert(ldBuffer);
                /* Reached only if error() returns and assert() is compiled
                   out: never hand a NULL buffer to the event layer. */
                continue;
            }
            kernel_set_buffer_arg_event (ldKernel, ldParam, i, ldBuffer, offset);
        } else {
            kernel_set_scalar_arg_event (ldKernel, ldParam, i, current_arg);
        }
    }

    kernel_executed_event(ldKernel, configure_get_worksizes(NULL, NULL), 3);
    err = real_cudaLaunch(entry);
    kernel_finished_event(ldKernel, configure_get_worksizes(NULL, NULL), 3);

    return err;
}