static void send_bind_cont(void *arg) { struct ump_chan *uc = arg; struct monitor_binding *b = uc->monitor_binding; errval_t err; /* Send bind request to the monitor */ assert(uc->monitor_binding == b); assert(b->tx_vtbl.bind_ump_client_request); err = b->tx_vtbl.bind_ump_client_request(b, NOP_CONT, uc->iref, (uintptr_t)uc, uc->frame, uc->inchanlen, uc->outchanlen, uc->notify_cap); if (err_is_ok(err)) { // request sent ok event_mutex_unlock(&b->mutex); } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { // register to retry err = b->register_send(b, b->waitset, MKCONT(send_bind_cont,uc)); assert(err_is_ok(err)); // we hold the monitor binding mutex } else { // permanent failure sending message event_mutex_unlock(&b->mutex); uc->bind_continuation.handler(uc->bind_continuation.st, err_push(err, LIB_ERR_BIND_UMP_REQ), NULL, NULL_CAP); } }
static void send_bind_reply(void *arg) { struct bind_lmp_reply_state *st = arg; struct monitor_binding *b = st->b; errval_t err; err = st->b->tx_vtbl.bind_lmp_reply_monitor(st->b, NOP_CONT, st->args.err, st->args.mon_id, st->args.conn_id, st->args.ep); if (err_is_ok(err)) { event_mutex_unlock(&b->mutex); free(st); } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { err = st->b->register_send(st->b, st->b->waitset, MKCONT(send_bind_reply,st)); assert(err_is_ok(err)); // shouldn't fail, as we have the mutex } else { event_mutex_unlock(&b->mutex); USER_PANIC_ERR(err, "failed sending back reply to LMP bind request;" " request dropped!"); if (st->lc != NULL) { lmp_chan_destroy(st->lc); // FIXME: how do we tell the binding about this!? } free(st); } }
/** * \ brief Internal function to send a reply back to the monitor * */ static void send_bind_reply(void *st) { errval_t err; struct bind_multihop_reply_state *reply_state = st; struct monitor_binding *monitor_binding = reply_state->monitor_binding; // send back a bind success / failure message to the monitor MULTIHOP_DEBUG("sending reply back to monitor...\n"); err = monitor_binding->tx_vtbl.multihop_bind_service_reply(monitor_binding, NOP_CONT, reply_state->args.receiver_vci, reply_state->args.sender_vci, reply_state->args.err); if (err_is_ok(err)) { event_mutex_unlock(&monitor_binding->mutex); free(reply_state); } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { err = monitor_binding->register_send(monitor_binding, monitor_binding->waitset, MKCONT(send_bind_reply, reply_state)); assert(err_is_ok(err)); // this shouldn't fail, as we have the mutex } else { event_mutex_unlock(&monitor_binding->mutex); USER_PANIC_ERR( err, "failed sending back reply to multi-hop bind request to monitor"); free(st); } }
/**
 * Find the index of the calling thread in the event's worker-thread
 * table.
 *
 * Scans event_data->thread[] (under the event mutex) for an entry equal
 * to pthread_self().
 *
 * \param event_data  event whose thread table is searched
 * \return index in [0, MAX_THREADS_PER_WORK_ITEM) on success, or -1 if
 *         the calling thread is not registered for this event
 */
int
__clGetWorkThreadIndex (_cl_event * event_data)
{
  int thread_index = -1;
  int i;

  event_mutex_lock ("__clGetWorkThreadIndex", event_data);
  for (i = 0; i < MAX_THREADS_PER_WORK_ITEM; i++)
    {
      if (event_data->thread[i] == pthread_self ())
        {
#ifdef OCL_DEBUG_MESSAGES
          __clDebugPrint (CL_LOG_DEBUG,
                          "__clGetWorkThreadIndex: Thread id (%d) found @(%d)\n",
                          (int) event_data->thread[i], i);
#endif // #ifdef OCL_DEBUG_MESSAGES
          thread_index = i;
          break;
        }
    }

  if (thread_index == -1)
    {
#ifdef OCL_DEBUG_MESSAGES
      /* fixed copy-pasted function name in the log message
         (previously said "__clBuildKernelArgumentData") */
      __clDebugPrint (CL_LOG_DEBUG,
                      "__clGetWorkThreadIndex: Could not find work thread index\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
    }
  event_mutex_unlock ("__clGetWorkThreadIndex", event_data);

  return thread_index;
}
static void bfscope_send_flush_ack_cont(void* arg) { errval_t err; struct bfscope_ack_send_state *state = (struct bfscope_ack_send_state*) arg; struct monitor_binding *monitor_binding = state->monitor_binding; err = monitor_binding->tx_vtbl.bfscope_flush_ack(monitor_binding, MKCONT(free, state)); if (err_is_ok(err)) { event_mutex_unlock(&monitor_binding->mutex); } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { err = monitor_binding->register_send(monitor_binding, monitor_binding->waitset, MKCONT(&bfscope_send_flush_ack_cont, state)); assert(err_is_ok(err)); } else { event_mutex_unlock(&monitor_binding->mutex); //TODO: Error handling USER_PANIC_ERR(err, "Could not send flush ack message to monitor of bfscope"); } }
void * __clWorkThreadReadBuffer (void *params) { _cl_work_item_data *work_item_data = (_cl_work_item_data *) params; _cl_event *event_data = work_item_data->event_data; _cl_mem *src_memobj_data = work_item_data->mem_src_buffer_data; #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "ReadBuffer: START\n"); #endif // #ifdef OCL_DEBUG_MESSAGES // update the associated event status event_mutex_lock ("__clWorkThreadReadBuffer", event_data); event_data->status = CL_RUNNING; if (event_data->use_profiling) { event_data->profiling_start = __clTimeStamp (); } event_mutex_unlock ("__clWorkThreadReadBuffer", event_data); // copy work_item_data->mem_src_buffer_data to work_item_data->mem_ptr if (work_item_data->mem_ptr == NULL || src_memobj_data == NULL) { // invalid buffers... #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_ERROR, "__clWorkThreadReadBuffer: invalid buffers\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_INVALID_ARG_VALUE; return __clWorkThreadFinish (work_item_data, pthread_self ()); } // do not copy if source and destination pointers are the same // this may occur when using CL_MEM_USE_HOST_PTR buffers if (work_item_data->mem_ptr != src_memobj_data->data + work_item_data->mem_src_offset) { // copy the data memcpy (work_item_data->mem_ptr, src_memobj_data->data + work_item_data->mem_src_offset, work_item_data->mem_cb); } #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "ReadBuffer: COMPLETE\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_SUCCESS; return __clWorkThreadFinish (work_item_data, pthread_self ()); }
static void send_bind_cont(void *arg) { struct lmp_chan *lc = arg; struct monitor_binding *b = lc->monitor_binding; errval_t err; /* Send bind request to the monitor */ err = b->tx_vtbl.bind_lmp_client_request(b, NOP_CONT, lc->iref, (uintptr_t)lc, lc->buflen_words, lc->local_cap); if (err_is_ok(err)) { // request sent ok event_mutex_unlock(&b->mutex); } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { // register to retry err = b->register_send(b, b->waitset, MKCONT(send_bind_cont,lc)); assert(err_is_ok(err)); // we hold the monitor binding mutex } else { // permanent failure sending message event_mutex_unlock(&b->mutex); lc->bind_continuation.handler(lc->bind_continuation.st, err_push(err, LIB_ERR_BIND_LMP_REQ), NULL); } }
static void span_domain_request_sender(void *arg) { struct monitor_binding *mb = arg; struct span_domain_state *st = mb->st; errval_t err = mb->tx_vtbl. span_domain_request(mb, NOP_CONT, (uintptr_t)st, st->core_id, st->vroot, st->frame); if (err_is_ok(err)) { event_mutex_unlock(&mb->mutex); } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { /* Wait to use the monitor binding */ err = mb->register_send(mb, mb->waitset, MKCONT(span_domain_request_sender,mb)); if(err_is_fail(err)) { // shouldn't fail, as we have the mutex USER_PANIC_ERR(err, "register_send"); } } else { // permanent error event_mutex_unlock(&mb->mutex); err = err_push(err, MON_CLIENT_ERR_SPAN_DOMAIN_REQUEST); DEBUG_ERR(err, "span_domain_request"); } }
void * __clWorkThreadCopyBuffer (void *params) { _cl_work_item_data *work_item_data = (_cl_work_item_data *) params; _cl_event *event_data = work_item_data->event_data; _cl_mem *src_memobj_data = work_item_data->mem_src_buffer_data; _cl_mem *dst_memobj_data = work_item_data->mem_dst_buffer_data; #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "CopyBuffer: START\n"); #endif // #ifdef OCL_DEBUG_MESSAGES // update the associated event status event_mutex_lock ("__clWorkThreadCopyBuffer", event_data); event_data->status = CL_RUNNING; if (event_data->use_profiling) { event_data->profiling_start = __clTimeStamp (); } event_mutex_unlock ("__clWorkThreadCopyBuffer", event_data); // copy work_item_data->mem_scr_buffer to work_item_data->mem_dst_buffer_data if (src_memobj_data == NULL || dst_memobj_data == NULL) { // invalid buffers... #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_ERROR, "__clWorkThreadCopyBuffer: invalid buffers\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_INVALID_ARG_VALUE; return __clWorkThreadFinish (work_item_data, pthread_self ()); } // copy the data memcpy (dst_memobj_data->data + work_item_data->mem_dst_offset, src_memobj_data->data + work_item_data->mem_src_offset, work_item_data->mem_cb); #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "CopyBuffer: COMPLETE\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_SUCCESS; return __clWorkThreadFinish (work_item_data, pthread_self ()); }
void * __clWorkThreadUnmapBuffer (void *params) { _cl_work_item_data *work_item_data = (_cl_work_item_data *) params; _cl_event *event_data = work_item_data->event_data; _cl_mem *memobj_data = work_item_data->mem_src_buffer_data; #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "UnmapBuffer: START\n"); #endif // #ifdef OCL_DEBUG_MESSAGES // update the associated event status event_mutex_lock ("__clWorkThreadUnmapBuffer", event_data); event_data->status = CL_RUNNING; if (event_data->use_profiling) { event_data->profiling_start = __clTimeStamp (); } event_mutex_unlock ("__clWorkThreadUnmapBuffer", event_data); if (!memobj_data) { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_ERROR, "UnmapBuffer: invalid buffer\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_INVALID_ARG_VALUE; return __clWorkThreadFinish (work_item_data, pthread_self ()); } memobj_data->map_count--; #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "UnmapBuffer: COMPLETE\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_SUCCESS; return __clWorkThreadFinish (work_item_data, pthread_self ()); }
void * __clWorkThreadReleaseGLObjects (void *params) { _cl_work_item_data *work_item_data = (_cl_work_item_data *) params; _cl_event *event_data = work_item_data->event_data; cl_uint num_gl_memobjects = work_item_data->num_gl_mem_objects; _cl_mem **gl_memobjects = (_cl_mem **) (work_item_data->gl_mem_objects); _cl_mem *cur_gl_memobject = NULL; cl_uint i; #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "ReleaseGLObjects: START\n"); #endif // #ifdef OCL_DEBUG_MESSAGES // update the associated event status event_mutex_lock ("__clWorkThreadReleaseGLObjects", event_data); event_data->status = CL_RUNNING; if (event_data->use_profiling) { event_data->profiling_start = __clTimeStamp (); } event_mutex_unlock ("__clWorkThreadReleaseGLObjects", event_data); for (i = 0; i < num_gl_memobjects; i++) { cur_gl_memobject = gl_memobjects[i]; clReleaseMemObject ((cl_mem) cur_gl_memobject); } #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "ReleaseGLObjects: COMPLETE\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_SUCCESS; return __clWorkThreadFinish (work_item_data, pthread_self ()); }
/*
 * epoll backend dispatch function (libevent).
 *
 * Waits up to *tv for epoll events on the registered file descriptors,
 * then activates the corresponding read/write events. The event-base
 * mutex is dropped around the blocking epoll_wait call so other
 * threads can use the base meanwhile.
 *
 * Returns 0 on success, -1 on unrecoverable error.
 */
int
epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	struct epollop *epollop = arg;
	struct epoll_event *events = epollop->events;
	struct evepoll *evep;
	int i, res, timeout;

	/* unblock the signals this backend handles before sleeping */
	if (evsignal_deliver(&epollop->evsigmask) == -1)
		return (-1);

	/* round the timeout up to whole milliseconds
	   (NOTE(review): tv is dereferenced unconditionally -- callers
	   must not pass NULL here; confirm against the dispatch contract) */
	timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;

	/* drop the base lock (and pause benchmark sampling) across the
	   blocking wait */
	benchmark_stop_sample();
	event_mutex_unlock(base);
	res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
	event_mutex_lock(base);
	benchmark_start_sample();

	/* re-block the handled signals */
	if (evsignal_recalc(&epollop->evsigmask) == -1)
		return (-1);

	if (res == -1) {
		if (errno != EINTR) {
			event_warn("epoll_wait");
			return (-1);
		}
		/* interrupted by a signal: deliver it and report no events */
		evsignal_process();
		return (0);
	} else if (evsignal_caught)
		evsignal_process();

	event_debug(("%s: epoll_wait reports %d", __func__, res));

	for (i = 0; i < res; i++) {
		int which = 0;
		int what = events[i].events;
		struct event *evread = NULL, *evwrite = NULL;

		evep = (struct evepoll *)events[i].data.ptr;

		/* hang-ups and errors wake both directions so the
		   handlers can observe the condition themselves */
		if (what & EPOLLHUP)
			what |= EPOLLIN | EPOLLOUT;
		else if (what & EPOLLERR)
			what |= EPOLLIN | EPOLLOUT;

		if (what & EPOLLIN) {
			evread = evep->evread;
			which |= EV_READ;
		}
		if (what & EPOLLOUT) {
			evwrite = evep->evwrite;
			which |= EV_WRITE;
		}

		if (!which)
			continue;

		/* non-persistent events are deregistered before firing */
		if (evread != NULL && !(evread->ev_events & EV_PERSIST))
			event_del(evread);
		if (evwrite != NULL && evwrite != evread &&
		    !(evwrite->ev_events & EV_PERSIST))
			event_del(evwrite);

		if (evread != NULL)
			event_active(evread, EV_READ, 1);
		if (evwrite != NULL)
			event_active(evwrite, EV_WRITE, 1);
	}

	return (0);
}
void * __clWorkThreadCopyBufferRect (void *params) { _cl_work_item_data *work_item_data = (_cl_work_item_data *) params; _cl_event *event_data = work_item_data->event_data; _cl_mem *src_memobj_data = work_item_data->mem_src_buffer_data; _cl_mem *dst_memobj_data = work_item_data->mem_dst_buffer_data; unsigned char *dest_ptr = NULL; unsigned char *src_ptr = NULL; cl_uint i, j; #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "CopyBufferRect: START\n"); #endif // #ifdef OCL_DEBUG_MESSAGES // update the associated event status event_mutex_lock ("__clWorkThreadCopyBufferRect", event_data); event_data->status = CL_RUNNING; if (event_data->use_profiling) { event_data->profiling_start = __clTimeStamp (); } event_mutex_unlock ("__clWorkThreadCopyBufferRect", event_data); // copy work_item_data->mem_src_buffer_data to work_item_data->mem_ptr if (!dst_memobj_data || !src_memobj_data) { // invalid buffers... #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_ERROR, "__clWorkThreadCopyBufferRect: invalid buffers\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_INVALID_ARG_VALUE; return __clWorkThreadFinish (work_item_data, pthread_self ()); } // copy the data for (i = 0; i < work_item_data->image_region[2]; i++) { dest_ptr = dst_memobj_data->data + work_item_data->mem_dst_offset; dest_ptr += i * work_item_data->dest_slice_pitch; src_ptr = src_memobj_data->data + work_item_data->mem_src_offset; src_ptr += i * work_item_data->source_slice_pitch; for (j = 0; j < work_item_data->image_region[1]; j++) { memcpy (dest_ptr, src_ptr, work_item_data->image_region[0]); dest_ptr += work_item_data->dest_row_pitch; src_ptr += work_item_data->source_row_pitch; } } #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "CopyBufferRect: COMPLETE\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_SUCCESS; return __clWorkThreadFinish (work_item_data, pthread_self ()); }
/*
 * Common epilogue for every work-item thread.
 *
 * Records that the calling thread (thread_id) has finished its share of
 * the work item; once all of the item's threads are done it marks the
 * event CL_COMPLETE (or with the recorded error), fires user event
 * callbacks (OpenCL 1.1 builds), removes the work item from its queue,
 * and kicks the queue scheduler. Always returns NULL so it can be used
 * directly as a pthread return value.
 */
void *
__clWorkThreadFinish (_cl_work_item_data * work_item_data, pthread_t thread_id)
{
  int i;
  _cl_event *event_data = NULL;
  _cl_command_queue *command_queue_data = NULL;
  cl_bool task_done = CL_FALSE;

  if (!work_item_data)
    {
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_DEBUG,
                      "__clWorkThreadFinish: work_item_data is NULL, thread returning NULL\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
      return NULL;
    }

  // work_item_data will be gone after call to __clRemoveWorkItemFromQueue
  // so store the queue and thread_id
  event_data = work_item_data->event_data;
  if (!event_data)
    {
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_DEBUG,
                      "__clWorkThreadFinish: work item event_data is NULL, thread returning NULL\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
      return NULL;
    }
  command_queue_data = (_cl_command_queue*)event_data->command_queue;

  // the queue mutex is taken before the event mutex and held across the
  // removal below
  queue_mutex_lock ("__clWorkThreadFinish", command_queue_data);
  event_mutex_lock ("__clWorkThreadFinish", event_data);

  // credit this thread's completion in the event's thread table
  for (i = 0; i < MAX_THREADS_PER_WORK_ITEM; i++)
    {
      if (thread_id && thread_id == event_data->thread[i])
        {
          event_data->done_count++;
#ifdef OCL_DEBUG_MESSAGES
          __clDebugPrint (CL_LOG_DEBUG,
                          "__clWorkThreadFinish: event (0x%08x) thread (%d) @(%d)\n",
                          (int) event_data, (int) event_data->thread[i], i);
#endif // #ifdef OCL_DEBUG_MESSAGES
          break;
        }
    }

  // last thread out: publish the final event status
  if (event_data->done_count == event_data->thread_count)
    {
      // update the associated event status
      event_data->status = CL_COMPLETE;
      if (event_data->use_profiling)
        {
          event_data->profiling_end = __clTimeStamp ();
        }
      if (event_data->error != CL_SUCCESS)
        {
          event_data->status = event_data->error;
        }
    }
  event_mutex_unlock ("__clWorkThreadFinish", event_data);

  // NOTE(review): an old comment here claimed "we test count-1, since
  // 'done_count' is updated at the very end of thread", but the code
  // compares done_count == thread_count (done_count was already
  // incremented above under the event mutex). The item is also torn
  // down early when any thread recorded an error -- confirm that is
  // intended for partially-finished items.
  if (event_data->done_count == event_data->thread_count
      || event_data->error != CL_SUCCESS)
    {
      task_done = CL_TRUE;
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_DEBUG,
                      "__clWorkThreadFinish: all work item threads (%d) have finished\n",
                      event_data->thread_count);
#endif // #ifdef OCL_DEBUG_MESSAGES
#ifdef OCL_BUILD_VERSION_1_1
      // user callbacks run without the queue mutex held
      queue_mutex_unlock ("__clWorkThreadFinish", command_queue_data);
      __clEventCallUserCallbacks(event_data);
      queue_mutex_lock ("__clWorkThreadFinish", command_queue_data);
#endif // #ifdef OCL_BUILD_VERSION_1_1
      // all work item threads have finished, remove work item
      __clRemoveWorkItemFromQueue (work_item_data);
    }

  if (thread_id)
    {
      pthread_detach (thread_id);
    }
  queue_mutex_unlock ("__clWorkThreadFinish", command_queue_data);

  if (task_done)
    {
      __clRunAllQueues ();
    }

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG,
                  "__clWorkThreadFinish: Thread returning NULL\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
  return NULL;
}
/*
 * Tears down a finished work item: unlinks it from its command queue,
 * drops the references it holds (program, kernel, source/destination
 * buffers, wait-list events, its own event, and the queue itself) and
 * removes the work item node.
 *
 * Called from __clWorkThreadFinish with the queue mutex held.
 */
void
__clRemoveWorkItemFromQueue (_cl_work_item_data * work_item_data)
{
  // access the work queue
  int i;
  _cl_command_queue *command_queue_data =
    (_cl_command_queue *) (work_item_data->command_queue);
  cl_command_queue command_queue = work_item_data->command_queue;
  _cl_event *event_data = work_item_data->event_data;
  _cl_kernel *kernel_data = work_item_data->kernel_data;
  cl_uint num_events_in_wait_list = 0;

  // remove work item data pointer from the queue list
  __clListRemoveNode (&command_queue_data->work_item_queue_root,
                      work_item_data, 0);

  // release the kernel and the program it belongs to
  if (kernel_data)
    {
      if (kernel_data->program_data)
        {
#ifdef OCL_DEBUG_MESSAGES
          __clDebugPrint (CL_LOG_DEBUG,
                          "__clRemoveWorkItemFromQueue: releasing program (0x%08x)\n",
                          (int) kernel_data->program_data);
#endif // #ifdef OCL_DEBUG_MESSAGES
          clReleaseProgram ((cl_program) kernel_data->program_data);
          kernel_data->program_data = NULL;
        }
      clReleaseKernel ((cl_kernel) work_item_data->kernel_data);
      work_item_data->kernel_data = NULL;
    }

  // release the buffers referenced by the work item
  if (work_item_data->mem_src_buffer_data)
    {
      clReleaseMemObject ((cl_mem) work_item_data->mem_src_buffer_data);
      work_item_data->mem_src_buffer_data = NULL;
    }
  if (work_item_data->mem_dst_buffer_data)
    {
      clReleaseMemObject ((cl_mem) work_item_data->mem_dst_buffer_data);
      work_item_data->mem_dst_buffer_data = NULL;
    }

  // clear the event's worker-thread table
  event_mutex_lock ("__clRemoveWorkItemFromQueue", event_data);
  for (i = 0; i < MAX_THREADS_PER_WORK_ITEM; i++)
    {
      if (event_data->thread[i])
        {
          event_data->thread[i] = 0;
        }
    }
  event_mutex_unlock ("__clRemoveWorkItemFromQueue", event_data);

  // drop the references held on the wait-list events and free the list
  if (work_item_data->num_events_in_wait_list)
    {
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_DEBUG,
                      "__clRemoveWorkItemFromQueue: releasing event (0x%08x) wait list\n",
                      (int) event_data);
#endif // #ifdef OCL_DEBUG_MESSAGES
      num_events_in_wait_list = work_item_data->num_events_in_wait_list;
      work_item_data->num_events_in_wait_list = 0;
      for (i = 0; i < num_events_in_wait_list; i++)
        {
          clReleaseEvent (work_item_data->event_wait_list[i]);
        }
      __clFree (work_item_data->event_wait_list);
      work_item_data->event_wait_list = NULL;
    }

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG,
                  "__clRemoveWorkItemFromQueue: releasing event (0x%08x)\n",
                  (int) event_data);
#endif // #ifdef OCL_DEBUG_MESSAGES
  work_item_data->event_data = NULL;
  clReleaseEvent ((cl_event) event_data);

  // second removal, this time with flag 1 -- presumably this frees the
  // node itself; confirm against __clListRemoveNode's contract
  __clListRemoveNode (&command_queue_data->work_item_data_root,
                      work_item_data, 1);
  work_item_data = NULL;

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG,
                  "__clRemoveWorkItemFromQueue: work item removed\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  clReleaseCommandQueue (command_queue);

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "__clRemoveWorkItemFromQueue: DONE\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
}
/*
 * Worker-thread entry for executing a kernel on the "GPU" path: looks
 * up this thread's slot, materializes the kernel argument block, marks
 * the event running and invokes the kernel entry function with this
 * thread's work dimensions.
 */
void *
__clWorkThreadExecuteKernelGPU (void *params)
{
  // call the execute kernel entry function
  _cl_work_item_data *work_item_data = (_cl_work_item_data *) params;
  _cl_event *event_data = work_item_data->event_data;
  _cl_kernel *kernel_data = work_item_data->kernel_data;
  CL_KERNEL_ARGUMENT_DATA arguments;
  cl_kernel_work_dimensions thread_work_dimensions;
  int thread_index = 0;
  cl_entryFunction fn = NULL;

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "kernel (GPU): START\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  // which of the work item's threads is this?
  thread_index = __clGetWorkThreadIndex (event_data);
  if (thread_index < 0)
    {
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_ERROR,
                      "kernel (GPU): failed to find thread index, exiting\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
      // NOTE(review): work_item_data->error is left untouched on this
      // path, so the failure is not reflected in the event status --
      // confirm this is intended
      return __clWorkThreadFinish (work_item_data, pthread_self ());
    }

  // snapshot this thread's slice of the ND-range
  memcpy (&thread_work_dimensions,
          &work_item_data->thread_work_dimensions[thread_index],
          sizeof (cl_kernel_work_dimensions));

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG,
                  "Executing kernel (GPU): parsing kernel arguments\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  // build the flattened argument block for the kernel call
  work_item_data->error =
    __clBuildKernelArgumentData (work_item_data, &arguments);
  if (work_item_data->error != CL_SUCCESS)
    {
      return __clWorkThreadFinish (work_item_data, pthread_self ());
    }

  // mark the associated event as running (and start profiling)
  event_mutex_lock ("__clWorkThreadExecuteKernelGPU", event_data);
  event_data->status = CL_RUNNING;
  if (event_data->use_profiling)
    {
      event_data->profiling_start = __clTimeStamp ();
    }
  event_mutex_unlock ("__clWorkThreadExecuteKernelGPU", event_data);

  // run the kernel body for this thread's work dimensions
  fn = kernel_data->entry_fn;
  (*fn) (&thread_work_dimensions, arguments);

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "kernel (GPU): COMPLETE\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  work_item_data->error = CL_SUCCESS;
  return __clWorkThreadFinish (work_item_data, pthread_self ());
}
void * __clWorkThreadAcquireGLObjects (void *params) { _cl_work_item_data *work_item_data = (_cl_work_item_data *) params; _cl_event *event_data = work_item_data->event_data; cl_uint num_gl_memobjects = work_item_data->num_gl_mem_objects; _cl_mem **gl_memobjects = (_cl_mem **) (work_item_data->gl_mem_objects); _cl_mem *cur_gl_memobject = NULL; unsigned char *rgb_data = NULL; cl_uint i, j, k, rgb_size, rgba_size; Display *display = work_item_data->gl_display; GLXContext work_thread_ctx; XVisualInfo *visual_info; // attributes for a single buffered visual in RGBA format with at least // 4 bits per color and a 16 bit depth buffer int attrListSgl[] = { GLX_RGBA, GLX_RED_SIZE, 4, GLX_GREEN_SIZE, 4, GLX_BLUE_SIZE, 4, GLX_DEPTH_SIZE, 16, None }; // attributes for a double buffered visual in RGBA format with at least // 4 bits per color and a 16 bit depth buffer */ int attrListDbl[] = { GLX_RGBA, GLX_DOUBLEBUFFER, GLX_RED_SIZE, 4, GLX_GREEN_SIZE, 4, GLX_BLUE_SIZE, 4, GLX_DEPTH_SIZE, 16, None }; #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: START\n"); #endif // #ifdef OCL_DEBUG_MESSAGES // update the associated event status event_mutex_lock ("__clWorkThreadAcquireGLObjects", event_data); event_data->status = CL_RUNNING; if (event_data->use_profiling) { event_data->profiling_start = __clTimeStamp (); } event_mutex_unlock ("__clWorkThreadAcquireGLObjects", event_data); // First, try to get double buffered visual #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Getting visual info...\n"); #endif // #ifdef OCL_DEBUG_MESSAGES visual_info = glXChooseVisual (work_item_data->gl_display, DefaultScreen (work_item_data->gl_display), attrListDbl); if (visual_info == NULL) { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Could not get double buffered visual, trying single buffered one\n"); #endif // #ifdef OCL_DEBUG_MESSAGES // Getting double buffered visual failed, try to get single buffered one 
visual_info = glXChooseVisual (work_item_data->gl_display, work_item_data->gl_drawable, attrListSgl); if (visual_info == NULL) { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_ERROR, "AcquireGLObjects: Error getting the visual\n"); #endif // #ifdef OCL_DEBUG_MESSAGES } else { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Got single buffered visual\n"); #endif // #ifdef OCL_DEBUG_MESSAGES } } else { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Got double buffered visual\n"); #endif // #ifdef OCL_DEBUG_MESSAGES } // create an OpenGL context for this thread, this context is sharing // the main thread's context #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Creating OpenGL context...\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_thread_ctx = glXCreateContext (work_item_data->gl_display, visual_info, work_item_data->gl_ctx, GL_TRUE); free (visual_info); #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Setting current context...\n"); #endif // #ifdef OCL_DEBUG_MESSAGES if (glXMakeCurrent (work_item_data->gl_display, work_item_data->gl_drawable, work_thread_ctx)) { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Current context set\n"); #endif // #ifdef OCL_DEBUG_MESSAGES } else { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_ERROR, "AcquireGLObjects: Error setting current context\n"); #endif // #ifdef OCL_DEBUG_MESSAGES } for (i = 0; i < num_gl_memobjects; i++) { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Handling object: %d...\n", i); #endif // #ifdef OCL_DEBUG_MESSAGES cur_gl_memobject = gl_memobjects[i]; rgb_size = cur_gl_memobject->gl_texture_width * cur_gl_memobject->gl_texture_height * 3; rgba_size = cur_gl_memobject->gl_texture_width * cur_gl_memobject->gl_texture_height * 4; rgb_data = (unsigned char *) __clAlloc (rgb_size); if (!rgb_data) { #ifdef OCL_DEBUG_MESSAGES __clDebugPrint 
(CL_LOG_ERROR, "AcquireGLObjects: out of memory\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_OUT_OF_HOST_MEMORY; return __clWorkThreadFinish (work_item_data, pthread_self ()); } k = 0; for (j = 0; j < rgba_size; j += 4) { rgb_data[k] = cur_gl_memobject->data[j]; /* r */ rgb_data[k + 1] = cur_gl_memobject->data[j + 1]; /* g */ rgb_data[k + 2] = cur_gl_memobject->data[j + 2]; /* b */ k += 3; } glBindTexture (cur_gl_memobject->gl_texture_target, cur_gl_memobject->gl_texture_id); glTexImage2D (GL_TEXTURE_2D, 0, 3, cur_gl_memobject->gl_texture_width, cur_gl_memobject->gl_texture_height, 0, GL_RGB, GL_UNSIGNED_BYTE, rgb_data); __clFree (rgb_data); #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Object: %d done\n", i); #endif // #ifdef OCL_DEBUG_MESSAGES } // cleanup the OpenGL context created for this thread #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: Cleaning up OpenGL context\n"); #endif // #ifdef OCL_DEBUG_MESSAGES glXMakeCurrent (display, None, NULL); glXDestroyContext (display, work_thread_ctx); #ifdef OCL_DEBUG_MESSAGES __clDebugPrint (CL_LOG_DEBUG, "AcquireGLObjects: COMPLETE\n"); #endif // #ifdef OCL_DEBUG_MESSAGES work_item_data->error = CL_SUCCESS; return __clWorkThreadFinish (work_item_data, pthread_self ()); }
/*
 * Worker-thread entry for clEnqueueCopyBufferToImage: copies
 * image_region[1] scanlines of image_region[0] pixels from a linear
 * buffer into a 2D image object, honoring the image pitch.
 */
void *
__clWorkThreadBufferToImage (void *params)
{
  _cl_work_item_data *work_item_data = (_cl_work_item_data *) params;
  _cl_event *event_data = work_item_data->event_data;
  _cl_mem *src_memobj_data = work_item_data->mem_src_buffer_data;
  _cl_mem *dst_memobj_data = work_item_data->mem_dst_buffer_data;
  unsigned char *src_buffer_data = NULL;
  unsigned char *dst_image_data = NULL;
  cl_uint i;
  cl_uint dst_horline_bytes = 0;

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "CopyBufferToImage: START\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  // update the associated event status
  event_mutex_lock ("__clWorkThreadBufferToImage", event_data);
  event_data->status = CL_RUNNING;
  if (event_data->use_profiling)
    {
      event_data->profiling_start = __clTimeStamp ();
    }
  event_mutex_unlock ("__clWorkThreadBufferToImage", event_data);

  // validate the source buffer and destination image
  if (src_memobj_data == NULL || dst_memobj_data == NULL)
    {
      // invalid buffers...
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_ERROR, "CopyBufferToImage: invalid buffers\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
      work_item_data->error = CL_INVALID_ARG_VALUE;
      return __clWorkThreadFinish (work_item_data, pthread_self ());
    }

  // source: linear buffer starting at mem_src_offset
  src_buffer_data = src_memobj_data->data;
  dst_image_data = dst_memobj_data->data;
  src_buffer_data += work_item_data->mem_src_offset;
  // destination: start pixel (image_dst_origin[0], image_dst_origin[1])
  dst_image_data +=
    work_item_data->image_dst_origin[1] * dst_memobj_data->image2d_info->pitch;
  dst_image_data +=
    work_item_data->image_dst_origin[0] *
    dst_memobj_data->image2d_info->element_size;
  // bytes per copied scanline
  dst_horline_bytes =
    work_item_data->image_region[0] *
    dst_memobj_data->image2d_info->element_size;

  for (i = 0; i < work_item_data->image_region[1]; i++)
    {
      // copy horline to destination image (clamped to the image pitch)
      memcpy (dst_image_data, src_buffer_data,
              min (dst_horline_bytes, dst_memobj_data->image2d_info->pitch));
      // move source pointer to next horline
      src_buffer_data += dst_horline_bytes;
      // move destination pointer to next horline
      dst_image_data += dst_memobj_data->image2d_info->pitch;
    }

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "CopyBufferToImage: COMPLETE\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  work_item_data->error = CL_SUCCESS;
  return __clWorkThreadFinish (work_item_data, pthread_self ());
}
/*
 * Worker-thread entry for clEnqueueWriteImage: copies scanlines from
 * the user-supplied host pointer into a 2D image object, honoring the
 * image pitch.
 */
void *
__clWorkThreadWriteImage (void *params)
{
  _cl_work_item_data *work_item_data = (_cl_work_item_data *) params;
  _cl_event *event_data = work_item_data->event_data;
  _cl_mem *dst_memobj_data = work_item_data->mem_dst_buffer_data;
  unsigned char *dest_image_data = NULL;
  unsigned char *src_ptr = NULL;
  cl_uint i;
  cl_uint horline_bytes = 0;

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "WriteImage: START\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  // update the associated event status
  event_mutex_lock ("__clWorkThreadWriteImage", event_data);
  event_data->status = CL_RUNNING;
  if (event_data->use_profiling)
    {
      event_data->profiling_start = __clTimeStamp ();
    }
  event_mutex_unlock ("__clWorkThreadWriteImage", event_data);

  if (work_item_data->mem_ptr == NULL || dst_memobj_data == NULL)
    {
      // invalid buffers...
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_ERROR,
                      "__clWorkThreadWriteImage: invalid buffers\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
      work_item_data->error = CL_INVALID_ARG_VALUE;
      return __clWorkThreadFinish (work_item_data, pthread_self ());
    }
  if (dst_memobj_data->data == NULL || dst_memobj_data->image2d_info == NULL)
    {
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_ERROR,
                      "__clWorkThreadWriteImage: invalid destination image\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
      work_item_data->error = CL_INVALID_ARG_VALUE;
      return __clWorkThreadFinish (work_item_data, pthread_self ());
    }

  // write data to mem_dst_buffer_data
  // find start location: pixel (image_dst_origin[0], image_dst_origin[1])
  dest_image_data = dst_memobj_data->data;
  src_ptr = (unsigned char *) work_item_data->mem_ptr;
  dest_image_data +=
    work_item_data->image_dst_origin[1] * dst_memobj_data->image2d_info->pitch;
  dest_image_data +=
    work_item_data->image_dst_origin[0] *
    dst_memobj_data->image2d_info->element_size;
  // host rows advance by the caller-supplied row pitch
  horline_bytes = work_item_data->image_row_pitch;

  for (i = 0; i < work_item_data->image_region[1]; i++)
    {
      // copy horline to destination pointer
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_DEBUG, "0x%08x-0x%08x-%d\n", dest_image_data,
                      src_ptr, horline_bytes);
#endif // #ifdef OCL_DEBUG_MESSAGES
      // clamp the copy to the image pitch ("corrected due to pitch")
      memcpy (dest_image_data, src_ptr,
              min (horline_bytes, dst_memobj_data->image2d_info->pitch));
      // move destination pointer to next horline (image pitch, not the
      // host row pitch)
      dest_image_data += dst_memobj_data->image2d_info->pitch;
      // move source pointer to next horline
      src_ptr += horline_bytes;
    }

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "WriteImage: COMPLETE\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  work_item_data->error = CL_SUCCESS;
  return __clWorkThreadFinish (work_item_data, pthread_self ());
}
/*
 * Worker-thread entry for clEnqueueReadImage: copies scanlines from a
 * 2D image object into the user-supplied host pointer.
 */
void *
__clWorkThreadReadImage (void *params)
{
  _cl_work_item_data *work_item_data = (_cl_work_item_data *) params;
  _cl_event *event_data = work_item_data->event_data;
  _cl_mem *src_memobj_data = work_item_data->mem_src_buffer_data;
  unsigned char *src_image_data = NULL;
  unsigned char *dest_ptr = NULL;
  cl_uint i;
  cl_uint horline_bytes = 0;

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "ReadImage: START\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  // update the associated event status
  event_mutex_lock ("__clWorkThreadReadImage", event_data);
  event_data->status = CL_RUNNING;
  if (event_data->use_profiling)
    {
      event_data->profiling_start = __clTimeStamp ();
    }
  event_mutex_unlock ("__clWorkThreadReadImage", event_data);

  if (work_item_data->mem_ptr == NULL || src_memobj_data == NULL)
    {
      // invalid buffers...
#ifdef OCL_DEBUG_MESSAGES
      __clDebugPrint (CL_LOG_ERROR,
                      "__clWorkThreadReadImage: invalid buffers\n");
#endif // #ifdef OCL_DEBUG_MESSAGES
      work_item_data->error = CL_INVALID_ARG_VALUE;
      return __clWorkThreadFinish (work_item_data, pthread_self ());
    }

  // read data from mem_src_buffer_data
  // find start location
  // NOTE(review): the source row offset and per-row stepping below use
  // the host image_row_pitch while the sibling Write/CopyBufferToImage
  // routines step the image side by image2d_info->pitch; the src/dest
  // stepping looks swapped relative to __clWorkThreadWriteImage --
  // verify this is intentional before touching it
  src_image_data = src_memobj_data->data;
  dest_ptr = (unsigned char *) work_item_data->mem_ptr;
  src_image_data +=
    work_item_data->image_src_origin[1] * work_item_data->image_row_pitch;
  src_image_data +=
    work_item_data->image_src_origin[0] *
    src_memobj_data->image2d_info->element_size;
  // bytes per copied scanline
  horline_bytes =
    work_item_data->image_region[0] *
    src_memobj_data->image2d_info->element_size;

  for (i = 0; i < work_item_data->image_region[1]; i++)
    {
      // do not copy if source and destination pointers are the same
      // this may occur when using CL_MEM_USE_HOST_PTR buffers
      if (dest_ptr != src_image_data)
        {
          // copy horline to destination pointer
          memcpy (dest_ptr, src_image_data,
                  min (horline_bytes, work_item_data->image_row_pitch));
        }
      // move source pointer to next horline
      src_image_data += work_item_data->image_row_pitch;
      // move destination pointer to next horline
      dest_ptr += horline_bytes;
    }

#ifdef OCL_DEBUG_MESSAGES
  __clDebugPrint (CL_LOG_DEBUG, "ReadImage: COMPLETE\n");
#endif // #ifdef OCL_DEBUG_MESSAGES

  work_item_data->error = CL_SUCCESS;
  return __clWorkThreadFinish (work_item_data, pthread_self ());
}
/**
 * \brief Release the monitor binding's event mutex.
 *
 * Thin wrapper so callers (e.g. generated flounder stubs) can unlock
 * the shared monitor binding without reaching into the binding
 * structure themselves.
 */
void flounder_support_monitor_mutex_unlock(struct monitor_binding *mb)
{
    event_mutex_unlock(&mb->mutex);
}