/**
 * Blocks the caller until the commands already submitted to command_queue
 * have been processed.
 *
 * If the queue reports that it is done, the function returns immediately.
 * Otherwise a marker is enqueued and the call waits on the marker's event.
 *
 * @param command_queue  queue to drain
 * @return CL_INVALID_COMMAND_QUEUE if the handle is rejected by
 *         FreeOCL::is_valid(), the error from clEnqueueMarkerFCL() if the
 *         marker could not be enqueued, otherwise the result of
 *         clWaitForEventsFCL().
 */
cl_int clFinishFCL (cl_command_queue command_queue)
{
    MSG(clFinishFCL);

    if (!FreeOCL::is_valid(command_queue))
        return CL_INVALID_COMMAND_QUEUE;

    // A successful is_valid() is always paired with an unlock() here, so the
    // queue is presumably returned locked; sample done() first, then release.
    const bool b_idle = command_queue->done();
    command_queue->unlock();
    if (b_idle)
        return CL_SUCCESS;

    // Work is still pending: enqueue a marker and wait for its event.
    cl_event marker;
    const cl_int enqueue_status = clEnqueueMarkerFCL(command_queue, &marker);
    if (enqueue_status != CL_SUCCESS)
        return enqueue_status;

    const cl_int wait_status = clWaitForEventsFCL(1, &marker);
    clReleaseEventFCL(marker);
    return wait_status;
}
size_t _cl_command_queue::proc() { while(!b_stop) { b_working = false; lock(); while (queue.empty()) { wait_locked(); if (b_stop) { unlock(); return 0; } } FreeOCL::smartptr<FreeOCL::command> cmd = queue.front(); b_working = true; queue.pop_front(); unlock(); if (b_stop) break; if (!is_command_ready_to_process(cmd)) { if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { // Find something else to process std::deque<FreeOCL::smartptr<FreeOCL::command> > waiting_queue; waiting_queue.push_front(cmd); bool b_found = false; lock(); while(!queue.empty() && !b_found) { cmd = queue.front(); queue.pop_front(); unlock(); if (cmd->get_type() == CL_COMMAND_MARKER) { lock(); queue.push_front(cmd); break; } b_found = is_command_ready_to_process(cmd); if (!b_found) waiting_queue.push_front(cmd); lock(); } for(std::deque<FreeOCL::smartptr<FreeOCL::command> >::const_iterator i = waiting_queue.begin() ; i != waiting_queue.end() ; ++i) queue.push_front(*i); if (!b_found) // No choice, we must try later { wait_locked(); unlock(); continue; } unlock(); } else { // Wait for events (those events are likely to come from another command queue or are user events) clWaitForEventsFCL(cmd->num_events_in_wait_list, cmd->event_wait_list); } } if (cmd->event) { cmd->event->lock(); cmd->event->change_status(CL_RUNNING); cmd->event->unlock(); } switch(cmd->get_type()) { case CL_COMMAND_READ_IMAGE: case CL_COMMAND_READ_BUFFER_RECT: { char *dst_ptr = (char*)cmd.as<FreeOCL::command_read_buffer_rect>()->ptr; const char *src_ptr = (const char*)cmd.as<FreeOCL::command_read_buffer_rect>()->buffer->ptr + cmd.as<FreeOCL::command_read_buffer_rect>()->offset; const size_t src_row_pitch = cmd.as<FreeOCL::command_read_buffer_rect>()->buffer_pitch[0]; const size_t dst_row_pitch = cmd.as<FreeOCL::command_read_buffer_rect>()->host_pitch[0]; const size_t cb[3] = { cmd.as<FreeOCL::command_read_buffer_rect>()->cb[0], cmd.as<FreeOCL::command_read_buffer_rect>()->cb[1], 
cmd.as<FreeOCL::command_read_buffer_rect>()->cb[2] }; const size_t src_slice_pitch = cmd.as<FreeOCL::command_read_buffer_rect>()->buffer_pitch[1] - src_row_pitch * cb[1]; const size_t dst_slice_pitch = cmd.as<FreeOCL::command_read_buffer_rect>()->host_pitch[1] - dst_row_pitch * cb[1]; for(size_t z = 0 ; z < cb[2] ; ++z, src_ptr += src_slice_pitch, dst_ptr += dst_slice_pitch) for(size_t y = 0 ; y < cb[1] ; ++y, src_ptr += src_row_pitch, dst_ptr += dst_row_pitch) memcpy(dst_ptr, src_ptr, cb[0]); } break; case CL_COMMAND_WRITE_IMAGE: case CL_COMMAND_WRITE_BUFFER_RECT: { const char *src_ptr = (const char*)cmd.as<FreeOCL::command_write_buffer_rect>()->ptr; char *dst_ptr = (char*)cmd.as<FreeOCL::command_write_buffer_rect>()->buffer->ptr + cmd.as<FreeOCL::command_write_buffer_rect>()->offset; const size_t dst_row_pitch = cmd.as<FreeOCL::command_write_buffer_rect>()->buffer_pitch[0]; const size_t src_row_pitch = cmd.as<FreeOCL::command_write_buffer_rect>()->host_pitch[0]; const size_t cb[3] = { cmd.as<FreeOCL::command_write_buffer_rect>()->cb[0], cmd.as<FreeOCL::command_write_buffer_rect>()->cb[1], cmd.as<FreeOCL::command_write_buffer_rect>()->cb[2] }; const size_t dst_slice_pitch = cmd.as<FreeOCL::command_write_buffer_rect>()->buffer_pitch[1] - dst_row_pitch * cb[1]; const size_t src_slice_pitch = cmd.as<FreeOCL::command_write_buffer_rect>()->host_pitch[1] - src_row_pitch * cb[1]; for(size_t z = 0 ; z < cb[2] ; ++z, src_ptr += src_slice_pitch, dst_ptr += dst_slice_pitch) for(size_t y = 0 ; y < cb[1] ; ++y, src_ptr += src_row_pitch, dst_ptr += dst_row_pitch) memcpy(dst_ptr, src_ptr, cb[0]); } break; case CL_COMMAND_COPY_IMAGE_TO_BUFFER: case CL_COMMAND_COPY_BUFFER_TO_IMAGE: case CL_COMMAND_COPY_IMAGE: case CL_COMMAND_COPY_BUFFER_RECT: { const char *src_ptr = (const char*)cmd.as<FreeOCL::command_copy_buffer_rect>()->src_buffer->ptr + cmd.as<FreeOCL::command_copy_buffer_rect>()->src_offset; char *dst_ptr = (char*)cmd.as<FreeOCL::command_copy_buffer_rect>()->dst_buffer->ptr + 
cmd.as<FreeOCL::command_copy_buffer_rect>()->dst_offset; const size_t dst_row_pitch = cmd.as<FreeOCL::command_copy_buffer_rect>()->dst_pitch[0]; const size_t src_row_pitch = cmd.as<FreeOCL::command_copy_buffer_rect>()->src_pitch[0]; const size_t cb[3] = { cmd.as<FreeOCL::command_copy_buffer_rect>()->cb[0], cmd.as<FreeOCL::command_copy_buffer_rect>()->cb[1], cmd.as<FreeOCL::command_copy_buffer_rect>()->cb[2] }; const size_t dst_slice_pitch = cmd.as<FreeOCL::command_copy_buffer_rect>()->dst_pitch[1] - dst_row_pitch * cb[1]; const size_t src_slice_pitch = cmd.as<FreeOCL::command_copy_buffer_rect>()->src_pitch[1] - src_row_pitch * cb[1]; for(size_t z = 0 ; z < cb[2] ; ++z, src_ptr += src_slice_pitch, dst_ptr += dst_slice_pitch) for(size_t y = 0 ; y < cb[1] ; ++y, src_ptr += src_row_pitch, dst_ptr += dst_row_pitch) memcpy(dst_ptr, src_ptr, cb[0]); } break; case CL_COMMAND_READ_BUFFER: memcpy(cmd.as<FreeOCL::command_read_buffer>()->ptr, (char*)cmd.as<FreeOCL::command_read_buffer>()->buffer->ptr + cmd.as<FreeOCL::command_read_buffer>()->offset, cmd.as<FreeOCL::command_read_buffer>()->cb); break; case CL_COMMAND_WRITE_BUFFER: memcpy((char*)cmd.as<FreeOCL::command_write_buffer>()->buffer->ptr + cmd.as<FreeOCL::command_write_buffer>()->offset, cmd.as<FreeOCL::command_write_buffer>()->ptr, cmd.as<FreeOCL::command_write_buffer>()->cb); break; case CL_COMMAND_COPY_BUFFER: memcpy((char*)cmd.as<FreeOCL::command_copy_buffer>()->dst_buffer->ptr + cmd.as<FreeOCL::command_copy_buffer>()->dst_offset, (char*)cmd.as<FreeOCL::command_copy_buffer>()->src_buffer->ptr + cmd.as<FreeOCL::command_copy_buffer>()->src_offset, cmd.as<FreeOCL::command_copy_buffer>()->cb); break; case CL_COMMAND_MAP_IMAGE: case CL_COMMAND_MAP_BUFFER: cmd.as<FreeOCL::command_map_buffer>()->buffer->lock(); cmd.as<FreeOCL::command_map_buffer>()->buffer->mapped.insert(cmd.as<FreeOCL::command_map_buffer>()->ptr); cmd.as<FreeOCL::command_map_buffer>()->buffer->unlock(); break; case CL_COMMAND_UNMAP_MEM_OBJECT: 
cmd.as<FreeOCL::command_unmap_buffer>()->buffer->lock(); cmd.as<FreeOCL::command_unmap_buffer>()->buffer->mapped.erase(cmd.as<FreeOCL::command_unmap_buffer>()->ptr); cmd.as<FreeOCL::command_unmap_buffer>()->buffer->unlock(); break; case CL_COMMAND_NATIVE_KERNEL: cmd.as<FreeOCL::command_native_kernel>()->user_func(cmd.as<FreeOCL::command_native_kernel>()->args); free(cmd.as<FreeOCL::command_native_kernel>()->args); break; case CL_COMMAND_NDRANGE_KERNEL: { FreeOCL::command_ndrange_kernel *ptr = cmd.as<FreeOCL::command_ndrange_kernel>(); const bool b_use_sync = ptr->kernel->__FCL_init(ptr->args, ptr->dim, ptr->global_offset, ptr->global_size, ptr->local_size); device->pool->set_local_size(ptr->local_size); device->pool->set_require_sync(b_use_sync); device->pool->set_thread_num(device->cpu_cores); const size_t num_groups[3] = { ptr->global_size[0] / ptr->local_size[0], ptr->global_size[1] / ptr->local_size[1], ptr->global_size[2] / ptr->local_size[2] }; device->pool->set_num_groups(num_groups); device->pool->run(ptr->kernel->__FCL_setwg, ptr->kernel->__FCL_kernel); if (ptr->args) free(ptr->args); } break; case CL_COMMAND_FILL_BUFFER: { FreeOCL::command_fill_buffer *cfb = cmd.as<FreeOCL::command_fill_buffer>(); const size_t nb_elts = cfb->size / cfb->pattern_size; switch(cfb->pattern_size) { case 1: memset(cfb->offset + (char*)cfb->buffer->ptr, *(char*)cfb->pattern, cfb->size); break; case 2: { cl_ushort * const ptr = (cfb->offset >> 1) + (cl_ushort*)cfb->buffer->ptr; const cl_ushort &pattern = *(const cl_ushort*)cfb->pattern; for(size_t i = 0 ; i < nb_elts ; ++i) ptr[i] = pattern; } break; case 4: { cl_uint * const ptr = (cfb->offset >> 2) + (cl_uint*)cfb->buffer->ptr; const cl_uint &pattern = *(const cl_uint*)cfb->pattern; for(size_t i = 0 ; i < nb_elts ; ++i) ptr[i] = pattern; } break; case 8: { cl_ulong * const ptr = (cfb->offset >> 3) + (cl_ulong*)cfb->buffer->ptr; const cl_ulong &pattern = *(const cl_ulong*)cfb->pattern; for(size_t i = 0 ; i < nb_elts ; ++i) 
ptr[i] = pattern; } break; case 16: { cl_float4 * const ptr = (cfb->offset >> 4) + (cl_float4*)cfb->buffer->ptr; const cl_float4 &pattern = *(const cl_float4*)cfb->pattern; for(size_t i = 0 ; i < nb_elts ; ++i) ptr[i] = pattern; } break; case 32: { cl_float8 * const ptr = (cfb->offset >> 5) + (cl_float8*)cfb->buffer->ptr; const cl_float8 &pattern = *(const cl_float8*)cfb->pattern; for(size_t i = 0 ; i < nb_elts ; ++i) ptr[i] = pattern; } break; case 64: { cl_float16 * const ptr = (cfb->offset >> 6) + (cl_float16*)cfb->buffer->ptr; const cl_float16 &pattern = *(const cl_float16*)cfb->pattern; for(size_t i = 0 ; i < nb_elts ; ++i) ptr[i] = pattern; } break; case 128: { cl_double16 * const ptr = (cfb->offset >> 7) + (cl_double16*)cfb->buffer->ptr; const cl_double16 &pattern = *(const cl_double16*)cfb->pattern; for(size_t i = 0 ; i < nb_elts ; ++i) ptr[i] = pattern; } break; } free(cfb->pattern); } break; case CL_COMMAND_FILL_IMAGE: cmd.as<FreeOCL::command_fill_image>()->process(); free(cmd.as<FreeOCL::command_fill_image>()->fill_color); break; } if (cmd->event) { cmd->event->lock(); cmd->event->change_status(CL_COMPLETE); cmd->event->unlock(); } } return 0; }
void * clEnqueueMapBufferFCL (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret) { MSG(clEnqueueMapBufferFCL); if (map_flags & ~(CL_MAP_READ | CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { SET_RET(CL_INVALID_VALUE); return NULL; } FreeOCL::unlocker unlock; if (!FreeOCL::is_valid(command_queue)) { SET_RET(CL_INVALID_COMMAND_QUEUE); return NULL; } unlock.handle(command_queue); if (!FreeOCL::is_valid(command_queue->context)) { SET_RET(CL_INVALID_CONTEXT); return NULL; } command_queue->context->unlock(); if (!FreeOCL::is_valid(buffer)) { SET_RET(CL_INVALID_MEM_OBJECT); return NULL; } unlock.handle(buffer); if (buffer->size < offset + cb) { SET_RET(CL_INVALID_VALUE); return NULL; } void *p = (char*)buffer->ptr + offset; if ((num_events_in_wait_list == 0 || event_wait_list == NULL) && blocking_map == CL_FALSE) { buffer->mapped.insert(p); if (event) { cl_event e = new _cl_event(command_queue->context); *event = e; e->command_queue = command_queue; e->command_type = CL_COMMAND_MAP_BUFFER; e->status = CL_QUEUED; e->change_status(CL_QUEUED); e->change_status(CL_SUBMITTED); e->change_status(CL_RUNNING); e->change_status(CL_COMPLETE); } } else { FreeOCL::smartptr<FreeOCL::command_map_buffer> cmd = new FreeOCL::command_map_buffer; cmd->num_events_in_wait_list = num_events_in_wait_list; cmd->event_wait_list = event_wait_list; cmd->event = (blocking_map == CL_TRUE || event) ? 
new _cl_event(command_queue->context) : NULL; if (cmd->event) { cmd->event->command_queue = command_queue; cmd->event->command_type = CL_COMMAND_MAP_BUFFER; cmd->event->status = CL_QUEUED; if (event) *event = cmd->event.weak(); } cmd->buffer = buffer; cmd->ptr = p; unlock.forget(command_queue); command_queue->enqueue(cmd); unlock.unlockall(); if (blocking_map == CL_TRUE) { clWaitForEventsFCL(1, &cmd->event.weak()); if (event == NULL) clReleaseEventFCL(cmd->event.weak()); } } SET_RET(CL_SUCCESS); return p; }
/**
 * Enqueues a copy of cb bytes from host memory ptr into buffer at offset.
 *
 * After validating its arguments the function builds a write command,
 * attaches an event when the call is blocking or the caller asked for one,
 * and enqueues the command on command_queue. A blocking call then waits for
 * the command's event before returning.
 *
 * @return CL_SUCCESS, or
 *  - CL_INVALID_VALUE            ptr is NULL or the region is out of bounds
 *  - CL_INVALID_COMMAND_QUEUE    command_queue rejected by FreeOCL::is_valid()
 *  - CL_INVALID_CONTEXT          a context is invalid, or buffer and queue
 *                                do not share the same context
 *  - CL_INVALID_MEM_OBJECT       buffer rejected by FreeOCL::is_valid()
 *  - CL_INVALID_OPERATION        buffer was created with CL_MEM_HOST_NO_ACCESS
 *                                or CL_MEM_HOST_READ_ONLY
 *  - CL_INVALID_EVENT_WAIT_LIST  list/count mismatch or an invalid event
 *  - CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST
 *                                blocking call and an event in the wait list
 *                                has a negative status
 *
 * NOTE(review): the command keeps the caller's event_wait_list pointer
 * rather than a copy — confirm the array is guaranteed to outlive the command.
 */
cl_int clEnqueueWriteBufferFCL (cl_command_queue command_queue,
                                cl_mem buffer,
                                cl_bool blocking_write,
                                size_t offset,
                                size_t cb,
                                const void *ptr,
                                cl_uint num_events_in_wait_list,
                                const cl_event *event_wait_list,
                                cl_event *event)
{
    MSG(clEnqueueWriteBufferFCL);

    // Objects handed to the unlocker are tracked so that unlockall() (called
    // explicitly below) can release them; presumably it also releases them
    // when it goes out of scope on the early returns.
    FreeOCL::unlocker unlock;
    if (ptr == NULL)
        return CL_INVALID_VALUE;

    if (!FreeOCL::is_valid(command_queue))
        return CL_INVALID_COMMAND_QUEUE;
    unlock.handle(command_queue);

    if (!FreeOCL::is_valid(command_queue->context))
        return CL_INVALID_CONTEXT;
    command_queue->context->unlock();

    if (!FreeOCL::is_valid(buffer))
        return CL_INVALID_MEM_OBJECT;
    unlock.handle(buffer);

    if (!FreeOCL::is_valid(buffer->context))
        return CL_INVALID_CONTEXT;
    buffer->context->unlock();

    if (buffer->context != command_queue->context)
        return CL_INVALID_CONTEXT;

    // Overflow-safe bounds check: "offset + cb" may wrap around for huge
    // arguments and would then wrongly pass a "size < offset + cb" test.
    if (offset > buffer->size || cb > buffer->size - offset)
        return CL_INVALID_VALUE;

    if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY))
        return CL_INVALID_OPERATION;

    // The wait list and its length must be both set or both empty
    if (event_wait_list == NULL && num_events_in_wait_list > 0)
        return CL_INVALID_EVENT_WAIT_LIST;
    if (event_wait_list != NULL && num_events_in_wait_list == 0)
        return CL_INVALID_EVENT_WAIT_LIST;

    for(size_t i = 0 ; i < num_events_in_wait_list ; ++i)
    {
        if (!FreeOCL::is_valid(event_wait_list[i]))
            return CL_INVALID_EVENT_WAIT_LIST;
        unlock.handle(event_wait_list[i]);
    }

    if (blocking_write == CL_TRUE)
    {
        // A blocking write cannot succeed if one of its dependencies already failed
        for(size_t i = 0 ; i < num_events_in_wait_list ; ++i)
            if (event_wait_list[i]->status < 0)
                return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
    }

    FreeOCL::smartptr<FreeOCL::command_write_buffer> cmd = new FreeOCL::command_write_buffer;
    cmd->num_events_in_wait_list = num_events_in_wait_list;
    cmd->event_wait_list = event_wait_list;
    // A blocking call needs an event to wait on even if the caller did not ask for one
    cmd->event = (blocking_write == CL_TRUE || event) ? new _cl_event(command_queue->context) : NULL;
    cmd->buffer = buffer;
    cmd->offset = offset;
    cmd->cb = cb;
    cmd->ptr = ptr;
    if (cmd->event)
    {
        cmd->event->command_queue = command_queue;
        cmd->event->command_type = CL_COMMAND_WRITE_BUFFER;
        cmd->event->status = CL_QUEUED;
    }

    if (event)
        *event = cmd->event.weak();

    // The queue is taken out of the unlocker before enqueue(); presumably
    // enqueue() releases the queue lock itself.
    unlock.forget(command_queue);
    command_queue->enqueue(cmd);

    unlock.unlockall();

    if (blocking_write == CL_TRUE)
    {
        clWaitForEventsFCL(1, &(cmd->event.weak()));
        // The event was only created for the wait above: drop it
        if (event == NULL)
            clReleaseEventFCL(cmd->event.weak());
    }

    return CL_SUCCESS;
}