static void BufReleaseRef(void * buf) { mutex_locker_t locker(add_ref_lock); RefInfo * info = ref_data[buf]; if (info) { --info->count; if (info->count == 0 && info->is_added) { BufferReleaseRef(buf); info->is_added = 0; } } }
static void BufReleaseRef(void * buf) { mutex_locker_t locker(add_ref_lock); RefInfo * info = ref_data[buf]; if (info) { --info->count; if (info->count == 0 && info->is_added) { OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n", ((RefInfo *) ref_data[buf])->count); BufferReleaseRef(buf); info->is_added = 0; } } }
void OffloadDescriptor::scatter_copyin_data() { OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", m_in.get_buffer_start(), m_in.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), m_in.get_buffer_size()); // receive data for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); void** ptr_addr = src_is_for_mic ? static_cast<void**>(m_vars[i].ptr) : static_cast<void**>(m_vars[i].into); int type = src_is_for_mic ? m_vars[i].type.src : m_vars[i].type.dst; bool is_static = src_is_for_mic ? m_vars[i].flags.is_static : m_vars[i].flags.is_static_dstn; void *ptr = NULL; if (m_vars[i].flags.alloc_disp) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); m_vars[i].offset = -offset; } if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? reinterpret_cast<ArrDesc*>(ptr_addr) : *reinterpret_cast<ArrDesc**>(ptr_addr); ptr_addr = reinterpret_cast<void**>(&dvp->Base); } // Set pointer values switch (type) { case c_data_ptr_array: { int j = m_vars[i].ptr_arr_offset; int max_el = j + m_vars[i].count; char *dst_arr_ptr = (src_is_for_mic)? *(reinterpret_cast<char**>(m_vars[i].ptr)) : reinterpret_cast<char*>(m_vars[i].into); for (; j < max_el; j++) { if (src_is_for_mic) { m_vars[j].ptr = dst_arr_ptr + m_vars[j].ptr_arr_offset; } else { m_vars[j].into = dst_arr_ptr + m_vars[j].ptr_arr_offset; } } } break; case c_data: case c_void_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); void *ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); *ptr_addr = ptr; } break; case c_func_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; *ptr_addr = ptr; } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, type); abort(); } // Release obsolete buffers for stack of persistent objects if (type = c_data_ptr && m_vars[i].flags.is_stack_buf && !m_vars[i].direction.bits && m_vars[i].alloc_if && m_vars[i].size != 0) { for (int j=0; j < m_vars[i].size; j++) { void *buf; m_in.receive_data(&buf, sizeof(buf)); BufferReleaseRef(buf); ref_data.erase(buf); } } // Do copyin switch (m_vars[i].type.dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: if (m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { int64_t size; int64_t disp; char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); if (m_vars[i].type.dst == c_cean_var) { m_in.receive_data((&size), sizeof(int64_t)); m_in.receive_data((&disp), sizeof(int64_t)); } else { size = m_vars[i].size; disp = 0; } m_in.receive_data(ptr + disp, size); } break; case c_dv: if (m_vars[i].direction.bits || m_vars[i].alloc_if || m_vars[i].free_if) { char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); m_in.receive_data(ptr + sizeof(uint64_t), m_vars[i].size - sizeof(uint64_t)); } break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: break; case c_func_ptr: if (m_vars[i].direction.in) { m_in.receive_func_ptr((const void**) m_vars[i].ptr); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } } OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", m_in.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); OFFLOAD_TIMER_START(c_offload_target_compute); }
void OffloadDescriptor::scatter_copyin_data() { OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", m_in.get_buffer_start(), m_in.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), m_in.get_buffer_size()); // receive data for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); void** ptr_addr = src_is_for_mic ? static_cast<void**>(m_vars[i].ptr) : static_cast<void**>(m_vars[i].into); int type = src_is_for_mic ? m_vars_extra[i].type_src : m_vars_extra[i].type_dst; bool is_static = src_is_for_mic ? m_vars[i].flags.is_static : m_vars[i].flags.is_static_dstn; void *ptr = NULL; if (m_vars[i].flags.alloc_disp) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); } if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? reinterpret_cast<ArrDesc*>(ptr_addr) : *reinterpret_cast<ArrDesc**>(ptr_addr); ptr_addr = reinterpret_cast<void**>(&dvp->Base); } // Set pointer values switch (type) { case c_data_ptr_array: { int j = m_vars[i].ptr_arr_offset; int max_el = j + m_vars[i].count; char *dst_arr_ptr = (src_is_for_mic)? *(reinterpret_cast<char**>(m_vars[i].ptr)) : reinterpret_cast<char*>(m_vars[i].into); // if is_pointer is 1 it means that pointer array itself // is defined either via pointer or as class member. // i.e. arr_ptr[0:5] or this->ARR[0:5] if (m_vars[i].flags.is_pointer) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); dst_arr_ptr = *((char**)dst_arr_ptr) + offset; } for (; j < max_el; j++) { if (src_is_for_mic) { m_vars[j].ptr = dst_arr_ptr + m_vars[j].ptr_arr_offset; } else { m_vars[j].into = dst_arr_ptr + m_vars[j].ptr_arr_offset; } } } break; case c_data: case c_void_ptr: case c_void_ptr_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_string_ptr_ptr: case c_data_ptr_ptr: case c_cean_var_ptr: case c_cean_var_ptr_ptr: case c_dv_ptr: // Don't need ptr_addr value for variables from stack buffer. // Stack buffer address is set at var_desc with #0. if (i != 0 && m_vars[i].flags.is_stack_buf) { break; } if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) || TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) { int64_t offset; m_in.receive_data(&offset, sizeof(offset)); ptr_addr = reinterpret_cast<void**>( reinterpret_cast<char*>(*ptr_addr) + offset); } if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) { void *buf = NULL; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); OFFLOAD_TRACE(1, " AddRef count = %d\n", ((RefInfo *) ref_data[buf])->count); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); void *ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); *ptr_addr = ptr; } break; case c_func_ptr: case c_func_ptr_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; *ptr_addr = ptr; } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, type); abort(); } // Release obsolete buffers for stack of persistent objects. // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for // stack buffer pointer. if (i == 0 && m_vars[i].flags.is_stack_buf && !m_vars[i].direction.bits && m_vars[i].alloc_if && m_vars[i].size != 0) { for (int j=0; j < m_vars[i].size; j++) { void *buf; m_in.receive_data(&buf, sizeof(buf)); OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf); BufferReleaseRef(buf); ref_data.erase(buf); } } // Do copyin switch (m_vars_extra[i].type_dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_void_ptr_ptr: case c_cean_var: if (m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { int64_t size; int64_t disp; char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); if (m_vars_extra[i].type_dst == c_cean_var) { m_in.receive_data((&size), sizeof(int64_t)); m_in.receive_data((&disp), sizeof(int64_t)); } else { size = m_vars[i].size; disp = 0; } m_in.receive_data(ptr + disp, size); } break; case c_dv: if (m_vars[i].direction.bits || m_vars[i].alloc_if || m_vars[i].free_if) { char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); m_in.receive_data(ptr + sizeof(uint64_t), m_vars[i].size - sizeof(uint64_t)); } break; case c_string_ptr: case c_data_ptr: case c_string_ptr_ptr: case c_data_ptr_ptr: case c_cean_var_ptr: case c_cean_var_ptr_ptr: case c_dv_ptr: case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: break; case c_func_ptr: case c_func_ptr_ptr: if (m_vars[i].direction.in) { m_in.receive_func_ptr((const void**) m_vars[i].ptr); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst); abort(); } } OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", m_in.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); OFFLOAD_TIMER_START(c_offload_target_compute); }