void OffloadDescriptor::scatter_copyin_data() { OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", m_in.get_buffer_start(), m_in.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), m_in.get_buffer_size()); // receive data for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); void** ptr_addr = src_is_for_mic ? static_cast<void**>(m_vars[i].ptr) : static_cast<void**>(m_vars[i].into); int type = src_is_for_mic ? m_vars[i].type.src : m_vars[i].type.dst; bool is_static = src_is_for_mic ? m_vars[i].flags.is_static : m_vars[i].flags.is_static_dstn; void *ptr = NULL; if (m_vars[i].flags.alloc_disp) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); m_vars[i].offset = -offset; } if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? reinterpret_cast<ArrDesc*>(ptr_addr) : *reinterpret_cast<ArrDesc**>(ptr_addr); ptr_addr = reinterpret_cast<void**>(&dvp->Base); } // Set pointer values switch (type) { case c_data_ptr_array: { int j = m_vars[i].ptr_arr_offset; int max_el = j + m_vars[i].count; char *dst_arr_ptr = (src_is_for_mic)? *(reinterpret_cast<char**>(m_vars[i].ptr)) : reinterpret_cast<char*>(m_vars[i].into); for (; j < max_el; j++) { if (src_is_for_mic) { m_vars[j].ptr = dst_arr_ptr + m_vars[j].ptr_arr_offset; } else { m_vars[j].into = dst_arr_ptr + m_vars[j].ptr_arr_offset; } } } break; case c_data: case c_void_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); void *ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); *ptr_addr = ptr; } break; case c_func_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; *ptr_addr = ptr; } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, type); abort(); } // Release obsolete buffers for stack of persistent objects if (type = c_data_ptr && m_vars[i].flags.is_stack_buf && !m_vars[i].direction.bits && m_vars[i].alloc_if && m_vars[i].size != 0) { for (int j=0; j < m_vars[i].size; j++) { void *buf; m_in.receive_data(&buf, sizeof(buf)); BufferReleaseRef(buf); ref_data.erase(buf); } } // Do copyin switch (m_vars[i].type.dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: if (m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { int64_t size; int64_t disp; char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); if (m_vars[i].type.dst == c_cean_var) { m_in.receive_data((&size), sizeof(int64_t)); m_in.receive_data((&disp), sizeof(int64_t)); } else { size = m_vars[i].size; disp = 0; } m_in.receive_data(ptr + disp, size); } break; case c_dv: if (m_vars[i].direction.bits || m_vars[i].alloc_if || m_vars[i].free_if) { char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); m_in.receive_data(ptr + sizeof(uint64_t), m_vars[i].size - sizeof(uint64_t)); } break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: break; case c_func_ptr: if (m_vars[i].direction.in) { m_in.receive_func_ptr((const void**) m_vars[i].ptr); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } } OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", m_in.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); OFFLOAD_TIMER_START(c_offload_target_compute); }
void OffloadDescriptor::gather_copyout_data() { OFFLOAD_TIMER_STOP(c_offload_target_compute); OFFLOAD_TIMER_START(c_offload_target_gather_outputs); for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); switch (m_vars[i].type.src) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: if (m_vars[i].direction.out && !m_vars[i].flags.is_static) { m_out.send_data( static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, m_vars[i].size); } break; case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].free_if && src_is_for_mic && !m_vars[i].flags.is_static) { void *buf = *static_cast<char**>(m_vars[i].ptr) - m_vars[i].mic_offset - (m_vars[i].flags.is_stack_buf? 0 : m_vars[i].offset); if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); } break; case c_func_ptr: if (m_vars[i].direction.out) { m_out.send_func_ptr(*((void**) m_vars[i].ptr)); } break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (src_is_for_mic && m_vars[i].free_if && !m_vars[i].flags.is_static) { ArrDesc *dvp = (m_vars[i].type.src == c_dv_data || m_vars[i].type.src == c_dv_data_slice) ? static_cast<ArrDesc*>(m_vars[i].ptr) : *static_cast<ArrDesc**>(m_vars[i].ptr); void *buf = reinterpret_cast<char*>(dvp->Base) - m_vars[i].mic_offset - m_vars[i].offset; if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } if (m_vars[i].into) { switch (m_vars[i].type.dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].direction.in && m_vars[i].free_if && !m_vars[i].flags.is_static_dstn) { void *buf = *static_cast<char**>(m_vars[i].into) - m_vars[i].mic_offset - (m_vars[i].flags.is_stack_buf? 0 : m_vars[i].offset); if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START( c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP( c_offload_target_release_buffer_refs); } break; case c_func_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].free_if && m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { ArrDesc *dvp = (m_vars[i].type.dst == c_dv_data_slice || m_vars[i].type.dst == c_dv_data) ? static_cast<ArrDesc*>(m_vars[i].into) : *static_cast<ArrDesc**>(m_vars[i].into); void *buf = reinterpret_cast<char*>(dvp->Base) - m_vars[i].mic_offset - m_vars[i].offset; if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START( c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP( c_offload_target_release_buffer_refs); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } } } OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", m_out.get_buffer_start(), m_out.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_out.get_buffer_start(), m_out.get_buffer_size()); OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, "Total copyout data sent to host: [%lld] bytes\n", m_out.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); }
void OffloadDescriptor::scatter_copyin_data() { OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", m_in.get_buffer_start(), m_in.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), m_in.get_buffer_size()); // receive data for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); void** ptr_addr = src_is_for_mic ? static_cast<void**>(m_vars[i].ptr) : static_cast<void**>(m_vars[i].into); int type = src_is_for_mic ? m_vars_extra[i].type_src : m_vars_extra[i].type_dst; bool is_static = src_is_for_mic ? m_vars[i].flags.is_static : m_vars[i].flags.is_static_dstn; void *ptr = NULL; if (m_vars[i].flags.alloc_disp) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); } if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? reinterpret_cast<ArrDesc*>(ptr_addr) : *reinterpret_cast<ArrDesc**>(ptr_addr); ptr_addr = reinterpret_cast<void**>(&dvp->Base); } // Set pointer values switch (type) { case c_data_ptr_array: { int j = m_vars[i].ptr_arr_offset; int max_el = j + m_vars[i].count; char *dst_arr_ptr = (src_is_for_mic)? *(reinterpret_cast<char**>(m_vars[i].ptr)) : reinterpret_cast<char*>(m_vars[i].into); // if is_pointer is 1 it means that pointer array itself // is defined either via pointer or as class member. // i.e. arr_ptr[0:5] or this->ARR[0:5] if (m_vars[i].flags.is_pointer) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); dst_arr_ptr = *((char**)dst_arr_ptr) + offset; } for (; j < max_el; j++) { if (src_is_for_mic) { m_vars[j].ptr = dst_arr_ptr + m_vars[j].ptr_arr_offset; } else { m_vars[j].into = dst_arr_ptr + m_vars[j].ptr_arr_offset; } } } break; case c_data: case c_void_ptr: case c_void_ptr_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_string_ptr_ptr: case c_data_ptr_ptr: case c_cean_var_ptr: case c_cean_var_ptr_ptr: case c_dv_ptr: // Don't need ptr_addr value for variables from stack buffer. // Stack buffer address is set at var_desc with #0. if (i != 0 && m_vars[i].flags.is_stack_buf) { break; } if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) || TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) { int64_t offset; m_in.receive_data(&offset, sizeof(offset)); ptr_addr = reinterpret_cast<void**>( reinterpret_cast<char*>(*ptr_addr) + offset); } if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) { void *buf = NULL; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); OFFLOAD_TRACE(1, " AddRef count = %d\n", ((RefInfo *) ref_data[buf])->count); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); void *ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); *ptr_addr = ptr; } break; case c_func_ptr: case c_func_ptr_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; *ptr_addr = ptr; } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, type); abort(); } // Release obsolete buffers for stack of persistent objects. // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for // stack buffer pointer. if (i == 0 && m_vars[i].flags.is_stack_buf && !m_vars[i].direction.bits && m_vars[i].alloc_if && m_vars[i].size != 0) { for (int j=0; j < m_vars[i].size; j++) { void *buf; m_in.receive_data(&buf, sizeof(buf)); OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf); BufferReleaseRef(buf); ref_data.erase(buf); } } // Do copyin switch (m_vars_extra[i].type_dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_void_ptr_ptr: case c_cean_var: if (m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { int64_t size; int64_t disp; char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); if (m_vars_extra[i].type_dst == c_cean_var) { m_in.receive_data((&size), sizeof(int64_t)); m_in.receive_data((&disp), sizeof(int64_t)); } else { size = m_vars[i].size; disp = 0; } m_in.receive_data(ptr + disp, size); } break; case c_dv: if (m_vars[i].direction.bits || m_vars[i].alloc_if || m_vars[i].free_if) { char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); m_in.receive_data(ptr + sizeof(uint64_t), m_vars[i].size - sizeof(uint64_t)); } break; case c_string_ptr: case c_data_ptr: case c_string_ptr_ptr: case c_data_ptr_ptr: case c_cean_var_ptr: case c_cean_var_ptr_ptr: case c_dv_ptr: case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: break; case c_func_ptr: case c_func_ptr_ptr: if (m_vars[i].direction.in) { m_in.receive_func_ptr((const void**) m_vars[i].ptr); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst); abort(); } } OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", m_in.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); OFFLOAD_TIMER_START(c_offload_target_compute); }