/**
 * Attempts to view the array `arr` as the type `tp` without copying data.
 *
 * A new array object is allocated with arrmeta sized for `tp`; it shares
 * the data pointer and a data reference with `arr`. The arrmeta is then
 * filled in by try_view(). Viewing an outermost "var" dimension as a
 * "fixed" dimension is handled as a special case.
 *
 * \param arr  The array to view.
 * \param tp   The type to view it as.
 *
 * \returns The viewing array.
 * \throws type_error if no view could be produced.
 */
static nd::array view_concrete(const nd::array &arr, const ndt::type &tp)
{
  // The view is assembled in a new array whose arrmeta is sized for `tp`
  nd::array result(make_array_memory_block(tp.get_arrmeta_size()));

  // Type and flags
  result.get_ndo()->m_type = ndt::type(tp).release();
  result.get_ndo()->m_flags = arr.get_ndo()->m_flags;

  // Data pointer and data reference
  result.get_ndo()->data.ptr = arr.get_ndo()->data.ptr;
  if (arr.get_ndo()->data.ref != NULL) {
    // Point at the same data reference as `arr`, so no chain is produced
    result.get_ndo()->data.ref = arr.get_data_memblock().release();
  } else {
    // The data is embedded in `arr`, so the array itself is referenced
    result.get_ndo()->data.ref = arr.get_memblock().release();
  }

  const bool outer_var_as_fixed =
      arr.get_type().get_type_id() == var_dim_type_id &&
      tp.get_type_id() == fixed_dim_type_id;

  if (!outer_var_as_fixed) {
    // General case: try to copy the whole arrmeta as a view
    if (try_view(arr.get_type(), arr.get_arrmeta(), tp, result.get_arrmeta(),
                 arr.get_memblock().get())) {
      return result;
    }
  } else {
    // Special case: outermost "var" viewed as "fixed[#]"
    const var_dim_type_arrmeta *src_meta =
        reinterpret_cast<const var_dim_type_arrmeta *>(arr.get_arrmeta());
    const var_dim_type_data *src_data =
        reinterpret_cast<const var_dim_type_data *>(
            arr.get_readonly_originptr());
    fixed_dim_type_arrmeta *dst_meta =
        reinterpret_cast<fixed_dim_type_arrmeta *>(result.get_arrmeta());
    dst_meta->dim_size =
        tp.extended<ndt::fixed_dim_type>()->get_fixed_dim_size();
    dst_meta->stride = src_meta->stride;

    // Only possible when the var dimension's size equals the fixed size
    if (static_cast<intptr_t>(src_data->size) == dst_meta->dim_size) {
      if (src_meta->blockref != NULL) {
        // Prefer the more specific data reference from the var arrmeta
        memory_block_decref(result.get_ndo()->data.ref);
        memory_block_incref(src_meta->blockref);
        result.get_ndo()->data.ref = src_meta->blockref;
      }
      result.get_ndo()->data.ptr = src_data->begin + src_meta->offset;

      // The element arrmeta following the dimension arrmeta is viewed
      // in the usual way
      if (try_view(
              arr.get_type().extended<ndt::base_dim_type>()->get_element_type(),
              arr.get_arrmeta() + sizeof(var_dim_type_arrmeta),
              tp.extended<ndt::base_dim_type>()->get_element_type(),
              result.get_arrmeta() + sizeof(fixed_dim_type_arrmeta),
              arr.get_memblock().get())) {
        return result;
      }
    }
  }

  // Neither approach worked
  stringstream ss;
  ss << "Unable to view nd::array of type " << arr.get_type();
  ss << " as type " << tp;
  throw type_error(ss.str());
}
/**
 * Formats the array `n` as JSON text and returns it as an immutable
 * string array.
 *
 * \param n               The array to format. If its type is an expression
 *                        type, it is evaluated first.
 * \param struct_as_list  Stored in the output state passed to the formatter.
 *
 * \returns A new array of ndt::string_type holding the JSON text.
 */
nd::array dynd::format_json(const nd::array &n, bool struct_as_list)
{
  // The result holds a single string
  nd::array result = nd::empty(ndt::string_type::make());

  // Output state, starting with a 1024 byte buffer
  output_data out;
  out.struct_as_list = struct_as_list;
  out.out_string.resize(1024);
  out.out_begin = out.out_string.begin();
  out.out_end = out.out_begin;
  out.out_capacity_end = out.out_string.end();

  // Expression types get evaluated into a temporary before formatting
  nd::array evaluated;
  const nd::array *src = &n;
  if (n.get_type().is_expression()) {
    evaluated = n.eval();
    src = &evaluated;
  }
  ::format_json(out, src->get_type(), src->get_arrmeta(),
                src->get_readonly_originptr());

  // Copy just the bytes which were written into the result string
  string *dst = reinterpret_cast<string *>(result.get_readwrite_originptr());
  dst->assign(out.out_string.data(), out.out_end - out.out_begin);

  // Finalize processing and mark the result as immutable
  result.get_type().extended()->arrmeta_finalize_buffers(result.get_arrmeta());
  result.flag_as_immutable();
  return result;
}
/**
 * Produces an array with any leading pointer types of `self` stripped off.
 *
 * Each pointer level is followed: the type becomes the pointer's target
 * type, the arrmeta advances past the pointer arrmeta, and the data
 * pointer/reference are replaced with the pointed-to data and the
 * pointer's block reference.
 *
 * \param self  The array to dereference.
 *
 * \returns A new array object of the pointer-free type which refers to
 *          the final data.
 */
static nd::array array_function_dereference(const nd::array &self)
{
  uint64_t flags = self.get_ndo()->m_flags;
  ndt::type dt = self.get_type();
  const char *arrmeta = self.get_arrmeta();
  char *data = self.get_ndo()->m_data_pointer;

  // With no separate data reference, the array itself holds the data
  memory_block_data *dataref = self.get_ndo()->m_data_reference;
  if (dataref == NULL) {
    dataref = self.get_memblock().get();
  }

  // Follow the pointers to eliminate them
  while (dt.get_type_id() == pointer_type_id) {
    const pointer_type_arrmeta *ptr_meta =
        reinterpret_cast<const pointer_type_arrmeta *>(arrmeta);
    data = *reinterpret_cast<char **>(data) + ptr_meta->offset;
    dataref = ptr_meta->blockref;
    arrmeta += sizeof(pointer_type_arrmeta);
    dt = dt.extended<ndt::pointer_type>()->get_target_type();
  }

  // Build the pointer-free array. The arrmeta must be copied before
  // dt.release() hands the type over to the result.
  nd::array result(make_array_memory_block(dt.get_arrmeta_size()));
  if (!dt.is_builtin()) {
    dt.extended()->arrmeta_copy_construct(result.get_arrmeta(), arrmeta,
                                          &self.get_ndo()->m_memblockdata);
  }
  result.get_ndo()->m_flags = flags;
  result.get_ndo()->m_data_pointer = data;
  result.get_ndo()->m_data_reference = dataref;
  // The result holds its own reference to the data
  memory_block_incref(result.get_ndo()->m_data_reference);
  result.get_ndo()->m_type = dt.release();
  return result;
}
/**
 * Makes sure `a` is an immutable, contiguous one-dimensional array whose
 * elements have the "type" type, converting it in place when needed.
 *
 * If `a` is already immutable, has a fixed dimension type with "type"
 * elements, and a stride of sizeof(ndt::type), it is left untouched.
 * Otherwise, if it is one-dimensional with elements whose value type is
 * "type", its values are copied into a newly created array which is flagged
 * immutable and swapped into `a`.
 *
 * \param a  The array to check, replaced by the copy if one was made.
 *
 * \returns true if `a` is (now) in the required form, false if it is
 *          not compatible.
 */
inline static bool run(nd::array &a)
{
  const ndt::type &tp = a.get_type();
  if (a.is_immutable() && tp.get_type_id() == fixed_dim_type_id) {
    // It's immutable and "N * <something>"
    const ndt::type &et = tp.extended<fixed_dim_type>()->get_element_type();
    const fixed_dim_type_arrmeta *md =
        reinterpret_cast<const fixed_dim_type_arrmeta *>(a.get_arrmeta());
    if (et.get_type_id() == type_type_id && md->stride == sizeof(ndt::type)) {
      // It also has the right type and is contiguous,
      // so no modification necessary.
      return true;
    }
  }

  // We have to make a copy, check that it's a 1D array, and that
  // it has the same array kind as the requested type.
  if (tp.get_ndim() == 1) {
    // Held by value: the type returned by get_type_at_dimension() is a
    // temporary, so a reference obtained through value_type() could dangle
    // once this statement ends.
    ndt::type et = tp.get_type_at_dimension(NULL, 1).value_type();
    if (et.get_type_id() == type_type_id) {
      // It also has the same array type as requested
      nd::array tmp = nd::empty(a.get_dim_size(), ndt::make_type());
      tmp.vals() = a;
      tmp.flag_as_immutable();
      a.swap(tmp);
      return true;
    }
  }

  // It's not compatible, so return false
  return false;
}
/**
 * Gets the [begin, end) range of a character buffer holding the JSON
 * text stored in the array `json`.
 *
 * \param json         The input, whose value type must be of string or
 *                     bytes kind.
 * \param out_tmp_ref  Receives the evaluated (and, for strings which are
 *                     neither ASCII nor UTF-8, converted) array that the
 *                     range is read from.
 * \param begin        Receives the start of the buffer.
 * \param end          Receives the end of the buffer.
 *
 * \throws runtime_error if the value type is neither string nor bytes kind.
 */
static void json_as_buffer(const nd::array &json, nd::array &out_tmp_ref,
                           const char *&begin, const char *&end)
{
  ndt::type json_type = json.get_type().value_type();

  if (json_type.get_kind() == bytes_kind) {
    // Bytes are used as they are
    out_tmp_ref = json.eval();
    const ndt::base_bytes_type *bdt =
        json_type.extended<ndt::base_bytes_type>();
    bdt->get_bytes_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                         out_tmp_ref.get_readonly_originptr());
    return;
  }

  if (json_type.get_kind() == string_kind) {
    const ndt::base_string_type *sdt =
        json_type.extended<ndt::base_string_type>();
    switch (sdt->get_encoding()) {
    case string_encoding_ascii:
    case string_encoding_utf_8:
      // The data is already UTF-8, so use the buffer directly
      out_tmp_ref = json.eval();
      sdt->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                            out_tmp_ref.get_readonly_originptr());
      return;
    default: {
      // Any other encoding is converted to UTF-8 before parsing
      ndt::type utf8_tp = ndt::string_type::make(string_encoding_utf_8);
      out_tmp_ref = json.ucast(utf8_tp).eval();
      sdt = static_cast<const ndt::base_string_type *>(utf8_tp.extended());
      sdt->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                            out_tmp_ref.get_readonly_originptr());
      return;
    }
    }
  }

  // Neither a string nor bytes
  stringstream ss;
  ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) "
        "or a string, not \""
     << json_type << "\"";
  throw runtime_error(ss.str());
}
/**
 * Assigns the value of the array `src_arr` to the destination described
 * by a type/arrmeta/data triple.
 *
 * This overload unpacks the array's type, arrmeta and read-only data
 * pointer, and forwards to the typed_data_assign overload which takes the
 * source as separate components.
 *
 * \param dst_tp       The destination type.
 * \param dst_arrmeta  The destination arrmeta.
 * \param dst_data     The destination data.
 * \param src_arr      The source array.
 * \param ectx         The evaluation context, passed through unchanged.
 */
void dynd::typed_data_assign(const ndt::type &dst_tp, const char *dst_arrmeta,
                             char *dst_data, const nd::array &src_arr,
                             const eval::eval_context *ectx)
{
  const char *src_arrmeta = src_arr.get_arrmeta();
  const char *src_data = src_arr.get_readonly_originptr();
  typed_data_assign(dst_tp, dst_arrmeta, dst_data, src_arr.get_type(),
                    src_arrmeta, src_data, ectx);
}
const char *get_category_data_from_value(size_t value) const { if (value >= get_category_count()) { throw std::runtime_error("category value is out of bounds"); } return m_categories.get_readonly_originptr() + m_value_to_category_index[value] * reinterpret_cast<const strided_dim_type_arrmeta *>( m_categories.get_arrmeta())->stride; }
const char *get_category_data_from_value(uint32_t value) const { if (value >= get_category_count()) { throw std::runtime_error("category value is out of bounds"); } return m_categories.get_readonly_originptr() + unchecked_fixed_dim_get<intptr_t>(m_value_to_category_index, value) * reinterpret_cast<const fixed_dim_type_arrmeta *>( m_categories.get_arrmeta())->stride; }
/**
 * Looks up the category value for a category provided as an array.
 *
 * \param category  The category to look up. If its type is not exactly
 *                  m_category_tp, it is first assigned into a temporary
 *                  of that type.
 *
 * \returns The result of the (arrmeta, data) overload of
 *          get_value_from_category.
 */
uint32_t categorical_type::get_value_from_category(const nd::array& category) const
{
  if (!(category.get_type() == m_category_tp)) {
    // Convert to the category type, then look up the converted value
    nd::array converted = nd::empty(m_category_tp);
    converted.val_assign(category);
    return get_value_from_category(converted.get_arrmeta(),
                                   converted.get_readonly_originptr());
  }

  // The type already matches, so the data can be used directly
  return get_value_from_category(category.get_arrmeta(),
                                 category.get_readonly_originptr());
}
/**
 * Attempts to view the data of `arr` as a single bytes value of type `tp`.
 *
 * The array's type is refined step by step with refine_bytes_view() until
 * the type becomes uninitialized. The view succeeds if that produced a
 * non-negative size and the data pointer satisfies the target alignment
 * of `tp`.
 *
 * \param arr  The array to view.
 * \param tp   The bytes type to view it as.
 *
 * \returns The bytes array, or a null array if no view could be made.
 */
static nd::array view_as_bytes(const nd::array &arr, const ndt::type &tp)
{
  // Arrays of object type (requiring a destructor) can't be viewed
  if (arr.get_type().get_flags() & type_flag_destructor) {
    return nd::array();
  }

  // The pieces of the array which the refinement works on
  memory_block_ptr data_ref = arr.get_data_memblock();
  char *data_ptr = arr.get_ndo()->data.ptr;
  ndt::type data_tp = arr.get_type();
  const char *data_meta = arr.get_arrmeta();
  intptr_t data_dim_size = -1;
  intptr_t data_stride = 0;
  while (data_tp.get_type_id() != uninitialized_type_id) {
    refine_bytes_view(data_ref, data_ptr, data_tp, data_meta, data_dim_size,
                      data_stride);
  }

  // A negative size signals the data is not one contiguous chunk of bytes
  if (data_dim_size < 0) {
    return nd::array();
  }
  // The resulting data pointer must also be aligned for the bytes type
  if (!offset_is_aligned(
          reinterpret_cast<size_t>(data_ptr),
          tp.extended<ndt::bytes_type>()->get_target_alignment())) {
    return nd::array();
  }

  // Allocate the result together with space for its bytes extents
  char *extents = NULL;
  nd::array result(make_array_memory_block(tp.extended()->get_arrmeta_size(),
                                           tp.get_data_size(),
                                           tp.get_data_alignment(), &extents));
  reinterpret_cast<char **>(extents)[0] = data_ptr;
  reinterpret_cast<char **>(extents)[1] = data_ptr + data_dim_size;

  // Array preamble: the data is embedded, so there is no data reference
  array_preamble *ndo = result.get_ndo();
  ndo->m_flags = arr.get_flags();
  ndo->data.ref = NULL;
  ndo->data.ptr = extents;
  ndo->m_type = ndt::type(tp).release();

  // The bytes arrmeta takes over the reference to the viewed data
  bytes_type_arrmeta *bytes_meta =
      reinterpret_cast<bytes_type_arrmeta *>(result.get_arrmeta());
  bytes_meta->blockref = data_ref.release();
  return result;
}
/**
 * Parses the JSON text in [json_begin, json_end) into the array `out`,
 * using the type of `out` to drive the parsing.
 *
 * Anything other than whitespace after the parsed value is an error.
 * Parse errors are converted into an invalid_argument exception whose
 * message includes the line/column of the error and a marker printed by
 * print_json_parse_error_marker().
 *
 * \param out         The array to parse into.
 * \param json_begin  The start of the JSON text.
 * \param json_end    The end of the JSON text.
 * \param ectx        The evaluation context, passed to the parser.
 *
 * \throws invalid_argument on a json_parse_error or parse::parse_error.
 */
void dynd::parse_json(nd::array &out, const char *json_begin,
                      const char *json_end, const eval::eval_context *ectx)
{
  try {
    ndt::type tp = out.get_type();
    const char *begin = json_begin;
    const char *end = json_end;
    ::parse_json(tp, out.get_arrmeta(), out.get_readwrite_originptr(), begin,
                 end, ectx);
    // Only whitespace may follow the parsed value
    begin = skip_whitespace(begin, end);
    if (begin != end) {
      throw json_parse_error(begin, "unexpected trailing JSON text", tp);
    }
  }
  catch (const json_parse_error &e) {
    // This error also carries the type which was being parsed
    int line, column;
    string line_prev, line_cur;
    get_error_line_column(json_begin, json_end, e.get_position(), line_prev,
                          line_cur, line, column);
    stringstream ss;
    ss << "Error parsing JSON at line " << line << ", column " << column
       << "\n";
    ss << "DyND Type: " << e.get_type() << "\n";
    ss << "Message: " << e.what() << "\n";
    print_json_parse_error_marker(ss, line_prev, line_cur, line, column);
    throw invalid_argument(ss.str());
  }
  catch (const parse::parse_error &e) {
    int line, column;
    string line_prev, line_cur;
    get_error_line_column(json_begin, json_end, e.get_position(), line_prev,
                          line_cur, line, column);
    stringstream ss;
    ss << "Error parsing JSON at line " << line << ", column " << column
       << "\n";
    ss << "Message: " << e.what() << "\n";
    print_json_parse_error_marker(ss, line_prev, line_cur, line, column);
    throw invalid_argument(ss.str());
  }
}
/**
 * Formats the array `n` as JSON text and returns it as an immutable
 * string array.
 *
 * The text is written into memory allocated from the memory block
 * referenced by the result string's arrmeta, through that block's POD
 * allocator API.
 *
 * \param n               The array to format. If its type is an expression
 *                        type, it is evaluated first.
 * \param struct_as_list  Stored in the output state passed to the formatter.
 *
 * \returns A new string array holding the JSON text.
 */
nd::array dynd::format_json(const nd::array& n, bool struct_as_list)
{
  // The result holds a single string
  nd::array result = nd::empty(ndt::make_string());

  // Output state: allocate an initial 1024 bytes from the string's block
  output_data out;
  out.struct_as_list = struct_as_list;
  out.blockref = reinterpret_cast<const string_type_arrmeta *>(
                     result.get_arrmeta())->blockref;
  out.api = get_memory_block_pod_allocator_api(out.blockref);
  out.api->allocate(out.blockref, 1024, 1, &out.out_begin,
                    &out.out_capacity_end);
  out.out_end = out.out_begin;

  // Expression types get evaluated into a temporary before formatting
  nd::array evaluated;
  const nd::array *src = &n;
  if (n.get_type().is_expression()) {
    evaluated = n.eval();
    src = &evaluated;
  }
  ::format_json(out, src->get_type(), src->get_arrmeta(),
                src->get_readonly_originptr());

  // Hand the buffer to the result string, resized to the bytes written
  string_type_data *dst =
      reinterpret_cast<string_type_data *>(result.get_readwrite_originptr());
  dst->begin = out.out_begin;
  dst->end = out.out_capacity_end;
  out.api->resize(out.blockref, out.out_end - out.out_begin, &dst->begin,
                  &dst->end);

  // Finalize processing and mark the result as immutable
  result.get_type().extended()->arrmeta_finalize_buffers(result.get_arrmeta());
  result.flag_as_immutable();
  return result;
}
/**
 * Concatenates the fields of two struct arrays into one struct array,
 * with the fields of `lhs` first, followed by the fields of `rhs`.
 *
 * If either input is null, the other one is returned unchanged.
 *
 * \param lhs  The struct providing the first fields.
 * \param rhs  The struct providing the remaining fields.
 *
 * \returns The concatenated struct array.
 * \throws invalid_argument if a non-null input is not of struct kind.
 */
nd::array dynd::struct_concat(nd::array lhs, nd::array rhs)
{
  // A null operand contributes nothing, the other operand is the result
  if (lhs.is_null()) {
    return rhs;
  }
  if (rhs.is_null()) {
    return lhs;
  }

  const ndt::type &lhs_tp = lhs.get_type();
  const ndt::type &rhs_tp = rhs.get_type();
  if (lhs_tp.get_kind() != struct_kind) {
    stringstream ss;
    ss << "Cannot concatenate array with type " << lhs_tp << " as a struct";
    throw invalid_argument(ss.str());
  }
  if (rhs_tp.get_kind() != struct_kind) {
    stringstream ss;
    ss << "Cannot concatenate array with type " << rhs_tp << " as a struct";
    throw invalid_argument(ss.str());
  }

  const ndt::base_struct_type *lhs_sd =
      lhs_tp.extended<ndt::base_struct_type>();
  const ndt::base_struct_type *rhs_sd =
      rhs_tp.extended<ndt::base_struct_type>();
  const intptr_t lhs_n = lhs_sd->get_field_count();
  const intptr_t rhs_n = rhs_sd->get_field_count();
  const intptr_t res_n = lhs_n + rhs_n;

  // Build the result type from the concatenated field names and types
  nd::array res_field_names = nd::empty(res_n, ndt::string_type::make());
  nd::array res_field_types = nd::empty(res_n, ndt::make_type());
  res_field_names(irange(0, lhs_n)).vals() = lhs_sd->get_field_names();
  res_field_types(irange(0, lhs_n)).vals() = lhs_sd->get_field_types();
  res_field_names(irange(lhs_n, res_n)).vals() = rhs_sd->get_field_names();
  res_field_types(irange(lhs_n, res_n)).vals() = rhs_sd->get_field_types();
  ndt::type res_tp = ndt::struct_type::make(res_field_names, res_field_types);
  const ndt::base_struct_type *res_sd =
      res_tp.extended<ndt::base_struct_type>();
  const ndt::type *res_field_tps = res_sd->get_field_types_raw();

  // Make an empty shell of the result, and give it default data offsets
  nd::array res = nd::empty_shell(res_tp);
  ndt::struct_type::fill_default_data_offsets(
      res_n, res_field_tps, reinterpret_cast<uintptr_t *>(res.get_arrmeta()));

  // Copy the arrmeta of each field from the input it comes from
  const uintptr_t *lhs_arrmeta_offsets = lhs_sd->get_arrmeta_offsets_raw();
  const uintptr_t *rhs_arrmeta_offsets = rhs_sd->get_arrmeta_offsets_raw();
  const uintptr_t *res_arrmeta_offsets = res_sd->get_arrmeta_offsets_raw();
  const char *lhs_arrmeta = lhs.get_arrmeta();
  const char *rhs_arrmeta = rhs.get_arrmeta();
  char *res_arrmeta = res.get_arrmeta();
  for (intptr_t i = 0; i < res_n; ++i) {
    const ndt::type &field_tp = res_field_tps[i];
    if (field_tp.is_builtin()) {
      continue;
    }
    if (i < lhs_n) {
      field_tp.extended()->arrmeta_copy_construct(
          res_arrmeta + res_arrmeta_offsets[i],
          lhs_arrmeta + lhs_arrmeta_offsets[i],
          lhs.get_data_memblock().get());
    } else {
      field_tp.extended()->arrmeta_copy_construct(
          res_arrmeta + res_arrmeta_offsets[i],
          rhs_arrmeta + rhs_arrmeta_offsets[i - lhs_n],
          rhs.get_data_memblock().get());
    }
  }

  // Copy the data of each field from the input it comes from
  const uintptr_t *lhs_data_offsets = lhs_sd->get_data_offsets(lhs.get_arrmeta());
  const uintptr_t *rhs_data_offsets = rhs_sd->get_data_offsets(rhs.get_arrmeta());
  const uintptr_t *res_data_offsets = res_sd->get_data_offsets(res.get_arrmeta());
  const char *lhs_data = lhs.get_readonly_originptr();
  const char *rhs_data = rhs.get_readonly_originptr();
  char *res_data = res.get_readwrite_originptr();
  for (intptr_t i = 0; i < res_n; ++i) {
    const ndt::type &field_tp = res_field_tps[i];
    if (i < lhs_n) {
      typed_data_copy(field_tp, res_arrmeta + res_arrmeta_offsets[i],
                      res_data + res_data_offsets[i],
                      lhs_arrmeta + lhs_arrmeta_offsets[i],
                      lhs_data + lhs_data_offsets[i]);
    } else {
      const intptr_t j = i - lhs_n;
      typed_data_copy(field_tp, res_arrmeta + res_arrmeta_offsets[i],
                      res_data + res_data_offsets[i],
                      rhs_arrmeta + rhs_arrmeta_offsets[j],
                      rhs_data + rhs_data_offsets[j]);
    }
  }

  return res;
}
/** * Adds a ckernel layer for processing one dimension of the reduction. * This is for a strided dimension which is being broadcast, and is * the final dimension before the accumulation operation. */ static size_t make_strided_inner_broadcast_dimension_kernel( const callable_type_data *elwise_reduction_const, const ndt::callable_type *elwise_reduction_tp, const callable_type_data *dst_initialization_const, const ndt::callable_type *dst_initialization_tp, void *ckb, intptr_t ckb_offset, intptr_t dst_stride, intptr_t src_stride, intptr_t src_size, const ndt::type &dst_tp, const char *dst_arrmeta, const ndt::type &src_tp, const char *src_arrmeta, bool right_associative, const nd::array &reduction_identity, kernel_request_t kernreq, const eval::eval_context *ectx) { callable_type_data *elwise_reduction = const_cast<callable_type_data *>(elwise_reduction_const); callable_type_data *dst_initialization = const_cast<callable_type_data *>(dst_initialization_const); intptr_t root_ckb_offset = ckb_offset; strided_inner_broadcast_kernel_extra *e = reinterpret_cast<ckernel_builder<kernel_request_host> *>(ckb) ->alloc_ck<strided_inner_broadcast_kernel_extra>(ckb_offset); e->destructor = &strided_inner_broadcast_kernel_extra::destruct; // Cannot have both a dst_initialization kernel and a reduction identity if (dst_initialization != NULL && !reduction_identity.is_null()) { throw invalid_argument( "make_lifted_reduction_ckernel: cannot specify" " both a dst_initialization kernel and a reduction_identity"); } if (reduction_identity.is_null()) { // Get the function pointer for the first_call, for the case with // no reduction identity if (kernreq == kernel_request_single) { e->set_first_call_function( &strided_inner_broadcast_kernel_extra::single_first); } else if (kernreq == kernel_request_strided) { e->set_first_call_function( &strided_inner_broadcast_kernel_extra::strided_first); } else { stringstream ss; ss << "make_lifted_reduction_ckernel: unrecognized request " << 
(int)kernreq; throw runtime_error(ss.str()); } } else { // Get the function pointer for the first_call, for the case with // a reduction identity if (kernreq == kernel_request_single) { e->set_first_call_function( &strided_inner_broadcast_kernel_extra::single_first_with_ident); } else if (kernreq == kernel_request_strided) { e->set_first_call_function( &strided_inner_broadcast_kernel_extra::strided_first_with_ident); } else { stringstream ss; ss << "make_lifted_reduction_ckernel: unrecognized request " << (int)kernreq; throw runtime_error(ss.str()); } if (reduction_identity.get_type() != dst_tp) { stringstream ss; ss << "make_lifted_reduction_ckernel: reduction identity type "; ss << reduction_identity.get_type() << " does not match dst type "; ss << dst_tp; throw runtime_error(ss.str()); } e->ident_data = reduction_identity.get_readonly_originptr(); e->ident_ref = reduction_identity.get_memblock().release(); } // The function pointer for followup accumulation calls e->set_followup_call_function( &strided_inner_broadcast_kernel_extra::strided_followup); // The striding parameters e->dst_stride = dst_stride; e->src_stride = src_stride; e->size = src_size; // Validate that the provided callables are unary operations, // and have the correct types if (elwise_reduction_tp->get_npos() != 1 && elwise_reduction_tp->get_npos() != 2) { stringstream ss; ss << "make_lifted_reduction_ckernel: elwise reduction ckernel "; ss << "funcproto must be unary or a binary expr with all equal types"; throw runtime_error(ss.str()); } if (elwise_reduction_tp->get_return_type() != dst_tp) { stringstream ss; ss << "make_lifted_reduction_ckernel: elwise reduction ckernel "; ss << "dst type is " << elwise_reduction_tp->get_return_type(); ss << ", expected " << dst_tp; throw type_error(ss.str()); } if (elwise_reduction_tp->get_pos_type(0) != src_tp) { stringstream ss; ss << "make_lifted_reduction_ckernel: elwise reduction ckernel "; ss << "src type is " << 
elwise_reduction_tp->get_return_type(); ss << ", expected " << src_tp; throw type_error(ss.str()); } if (dst_initialization != NULL) { check_dst_initialization(dst_initialization_tp, dst_tp, src_tp); } if (elwise_reduction_tp->get_npos() == 2) { ckb_offset = kernels::wrap_binary_as_unary_reduction_ckernel( ckb, ckb_offset, right_associative, kernel_request_strided); ndt::type src_tp_doubled[2] = {src_tp, src_tp}; const char *src_arrmeta_doubled[2] = {src_arrmeta, src_arrmeta}; ckb_offset = elwise_reduction->instantiate( elwise_reduction->static_data, 0, NULL, ckb, ckb_offset, dst_tp, dst_arrmeta, elwise_reduction_tp->get_npos(), src_tp_doubled, src_arrmeta_doubled, kernel_request_strided, ectx, nd::array(), std::map<nd::string, ndt::type>()); } else { ckb_offset = elwise_reduction->instantiate( elwise_reduction->static_data, 0, NULL, ckb, ckb_offset, dst_tp, dst_arrmeta, elwise_reduction_tp->get_npos(), &src_tp, &src_arrmeta, kernel_request_strided, ectx, nd::array(), std::map<nd::string, ndt::type>()); } // Make sure there's capacity for the next ckernel reinterpret_cast<ckernel_builder<kernel_request_host> *>(ckb) ->reserve(ckb_offset + sizeof(ckernel_prefix)); // Need to retrieve 'e' again because it may have moved e = reinterpret_cast<ckernel_builder<kernel_request_host> *>(ckb) ->get_at<strided_inner_broadcast_kernel_extra>(root_ckb_offset); e->dst_init_kernel_offset = ckb_offset - root_ckb_offset; if (dst_initialization != NULL) { ckb_offset = dst_initialization->instantiate( dst_initialization->static_data, 0, NULL, ckb, ckb_offset, dst_tp, dst_arrmeta, elwise_reduction_tp->get_npos(), &src_tp, &src_arrmeta, kernel_request_strided, ectx, nd::array(), std::map<nd::string, ndt::type>()); } else if (reduction_identity.is_null()) { ckb_offset = make_assignment_kernel(ckb, ckb_offset, dst_tp, dst_arrmeta, src_tp, src_arrmeta, kernel_request_strided, ectx); } else { ckb_offset = make_assignment_kernel( ckb, ckb_offset, dst_tp, dst_arrmeta, 
reduction_identity.get_type(), reduction_identity.get_arrmeta(), kernel_request_strided, ectx); } return ckb_offset; }
/**
 * Constructs a categorical type from an array of categories.
 *
 * \param categories  The array of categories.
 * \param presorted   If true, the categories are taken as they are
 *                    (evaluated as immutable), with an identity mapping
 *                    between values and category indices, and no
 *                    validation. If false, the categories must be a
 *                    strided array of scalars with no duplicates; a sorted
 *                    copy is stored along with the mappings between the
 *                    original positions (values) and the sorted indices.
 *
 * \throws dynd::type_error if `categories` isn't a strided array of scalars
 *         (non-presorted case).
 * \throws std::runtime_error if a category appears more than once
 *         (non-presorted case).
 */
categorical_type::categorical_type(const nd::array& categories, bool presorted)
  : base_type(categorical_type_id, custom_kind, 4, 4, type_flag_scalar, 0, 0, 0)
{
  intptr_t category_count;
  if (presorted) {
    // Construction shortcut for categories which are already sorted.
    // Nothing is validated here, the caller is responsible for having
    // ensured it is correct, typically by construction.
    m_categories = categories.eval_immutable();
    m_category_tp = m_categories.get_type().at(0);
    category_count = categories.get_dim_size();
    const size_t count = (size_t)category_count;
    m_value_to_category_index.resize(category_count);
    m_category_index_to_value.resize(category_count);
    // Values and category indices coincide
    for (size_t i = 0; i != count; ++i) {
      m_value_to_category_index[i] = i;
      m_category_index_to_value[i] = i;
    }
  } else {
    // Validate the categories array
    const ndt::type& cdt = categories.get_type();
    if (cdt.get_type_id() != strided_dim_type_id) {
      throw dynd::type_error("categorical_type only supports construction from a strided array of categories");
    }
    m_category_tp = categories.get_type().at(0);
    if (!m_category_tp.is_scalar()) {
      throw dynd::type_error("categorical_type only supports construction from a 1-dimensional strided array of categories");
    }

    category_count = categories.get_dim_size();
    const size_t count = (size_t)category_count;
    const char *categories_data = categories.get_readonly_originptr();
    const char *categories_element_arrmeta =
        categories.get_arrmeta() + sizeof(strided_dim_type_arrmeta);
    intptr_t categories_stride =
        reinterpret_cast<const strided_dim_type_arrmeta *>(
            categories.get_arrmeta())->stride;

    // A "sorting less" comparison between two categories
    comparison_ckernel_builder k;
    ::make_comparison_kernel(&k, 0,
                    m_category_tp, categories_element_arrmeta,
                    m_category_tp, categories_element_arrmeta,
                    comparison_type_sorting_less, &eval::default_eval_context);

    cmp less(k.get_function(), k.get());
    set<const char *, cmp> uniques(less);

    m_value_to_category_index.resize(category_count);
    m_category_index_to_value.resize(category_count);

    // Start from the identity permutation, while collecting the
    // categories into a set to detect duplicates
    for (size_t i = 0; i != count; ++i) {
      m_category_index_to_value[i] = i;
      const char *category_value = categories_data + i * categories_stride;

      if (uniques.find(category_value) != uniques.end()) {
        stringstream ss;
        ss << "categories must be unique: category value ";
        m_category_tp.print_data(ss, categories_element_arrmeta, category_value);
        ss << " appears more than once";
        throw std::runtime_error(ss.str());
      }
      uniques.insert(category_value);
    }

    // TODO: Putting everything in a set already caused a sort operation to occur,
    //       there's no reason we should need a second sort.
    std::sort(m_category_index_to_value.begin(), m_category_index_to_value.end(),
                    sorter(categories.get_readonly_originptr(), categories_stride,
                        k.get_function(), k.get()));

    // The other mapping is the inverse of the sorted permutation
    for (uint32_t i = 0; i < m_category_index_to_value.size(); ++i) {
      m_value_to_category_index[m_category_index_to_value[i]] = i;
    }

    m_categories = make_sorted_categories(uniques, m_category_tp,
                    categories_element_arrmeta);
  }

  // The number of categories determines the underlying integer storage
  if (category_count > 65536) {
    m_storage_type = ndt::make_type<uint32_t>();
  } else if (category_count > 256) {
    m_storage_type = ndt::make_type<uint16_t>();
  } else {
    m_storage_type = ndt::make_type<uint8_t>();
  }
  // The data size/alignment of this type are those of the storage type
  m_members.data_size = m_storage_type.get_data_size();
  m_members.data_alignment = (uint8_t)m_storage_type.get_data_alignment();
}
size_t get_category_count() const { return (size_t) reinterpret_cast<const strided_dim_type_arrmeta *>( m_categories.get_arrmeta())->dim_size; }
/**
 * Attempts to view the bytes array `arr` as the type `tp`.
 *
 * Two cases are supported:
 *  - `tp` has a nonzero data size which equals the number of bytes, and
 *    the bytes pointer is aligned for `tp`.
 *  - `tp` has a zero data size and is a fixed dimension whose element type
 *    has a nonzero data size which evenly divides the number of bytes.
 *    If `tp` is of kind_kind (symbolic), it is replaced by a concrete
 *    fixed dimension of the computed size, otherwise its fixed size must
 *    match the computed size.
 *
 * \param arr  The bytes array to view.
 * \param tp   The type to view it as.
 *
 * \returns The viewing array, or a null array if no view could be made.
 */
static nd::array view_from_bytes(const nd::array &arr, const ndt::type &tp)
{
  if (tp.get_flags() & (type_flag_blockref | type_flag_destructor |
                        type_flag_not_host_readable)) {
    // Bytes cannot be viewed as blockref types, types which require
    // destruction, or types not on host memory.
    return nd::array();
  }

  const bytes_type_arrmeta *bytes_meta =
      reinterpret_cast<const bytes_type_arrmeta *>(arr.get_arrmeta());
  bytes_type_data *bytes_d =
      reinterpret_cast<bytes_type_data *>(arr.get_ndo()->data.ptr);
  // Prefer the block reference in the bytes arrmeta, falling back to
  // the array's data memory block
  memory_block_ptr data_ref;
  if (bytes_meta->blockref != NULL) {
    data_ref = bytes_meta->blockref;
  } else {
    data_ref = arr.get_data_memblock();
  }
  char *data_ptr = bytes_d->begin;
  intptr_t data_size = bytes_d->end - data_ptr;

  size_t tp_data_size = tp.get_data_size();
  if (tp_data_size > 0) {
    // If the data type has a single chunk of POD memory, it's ok
    if ((intptr_t)tp_data_size == data_size &&
        offset_is_aligned(reinterpret_cast<size_t>(data_ptr),
                          tp.get_data_alignment())) {
      // Allocate a result array to attempt the view in it
      nd::array result(make_array_memory_block(tp.get_arrmeta_size()));
      // Initialize the fields
      result.get_ndo()->data.ptr = data_ptr;
      result.get_ndo()->data.ref = data_ref.release();
      result.get_ndo()->m_type = ndt::type(tp).release();
      result.get_ndo()->m_flags = arr.get_ndo()->m_flags;
      if (tp.get_arrmeta_size() > 0) {
        tp.extended()->arrmeta_default_construct(result.get_arrmeta(), true);
      }
      return result;
    }
  } else if (tp.get_type_id() == fixed_dim_type_id) {
    ndt::type arr_tp = tp;
    ndt::type el_tp = arr_tp.extended<ndt::base_dim_type>()->get_element_type();
    size_t el_data_size = el_tp.get_data_size();
    // If the element type has a single chunk of POD memory, and
    // it divides into the memory size, it's ok. An element data size of
    // zero is rejected first, because the modulo and division below would
    // otherwise divide by zero.
    if (el_data_size != 0 && data_size % (intptr_t)el_data_size == 0 &&
        offset_is_aligned(reinterpret_cast<size_t>(data_ptr),
                          arr_tp.get_data_alignment())) {
      intptr_t dim_size = data_size / el_data_size;
      if (arr_tp.get_kind() != kind_kind) {
        // A concrete fixed dimension has to match the size exactly
        if (arr_tp.extended<ndt::fixed_dim_type>()->get_fixed_dim_size() !=
            dim_size) {
          return nd::array();
        }
      } else {
        // Transform the symbolic fixed type into a concrete one
        arr_tp = ndt::make_fixed_dim(dim_size, el_tp);
      }
      // Allocate a result array to attempt the view in it
      nd::array result(make_array_memory_block(arr_tp.get_arrmeta_size()));
      // Initialize the fields
      result.get_ndo()->data.ptr = data_ptr;
      result.get_ndo()->data.ref = data_ref.release();
      result.get_ndo()->m_type = ndt::type(arr_tp).release();
      result.get_ndo()->m_flags = arr.get_ndo()->m_flags;
      if (el_tp.get_arrmeta_size() > 0) {
        el_tp.extended()->arrmeta_default_construct(
            result.get_arrmeta() + sizeof(fixed_dim_type_arrmeta), true);
      }
      // The elements are laid out contiguously
      fixed_dim_type_arrmeta *fixed_meta =
          reinterpret_cast<fixed_dim_type_arrmeta *>(result.get_arrmeta());
      fixed_meta->dim_size = dim_size;
      fixed_meta->stride = el_data_size;
      return result;
    }
  }

  // No view could be produced
  return nd::array();
}