/**
 * This function promotes the requested `axis` from a strided dim
 * to a var dim. It modifies `shape` in place and swaps `coord`,
 * `elem`, and `arr` to refer to a newly allocated array, copying
 * the existing data over.
 */
static void promote_nd_arr_dim(std::vector<intptr_t> &shape,
                               std::vector<afpd_coordentry> &coord,
                               afpd_dtype &elem, nd::array &arr,
                               intptr_t axis, bool copy_final_coord)
{
  vector<afpd_coordentry> newcoord;
  afpd_dtype newelem;
  newelem.dtp = elem.dtp;
  // Convert the axis into a var dim
  shape[axis] = -1;
  // Create the new array
  nd::array newarr = allocate_nd_arr(shape, newcoord, newelem, axis);
  // Copy the data up to, but not including, the current `coord`
  // from the old `arr` to the new one. The recursion stops at
  // `axis`, where all subsequent dimensions are handled by the
  // created kernel.
  ckernel_builder<kernel_request_host> k;
  if (elem.dtp.get_type_id() != uninitialized_type_id) {
    make_assignment_kernel(&k, 0, newcoord[axis].tp, newcoord[axis].arrmeta_ptr,
                           coord[axis].tp, coord[axis].arrmeta_ptr,
                           kernel_request_strided, &eval::default_eval_context);
  }
  copy_to_promoted_nd_arr(shape, newarr.get_readwrite_originptr(), newcoord,
                          newelem, arr.get_readonly_originptr(), coord, elem, k,
                          0, axis, copy_final_coord, true);
  arr.swap(newarr);
  coord.swap(newcoord);
  elem.swap(newelem);
}
nd::array dynd::format_json(const nd::array &n)
{
  // Create a UTF-8 string
  nd::array result = nd::empty(ndt::make_string());
  // Initialize the output with some memory
  output_data out;
  out.blockref =
      reinterpret_cast<const string_type_metadata *>(result.get_ndo_meta())
          ->blockref;
  out.api = get_memory_block_pod_allocator_api(out.blockref);
  out.api->allocate(out.blockref, 1024, 1, &out.out_begin,
                    &out.out_capacity_end);
  out.out_end = out.out_begin;

  if (!n.get_type().is_expression()) {
    ::format_json(out, n.get_type(), n.get_ndo_meta(),
                  n.get_readonly_originptr());
  } else {
    nd::array tmp = n.eval();
    ::format_json(out, tmp.get_type(), tmp.get_ndo_meta(),
                  tmp.get_readonly_originptr());
  }

  // Shrink the memory to fit, and set the pointers in the output
  string_type_data *d =
      reinterpret_cast<string_type_data *>(result.get_readwrite_originptr());
  d->begin = out.out_begin;
  d->end = out.out_capacity_end;
  out.api->resize(out.blockref, out.out_end - out.out_begin, &d->begin,
                  &d->end);

  // Finalize processing and mark the result as immutable
  result.get_type().extended()->metadata_finalize_buffers(
      result.get_ndo_meta());
  result.flag_as_immutable();
  return result;
}
nd::array dynd::format_json(const nd::array &n, bool struct_as_list)
{
  // Create a UTF-8 string
  nd::array result = nd::empty(ndt::string_type::make());
  // Initialize the output with some memory
  output_data out;
  out.out_string.resize(1024);
  out.out_begin = out.out_string.begin();
  out.out_capacity_end = out.out_string.end();
  out.out_end = out.out_begin;
  out.struct_as_list = struct_as_list;

  if (!n.get_type().is_expression()) {
    ::format_json(out, n.get_type(), n.get_arrmeta(),
                  n.get_readonly_originptr());
  } else {
    nd::array tmp = n.eval();
    ::format_json(out, tmp.get_type(), tmp.get_arrmeta(),
                  tmp.get_readonly_originptr());
  }

  // Shrink the memory to fit, and set the pointers in the output
  string *d = reinterpret_cast<string *>(result.get_readwrite_originptr());
  d->assign(out.out_string.data(), out.out_end - out.out_begin);

  // Finalize processing and mark the result as immutable
  result.get_type().extended()->arrmeta_finalize_buffers(result.get_arrmeta());
  result.flag_as_immutable();
  return result;
}
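// Usage sketch for format_json (a hypothetical helper, not part of the
// code above; assumes <iostream> is included and that nd::array provides
// an ostream operator<<):
void print_as_json(const nd::array &a)
{
  // Returns an immutable nd::array holding the UTF-8 JSON text
  nd::array json = dynd::format_json(a, /*struct_as_list=*/false);
  std::cout << json << std::endl;
}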
static void json_as_buffer(const nd::array &json, nd::array &out_tmp_ref,
                           const char *&begin, const char *&end)
{
  // Check the type of 'json', and get pointers to the begin/end of a UTF-8
  // buffer
  ndt::type json_type = json.get_type().value_type();
  switch (json_type.get_kind()) {
  case string_kind: {
    const ndt::base_string_type *sdt =
        json_type.extended<ndt::base_string_type>();
    switch (sdt->get_encoding()) {
    case string_encoding_ascii:
    case string_encoding_utf_8:
      out_tmp_ref = json.eval();
      // The data is already UTF-8, so use the buffer directly
      sdt->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                            out_tmp_ref.get_readonly_originptr());
      break;
    default: {
      // The data needs to be converted to UTF-8 before parsing
      ndt::type utf8_tp = ndt::string_type::make(string_encoding_utf_8);
      out_tmp_ref = json.ucast(utf8_tp).eval();
      sdt = static_cast<const ndt::base_string_type *>(utf8_tp.extended());
      sdt->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                            out_tmp_ref.get_readonly_originptr());
      break;
    }
    }
    break;
  }
  case bytes_kind: {
    out_tmp_ref = json.eval();
    const ndt::base_bytes_type *bdt =
        json_type.extended<ndt::base_bytes_type>();
    bdt->get_bytes_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                         out_tmp_ref.get_readonly_originptr());
    break;
  }
  default: {
    stringstream ss;
    ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) "
          "or a string, not \"" << json_type << "\"";
    throw runtime_error(ss.str());
  }
  }
}
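// Call-pattern sketch for json_as_buffer (hypothetical; the function has
// internal linkage, so this only compiles in the same translation unit,
// and `json_input` is an assumed nd::array holding JSON text):
void parse_example(const nd::array &json_input)
{
  nd::array tmp;
  const char *begin = NULL, *end = NULL;
  json_as_buffer(json_input, tmp, begin, end);
  // [begin, end) now delimits a UTF-8 buffer whose memory is kept
  // alive by `tmp` while it remains in scope
}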
void dynd::typed_data_assign(const ndt::type &dst_tp, const char *dst_arrmeta,
                             char *dst_data, const nd::array &src_arr,
                             const eval::eval_context *ectx)
{
  typed_data_assign(dst_tp, dst_arrmeta, dst_data, src_arr.get_type(),
                    src_arr.get_arrmeta(), src_arr.get_readonly_originptr(),
                    ectx);
}
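// Usage sketch for the overload above (a hypothetical helper; assumes a
// `using namespace dynd;` context like the surrounding code):
void assign_example(nd::array &dst, const nd::array &src)
{
  // Unpack dst into (type, arrmeta, data) and forward to the full overload
  typed_data_assign(dst.get_type(), dst.get_arrmeta(),
                    dst.get_readwrite_originptr(), src,
                    &eval::default_eval_context);
}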
const char *get_category_data_from_value(size_t value) const
{
  if (value >= get_category_count()) {
    throw std::runtime_error("category value is out of bounds");
  }
  return m_categories.get_readonly_originptr() +
         m_value_to_category_index[value] *
             reinterpret_cast<const strided_dim_type_arrmeta *>(
                 m_categories.get_arrmeta())->stride;
}
const char *get_category_data_from_value(uint32_t value) const
{
  if (value >= get_category_count()) {
    throw std::runtime_error("category value is out of bounds");
  }
  return m_categories.get_readonly_originptr() +
         unchecked_fixed_dim_get<intptr_t>(m_value_to_category_index, value) *
             reinterpret_cast<const fixed_dim_type_arrmeta *>(
                 m_categories.get_arrmeta())->stride;
}
uint32_t
categorical_type::get_value_from_category(const nd::array &category) const
{
  if (category.get_type() == m_category_tp) {
    // If the type is right, get the category value directly
    return get_value_from_category(category.get_arrmeta(),
                                   category.get_readonly_originptr());
  } else {
    // Otherwise convert to the correct type, then get the category value
    nd::array c = nd::empty(m_category_tp);
    c.val_assign(category);
    return get_value_from_category(c.get_arrmeta(),
                                   c.get_readonly_originptr());
  }
}
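// Lookup sketch (hypothetical; `ct` is a categorical_type instance and
// `category` any array convertible to its category type - a mismatched
// type is converted through a temporary, as the branch above shows):
uint32_t lookup_example(const categorical_type &ct, const nd::array &category)
{
  return ct.get_value_from_category(category);
}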
static nd::array view_concrete(const nd::array &arr, const ndt::type &tp)
{
  // Allocate a result array to attempt the view in it
  nd::array result(make_array_memory_block(tp.get_arrmeta_size()));
  // Copy the fields
  result.get_ndo()->data.ptr = arr.get_ndo()->data.ptr;
  if (arr.get_ndo()->data.ref == NULL) {
    // Embedded data, need a reference to the array
    result.get_ndo()->data.ref = arr.get_memblock().release();
  } else {
    // Use the same data reference, avoid producing a chain
    result.get_ndo()->data.ref = arr.get_data_memblock().release();
  }
  result.get_ndo()->m_type = ndt::type(tp).release();
  result.get_ndo()->m_flags = arr.get_ndo()->m_flags;

  // First handle a special case of viewing the outermost "var" as "fixed[#]"
  if (arr.get_type().get_type_id() == var_dim_type_id &&
      tp.get_type_id() == fixed_dim_type_id) {
    const var_dim_type_arrmeta *in_am =
        reinterpret_cast<const var_dim_type_arrmeta *>(arr.get_arrmeta());
    const var_dim_type_data *in_dat =
        reinterpret_cast<const var_dim_type_data *>(
            arr.get_readonly_originptr());
    fixed_dim_type_arrmeta *out_am =
        reinterpret_cast<fixed_dim_type_arrmeta *>(result.get_arrmeta());
    out_am->dim_size = tp.extended<ndt::fixed_dim_type>()->get_fixed_dim_size();
    out_am->stride = in_am->stride;
    if ((intptr_t)in_dat->size == out_am->dim_size) {
      // Use the more specific data reference from the var arrmeta if possible
      if (in_am->blockref != NULL) {
        memory_block_decref(result.get_ndo()->data.ref);
        memory_block_incref(in_am->blockref);
        result.get_ndo()->data.ref = in_am->blockref;
      }
      result.get_ndo()->data.ptr = in_dat->begin + in_am->offset;
      // Try to copy the rest of the arrmeta as a view
      if (try_view(
              arr.get_type().extended<ndt::base_dim_type>()->get_element_type(),
              arr.get_arrmeta() + sizeof(var_dim_type_arrmeta),
              tp.extended<ndt::base_dim_type>()->get_element_type(),
              result.get_arrmeta() + sizeof(fixed_dim_type_arrmeta),
              arr.get_memblock().get())) {
        return result;
      }
    }
  }
  // Otherwise try to copy the arrmeta as a view
  else if (try_view(arr.get_type(), arr.get_arrmeta(), tp,
                    result.get_arrmeta(), arr.get_memblock().get())) {
    // If it succeeded, return it
    return result;
  }

  stringstream ss;
  ss << "Unable to view nd::array of type " << arr.get_type();
  ss << " as type " << tp;
  throw type_error(ss.str());
}
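// View sketch (hypothetical; view_concrete is static, so this only works
// in the same translation unit, and the "3 * int32" datashape literal is
// assumed to parse to a fixed dim via ndt::type's string constructor):
nd::array view_example(const nd::array &arr)
{
  // For an input of type "var * int32" holding exactly 3 elements, this
  // takes the special-case branch above and reuses the var data in place
  return view_concrete(arr, ndt::type("3 * int32"));
}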
/**
 * Substitutes the typevars into each element of a contiguous
 * array of types.
 */
static nd::array
substitute_type_array(const nd::array &type_array,
                      const std::map<std::string, ndt::type> &typevars,
                      bool concrete)
{
  intptr_t field_count = type_array.get_dim_size();
  const ndt::type *field_types =
      reinterpret_cast<const ndt::type *>(type_array.get_readonly_originptr());
  nd::array tmp_field_types(nd::empty(field_count, ndt::make_type()));
  ndt::type *ftraw =
      reinterpret_cast<ndt::type *>(tmp_field_types.get_readwrite_originptr());
  for (intptr_t i = 0; i < field_count; ++i) {
    ftraw[i] = ndt::substitute(field_types[i], typevars, concrete);
  }
  return tmp_field_types;
}
/**
 * This function promotes the dtype the array currently has with
 * `tp`, allocates a new array, then copies all the data up to the
 * current index in `coord`. It modifies `coord` and `elem` in place.
 */
static void promote_nd_arr_dtype(const std::vector<intptr_t> &shape,
                                 std::vector<afpd_coordentry> &coord,
                                 afpd_dtype &elem, nd::array &arr,
                                 const ndt::type &tp)
{
  intptr_t ndim = shape.size();
  vector<afpd_coordentry> newcoord;
  afpd_dtype newelem;

  if (elem.dtp.get_type_id() == uninitialized_type_id) {
    // If the `elem` dtype is uninitialized, it means a dummy
    // array was created to capture the dimensional structure until
    // the first value is encountered
    newelem.dtp = tp;
  } else {
    newelem.dtp = promote_types_arithmetic(elem.dtp, tp);
  }
  // Create the new array
  nd::array newarr = allocate_nd_arr(shape, newcoord, newelem, ndim);
  // Copy the data up to, but not including, the current `coord`
  // from the old `arr` to the new one
  ckernel_builder<kernel_request_host> k;
  if (elem.dtp.get_type_id() != uninitialized_type_id) {
    make_assignment_kernel(&k, 0, newelem.dtp, newelem.arrmeta_ptr, elem.dtp,
                           elem.arrmeta_ptr, kernel_request_strided,
                           &eval::default_eval_context);
  } else {
    // An assignment kernel which copies one byte - it will only
    // be called with count==0 when dtp is uninitialized
    make_assignment_kernel(&k, 0, ndt::type::make<char>(), NULL,
                           ndt::type::make<char>(), NULL,
                           kernel_request_strided,
                           &eval::default_eval_context);
  }
  copy_to_promoted_nd_arr(shape, newarr.get_readwrite_originptr(), newcoord,
                          newelem, arr.get_readonly_originptr(), coord, elem,
                          k, 0, ndim, false, true);
  arr.swap(newarr);
  coord.swap(newcoord);
  elem.swap(newelem);
}
nd::callable::callable(const nd::array &rhs)
{
  if (!rhs.is_null()) {
    if (rhs.get_type().get_type_id() == callable_type_id) {
      const callable_type_data *af =
          reinterpret_cast<const callable_type_data *>(
              rhs.get_readonly_originptr());
      if (af->instantiate != NULL) {
        // It's valid: callable type, contains instantiate function.
        m_value = rhs;
      } else {
        throw invalid_argument("Require a non-empty callable, provided "
                               "callable has NULL instantiate function");
      }
    } else {
      stringstream ss;
      ss << "Cannot implicitly convert nd::array of type "
         << rhs.get_type().value_type() << " to callable";
      throw type_error(ss.str());
    }
  }
}
inline const uintptr_t *get_data_offsets_raw() const
{
  return reinterpret_cast<const uintptr_t *>(
      m_data_offsets.get_readonly_originptr());
}
const type *get_field_types_raw() const
{
  return reinterpret_cast<const type *>(
      m_field_types.get_readonly_originptr());
}
const uintptr_t *get_data_offsets(const char *DYND_UNUSED(arrmeta)) const
{
  return reinterpret_cast<const uintptr_t *>(
      m_data_offsets.get_readonly_originptr());
}
nd::array dynd::struct_concat(nd::array lhs, nd::array rhs)
{
  nd::array res;
  if (lhs.is_null()) {
    res = rhs;
    return res;
  }
  if (rhs.is_null()) {
    res = lhs;
    return res;
  }
  const ndt::type &lhs_tp = lhs.get_type(), &rhs_tp = rhs.get_type();
  if (lhs_tp.get_kind() != struct_kind) {
    stringstream ss;
    ss << "Cannot concatenate array with type " << lhs_tp << " as a struct";
    throw invalid_argument(ss.str());
  }
  if (rhs_tp.get_kind() != struct_kind) {
    stringstream ss;
    ss << "Cannot concatenate array with type " << rhs_tp << " as a struct";
    throw invalid_argument(ss.str());
  }

  // Make an empty shell struct by concatenating the fields together
  intptr_t lhs_n = lhs_tp.extended<ndt::base_struct_type>()->get_field_count();
  intptr_t rhs_n = rhs_tp.extended<ndt::base_struct_type>()->get_field_count();
  intptr_t res_n = lhs_n + rhs_n;
  nd::array res_field_names = nd::empty(res_n, ndt::string_type::make());
  nd::array res_field_types = nd::empty(res_n, ndt::make_type());
  res_field_names(irange(0, lhs_n)).vals() =
      lhs_tp.extended<ndt::base_struct_type>()->get_field_names();
  res_field_names(irange(lhs_n, res_n)).vals() =
      rhs_tp.extended<ndt::base_struct_type>()->get_field_names();
  res_field_types(irange(0, lhs_n)).vals() =
      lhs_tp.extended<ndt::base_struct_type>()->get_field_types();
  res_field_types(irange(lhs_n, res_n)).vals() =
      rhs_tp.extended<ndt::base_struct_type>()->get_field_types();
  ndt::type res_tp = ndt::struct_type::make(res_field_names, res_field_types);
  const ndt::type *res_field_tps =
      res_tp.extended<ndt::base_struct_type>()->get_field_types_raw();
  res = nd::empty_shell(res_tp);
  // Initialize the default data offsets for the struct arrmeta
  ndt::struct_type::fill_default_data_offsets(
      res_n, res_tp.extended<ndt::base_struct_type>()->get_field_types_raw(),
      reinterpret_cast<uintptr_t *>(res.get_arrmeta()));

  // Get information about the arrmeta layout of the input and res
  const uintptr_t *lhs_arrmeta_offsets =
      lhs_tp.extended<ndt::base_struct_type>()->get_arrmeta_offsets_raw();
  const uintptr_t *rhs_arrmeta_offsets =
      rhs_tp.extended<ndt::base_struct_type>()->get_arrmeta_offsets_raw();
  const uintptr_t *res_arrmeta_offsets =
      res_tp.extended<ndt::base_struct_type>()->get_arrmeta_offsets_raw();
  const char *lhs_arrmeta = lhs.get_arrmeta();
  const char *rhs_arrmeta = rhs.get_arrmeta();
  char *res_arrmeta = res.get_arrmeta();
  // Copy the arrmeta from the input arrays
  for (intptr_t i = 0; i < lhs_n; ++i) {
    const ndt::type &tp = res_field_tps[i];
    if (!tp.is_builtin()) {
      tp.extended()->arrmeta_copy_construct(
          res_arrmeta + res_arrmeta_offsets[i],
          lhs_arrmeta + lhs_arrmeta_offsets[i], lhs.get_data_memblock().get());
    }
  }
  for (intptr_t i = 0; i < rhs_n; ++i) {
    const ndt::type &tp = res_field_tps[i + lhs_n];
    if (!tp.is_builtin()) {
      tp.extended()->arrmeta_copy_construct(
          res_arrmeta + res_arrmeta_offsets[i + lhs_n],
          rhs_arrmeta + rhs_arrmeta_offsets[i], rhs.get_data_memblock().get());
    }
  }

  // Get information about the data layout of the input and res
  const uintptr_t *lhs_data_offsets =
      lhs_tp.extended<ndt::base_struct_type>()->get_data_offsets(
          lhs.get_arrmeta());
  const uintptr_t *rhs_data_offsets =
      rhs_tp.extended<ndt::base_struct_type>()->get_data_offsets(
          rhs.get_arrmeta());
  const uintptr_t *res_data_offsets =
      res_tp.extended<ndt::base_struct_type>()->get_data_offsets(
          res.get_arrmeta());
  const char *lhs_data = lhs.get_readonly_originptr();
  const char *rhs_data = rhs.get_readonly_originptr();
  char *res_data = res.get_readwrite_originptr();
  // Copy the data from the input arrays
  for (intptr_t i = 0; i < lhs_n; ++i) {
    const ndt::type &tp = res_field_tps[i];
    typed_data_copy(tp, res_arrmeta + res_arrmeta_offsets[i],
                    res_data + res_data_offsets[i],
                    lhs_arrmeta + lhs_arrmeta_offsets[i],
                    lhs_data + lhs_data_offsets[i]);
  }
  for (intptr_t i = 0; i < rhs_n; ++i) {
    const ndt::type &tp = res_field_tps[i + lhs_n];
    typed_data_copy(tp, res_arrmeta + res_arrmeta_offsets[i + lhs_n],
                    res_data + res_data_offsets[i + lhs_n],
                    rhs_arrmeta + rhs_arrmeta_offsets[i],
                    rhs_data + rhs_data_offsets[i]);
  }
  return res;
}
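// Usage sketch for struct_concat (hypothetical inputs; e.g. for lhs of
// type "{x : int32}" and rhs of type "{y : float64}", the result has
// type "{x : int32, y : float64}" with fields copied from both):
nd::array concat_example(const nd::array &lhs, const nd::array &rhs)
{
  return dynd::struct_concat(lhs, rhs);
}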
/**
 * Adds a ckernel layer for processing one dimension of the reduction.
 * This is for a strided dimension which is being broadcast, and is
 * the final dimension before the accumulation operation.
 */
static size_t make_strided_inner_broadcast_dimension_kernel(
    const callable_type_data *elwise_reduction_const,
    const ndt::callable_type *elwise_reduction_tp,
    const callable_type_data *dst_initialization_const,
    const ndt::callable_type *dst_initialization_tp, void *ckb,
    intptr_t ckb_offset, intptr_t dst_stride, intptr_t src_stride,
    intptr_t src_size, const ndt::type &dst_tp, const char *dst_arrmeta,
    const ndt::type &src_tp, const char *src_arrmeta, bool right_associative,
    const nd::array &reduction_identity, kernel_request_t kernreq,
    const eval::eval_context *ectx)
{
  callable_type_data *elwise_reduction =
      const_cast<callable_type_data *>(elwise_reduction_const);
  callable_type_data *dst_initialization =
      const_cast<callable_type_data *>(dst_initialization_const);

  intptr_t root_ckb_offset = ckb_offset;
  strided_inner_broadcast_kernel_extra *e =
      reinterpret_cast<ckernel_builder<kernel_request_host> *>(ckb)
          ->alloc_ck<strided_inner_broadcast_kernel_extra>(ckb_offset);
  e->destructor = &strided_inner_broadcast_kernel_extra::destruct;
  // Cannot have both a dst_initialization kernel and a reduction identity
  if (dst_initialization != NULL && !reduction_identity.is_null()) {
    throw invalid_argument(
        "make_lifted_reduction_ckernel: cannot specify"
        " both a dst_initialization kernel and a reduction_identity");
  }
  if (reduction_identity.is_null()) {
    // Get the function pointer for the first_call, for the case with
    // no reduction identity
    if (kernreq == kernel_request_single) {
      e->set_first_call_function(
          &strided_inner_broadcast_kernel_extra::single_first);
    } else if (kernreq == kernel_request_strided) {
      e->set_first_call_function(
          &strided_inner_broadcast_kernel_extra::strided_first);
    } else {
      stringstream ss;
      ss << "make_lifted_reduction_ckernel: unrecognized request "
         << (int)kernreq;
      throw runtime_error(ss.str());
    }
  } else {
    // Get the function pointer for the first_call, for the case with
    // a reduction identity
    if (kernreq == kernel_request_single) {
      e->set_first_call_function(
          &strided_inner_broadcast_kernel_extra::single_first_with_ident);
    } else if (kernreq == kernel_request_strided) {
      e->set_first_call_function(
          &strided_inner_broadcast_kernel_extra::strided_first_with_ident);
    } else {
      stringstream ss;
      ss << "make_lifted_reduction_ckernel: unrecognized request "
         << (int)kernreq;
      throw runtime_error(ss.str());
    }
    if (reduction_identity.get_type() != dst_tp) {
      stringstream ss;
      ss << "make_lifted_reduction_ckernel: reduction identity type ";
      ss << reduction_identity.get_type() << " does not match dst type ";
      ss << dst_tp;
      throw runtime_error(ss.str());
    }
    e->ident_data = reduction_identity.get_readonly_originptr();
    e->ident_ref = reduction_identity.get_memblock().release();
  }
  // The function pointer for followup accumulation calls
  e->set_followup_call_function(
      &strided_inner_broadcast_kernel_extra::strided_followup);
  // The striding parameters
  e->dst_stride = dst_stride;
  e->src_stride = src_stride;
  e->size = src_size;
  // Validate that the provided callables are unary operations,
  // and have the correct types
  if (elwise_reduction_tp->get_npos() != 1 &&
      elwise_reduction_tp->get_npos() != 2) {
    stringstream ss;
    ss << "make_lifted_reduction_ckernel: elwise reduction ckernel ";
    ss << "funcproto must be unary or a binary expr with all equal types";
    throw runtime_error(ss.str());
  }
  if (elwise_reduction_tp->get_return_type() != dst_tp) {
    stringstream ss;
    ss << "make_lifted_reduction_ckernel: elwise reduction ckernel ";
    ss << "dst type is " << elwise_reduction_tp->get_return_type();
    ss << ", expected " << dst_tp;
    throw type_error(ss.str());
  }
  if (elwise_reduction_tp->get_pos_type(0) != src_tp) {
    stringstream ss;
    ss << "make_lifted_reduction_ckernel: elwise reduction ckernel ";
    ss << "src type is " << elwise_reduction_tp->get_pos_type(0);
    ss << ", expected " << src_tp;
    throw type_error(ss.str());
  }
  if (dst_initialization != NULL) {
    check_dst_initialization(dst_initialization_tp, dst_tp, src_tp);
  }
  if (elwise_reduction_tp->get_npos() == 2) {
    ckb_offset = kernels::wrap_binary_as_unary_reduction_ckernel(
        ckb, ckb_offset, right_associative, kernel_request_strided);
    ndt::type src_tp_doubled[2] = {src_tp, src_tp};
    const char *src_arrmeta_doubled[2] = {src_arrmeta, src_arrmeta};
    ckb_offset = elwise_reduction->instantiate(
        elwise_reduction->static_data, 0, NULL, ckb, ckb_offset, dst_tp,
        dst_arrmeta, elwise_reduction_tp->get_npos(), src_tp_doubled,
        src_arrmeta_doubled, kernel_request_strided, ectx, nd::array(),
        std::map<nd::string, ndt::type>());
  } else {
    ckb_offset = elwise_reduction->instantiate(
        elwise_reduction->static_data, 0, NULL, ckb, ckb_offset, dst_tp,
        dst_arrmeta, elwise_reduction_tp->get_npos(), &src_tp, &src_arrmeta,
        kernel_request_strided, ectx, nd::array(),
        std::map<nd::string, ndt::type>());
  }
  // Make sure there's capacity for the next ckernel
  reinterpret_cast<ckernel_builder<kernel_request_host> *>(ckb)
      ->reserve(ckb_offset + sizeof(ckernel_prefix));
  // Need to retrieve 'e' again because it may have moved
  e = reinterpret_cast<ckernel_builder<kernel_request_host> *>(ckb)
          ->get_at<strided_inner_broadcast_kernel_extra>(root_ckb_offset);
  e->dst_init_kernel_offset = ckb_offset - root_ckb_offset;
  if (dst_initialization != NULL) {
    ckb_offset = dst_initialization->instantiate(
        dst_initialization->static_data, 0, NULL, ckb, ckb_offset, dst_tp,
        dst_arrmeta, elwise_reduction_tp->get_npos(), &src_tp, &src_arrmeta,
        kernel_request_strided, ectx, nd::array(),
        std::map<nd::string, ndt::type>());
  } else if (reduction_identity.is_null()) {
    ckb_offset =
        make_assignment_kernel(ckb, ckb_offset, dst_tp, dst_arrmeta, src_tp,
                               src_arrmeta, kernel_request_strided, ectx);
  } else {
    ckb_offset = make_assignment_kernel(
        ckb, ckb_offset, dst_tp, dst_arrmeta, reduction_identity.get_type(),
        reduction_identity.get_arrmeta(), kernel_request_strided, ectx);
  }
  return ckb_offset;
}
categorical_type::categorical_type(const nd::array &categories, bool presorted)
    : base_type(categorical_type_id, custom_kind, 4, 4, type_flag_scalar, 0, 0,
                0)
{
  intptr_t category_count;
  if (presorted) {
    // This is a construction shortcut, for the case when the categories are
    // already sorted. No validation of this is done; the caller should have
    // ensured it was correct already, typically by construction.
    m_categories = categories.eval_immutable();
    m_category_tp = m_categories.get_type().at(0);
    category_count = categories.get_dim_size();
    m_value_to_category_index.resize(category_count);
    m_category_index_to_value.resize(category_count);
    for (size_t i = 0; i != (size_t)category_count; ++i) {
      m_value_to_category_index[i] = i;
      m_category_index_to_value[i] = i;
    }
  } else {
    // Process the categories array to make sure it's valid
    const ndt::type &cdt = categories.get_type();
    if (cdt.get_type_id() != strided_dim_type_id) {
      throw dynd::type_error("categorical_type only supports construction "
                             "from a strided array of categories");
    }
    m_category_tp = categories.get_type().at(0);
    if (!m_category_tp.is_scalar()) {
      throw dynd::type_error("categorical_type only supports construction "
                             "from a 1-dimensional strided array of "
                             "categories");
    }
    category_count = categories.get_dim_size();
    intptr_t categories_stride =
        reinterpret_cast<const strided_dim_type_arrmeta *>(
            categories.get_arrmeta())->stride;
    const char *categories_element_arrmeta =
        categories.get_arrmeta() + sizeof(strided_dim_type_arrmeta);
    comparison_ckernel_builder k;
    ::make_comparison_kernel(&k, 0, m_category_tp, categories_element_arrmeta,
                             m_category_tp, categories_element_arrmeta,
                             comparison_type_sorting_less,
                             &eval::default_eval_context);
    cmp less(k.get_function(), k.get());
    set<const char *, cmp> uniques(less);

    m_value_to_category_index.resize(category_count);
    m_category_index_to_value.resize(category_count);

    // Create the mapping from indices of (to be lexicographically sorted)
    // categories to values
    for (size_t i = 0; i != (size_t)category_count; ++i) {
      m_category_index_to_value[i] = i;
      const char *category_value =
          categories.get_readonly_originptr() + i * categories_stride;
      if (uniques.find(category_value) == uniques.end()) {
        uniques.insert(category_value);
      } else {
        stringstream ss;
        ss << "categories must be unique: category value ";
        m_category_tp.print_data(ss, categories_element_arrmeta,
                                 category_value);
        ss << " appears more than once";
        throw std::runtime_error(ss.str());
      }
    }
    // TODO: Putting everything in a set already caused a sort operation to
    //       occur, there's no reason we should need a second sort.
    std::sort(m_category_index_to_value.begin(),
              m_category_index_to_value.end(),
              sorter(categories.get_readonly_originptr(), categories_stride,
                     k.get_function(), k.get()));

    // Invert the m_category_index_to_value permutation
    for (uint32_t i = 0; i < m_category_index_to_value.size(); ++i) {
      m_value_to_category_index[m_category_index_to_value[i]] = i;
    }

    m_categories = make_sorted_categories(uniques, m_category_tp,
                                          categories_element_arrmeta);
  }

  // Use the number of categories to choose the underlying integer storage
  if (category_count <= 256) {
    m_storage_type = ndt::make_type<uint8_t>();
  } else if (category_count <= 65536) {
    m_storage_type = ndt::make_type<uint16_t>();
  } else {
    m_storage_type = ndt::make_type<uint32_t>();
  }
  m_members.data_size = m_storage_type.get_data_size();
  m_members.data_alignment = (uint8_t)m_storage_type.get_data_alignment();
}
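// Construction sketch using the constructor above (hypothetical; assumes
// ndt::type has an ownership-taking (base_type *, bool) constructor, a
// pattern used elsewhere in this codebase, and that `categories` is a
// 1-D strided array of unique scalar values):
ndt::type make_categorical_example(const nd::array &categories)
{
  // presorted=false runs the validation, uniqueness check, and sort
  return ndt::type(new categorical_type(categories, false), false);
}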
const arrfunc_type_data *get_is_avail_arrfunc() const
{
  return reinterpret_cast<const arrfunc_type_data *>(
      m_nafunc.get_readonly_originptr());
}
const arrfunc_type_data *get_assign_na_arrfunc() const
{
  return reinterpret_cast<const arrfunc_type_data *>(
             m_nafunc.get_readonly_originptr()) +
         1;
}
inline const arrfunc_type_data *get() const
{
  return !m_value.is_null() ? reinterpret_cast<const arrfunc_type_data *>(
                                  m_value.get_readonly_originptr())
                            : NULL;
}
array_iter(const nd::array &op0, const nd::array &op1)
{
  init(op0.get_type(), op0.get_ndo_meta(), op0.get_readwrite_originptr(),
       op1.get_type(), op1.get_ndo_meta(), op1.get_readonly_originptr());
}