// Builds a categorical type whose categories are the distinct elements of
// `values`.
//
// The input is evaluated, its elements are collected into a std::set ordered
// by a `comparison_type_sorting_less` kernel, and that set is handed to
// make_sorted_categories() to produce the category array.
//
// values:  the array whose elements are factored into categories.
// returns: an ndt::type wrapping a newly allocated categorical_type.
ndt::type dynd::ndt::factor_categorical(const nd::array& values)
{
    // Do the factor operation on a concrete version of the values
    // TODO: Some cases where we don't want to do this?
    nd::array values_eval = values.eval();
    array_iter<0, 1> iter(values_eval);

    // Build the "less than" kernel used to order the set of element pointers
    comparison_ckernel_builder k;
    ::make_comparison_kernel(&k, 0,
                             iter.get_uniform_dtype(), iter.arrmeta(),
                             iter.get_uniform_dtype(), iter.arrmeta(),
                             comparison_type_sorting_less,
                             &eval::default_eval_context);

    cmp less(k.get_function(), k.get());
    set<const char *, cmp> uniques(less);

    if (!iter.empty()) {
        do {
            // std::set::insert leaves the set untouched when an equivalent
            // key is already present, so no separate find() is needed; this
            // avoids walking the tree (and invoking the kernel) twice.
            uniques.insert(iter.data());
        } while (iter.next());
    }

    // Copy the values (now sorted and unique) into a new nd::array
    nd::array categories = make_sorted_categories(uniques,
                                                  iter.get_uniform_dtype(),
                                                  iter.arrmeta());

    return ndt::type(new categorical_type(categories, true), false);
}
// Formats the array `n` as JSON and returns the text as an immutable
// string array.
//
// n:              the array to format; if its type is an expression type it
//                 is evaluated first.
// struct_as_list: forwarded to the formatter through the output state.
// returns:        a new array of `ndt::string_type` holding the formatted
//                 text.
nd::array dynd::format_json(const nd::array &n, bool struct_as_list)
{
    // Destination array of string type (presumably UTF-8 -- TODO confirm)
    nd::array json_arr = nd::empty(ndt::string_type::make());

    // Working state for the formatter, seeded with a 1024-element buffer
    output_data buf;
    buf.out_string.resize(1024);
    buf.out_begin = buf.out_string.begin();
    buf.out_capacity_end = buf.out_string.end();
    buf.out_end = buf.out_begin;
    buf.struct_as_list = struct_as_list;

    if (n.get_type().is_expression()) {
        // Expression types are evaluated into a temporary before formatting
        nd::array evaluated = n.eval();
        ::format_json(buf, evaluated.get_type(), evaluated.get_arrmeta(),
                      evaluated.get_readonly_originptr());
    }
    else {
        ::format_json(buf, n.get_type(), n.get_arrmeta(),
                      n.get_readonly_originptr());
    }

    // Copy only the portion of the buffer that was actually written
    string *dst = reinterpret_cast<string *>(json_arr.get_readwrite_originptr());
    dst->assign(buf.out_string.data(), buf.out_end - buf.out_begin);

    // Finalize processing and mark the result as immutable
    json_arr.get_type().extended()->arrmeta_finalize_buffers(json_arr.get_arrmeta());
    json_arr.flag_as_immutable();

    return json_arr;
}
// Formats the array `n` as JSON and returns the text as an immutable
// string array.
//
// The text is written directly into memory obtained from the memory block
// referenced by the result's string metadata, then resized to the number of
// bytes written.
//
// n:       the array to format; if its type is an expression type it is
//          evaluated first.
// returns: a new string array holding the formatted text.
nd::array dynd::format_json(const nd::array& n)
{
    // Destination array of string type (presumably UTF-8 -- TODO confirm)
    nd::array json_arr = nd::empty(ndt::make_string());

    // Working state for the formatter: request an initial 1024-byte region
    // from the allocator API of the result's memory block
    output_data buf;
    buf.blockref = reinterpret_cast<const string_type_metadata *>(json_arr.get_ndo_meta())->blockref;
    buf.api = get_memory_block_pod_allocator_api(buf.blockref);
    buf.api->allocate(buf.blockref, 1024, 1, &buf.out_begin, &buf.out_capacity_end);
    buf.out_end = buf.out_begin;

    if (n.get_type().is_expression()) {
        // Expression types are evaluated into a temporary before formatting
        nd::array evaluated = n.eval();
        ::format_json(buf, evaluated.get_type(), evaluated.get_ndo_meta(),
                      evaluated.get_readonly_originptr());
    }
    else {
        ::format_json(buf, n.get_type(), n.get_ndo_meta(),
                      n.get_readonly_originptr());
    }

    // Point the string data at the allocated region, then let resize()
    // shrink it to the written length and update begin/end accordingly
    string_type_data *dst = reinterpret_cast<string_type_data *>(json_arr.get_readwrite_originptr());
    dst->begin = buf.out_begin;
    dst->end = buf.out_capacity_end;
    buf.api->resize(buf.blockref, buf.out_end - buf.out_begin, &dst->begin, &dst->end);

    // Finalize processing and mark the result as immutable
    json_arr.get_type().extended()->metadata_finalize_buffers(json_arr.get_ndo_meta());
    json_arr.flag_as_immutable();

    return json_arr;
}
// Builds a categorical type whose categories are the distinct elements of
// `values`.
//
// The input is evaluated and viewed as a strided sequence; a pointer to each
// element is collected into a std::set ordered by `cmp`, and that set is
// handed to make_sorted_categories() to produce the category array.
//
// values:  the array whose elements are factored into categories.
// returns: a type wrapping a newly allocated categorical_type.
ndt::type ndt::factor_categorical(const nd::array &values)
{
    // Do the factor operation on a concrete version of the values
    // TODO: Some cases where we don't want to do this?
    nd::array values_eval = values.eval();

    // Element count, stride, element type and element arrmeta of the input
    intptr_t dim_size, stride;
    type el_tp;
    const char *el_arrmeta;
    values_eval.get_type().get_as_strided(values_eval.get()->metadata(),
                                          &dim_size, &stride, &el_tp, &el_arrmeta);

    // NOTE(review): no comparison kernel is visibly built into `k` in this
    // function before its function pointer is fetched below -- confirm that
    // `fn` is a valid "less than" kernel for `el_tp` at this point.
    nd::kernel_builder k;
    kernel_single_t fn = k.get()->get_function<kernel_single_t>();

    cmp less(fn, k.get());
    set<const char *, cmp> uniques(less);

    for (intptr_t i = 0; i < dim_size; ++i) {
        // std::set::insert leaves the set untouched when an equivalent key
        // is already present, so no separate find() is needed; this avoids
        // walking the tree (and invoking the comparison) twice.
        uniques.insert(values_eval.cdata() + i * stride);
    }

    // Copy the values (now sorted and unique) into a new nd::array
    nd::array categories = make_sorted_categories(uniques, el_tp, el_arrmeta);

    return type(new categorical_type(categories, true), false);
}
// Builds a categorical type whose categories are the distinct elements of
// `values`.
//
// The input is evaluated and viewed as a strided sequence; a pointer to each
// element is collected into a std::set ordered by a
// `comparison_type_sorting_less` kernel, and that set is handed to
// make_sorted_categories() to produce the category array.
//
// values:  the array whose elements are factored into categories.
// returns: a type wrapping a newly allocated categorical_type.
ndt::type ndt::factor_categorical(const nd::array &values)
{
    // Do the factor operation on a concrete version of the values
    // TODO: Some cases where we don't want to do this?
    nd::array values_eval = values.eval();

    // Element count, stride, element type and element arrmeta of the input
    intptr_t dim_size, stride;
    type el_tp;
    const char *el_arrmeta;
    values_eval.get_type().get_as_strided(values_eval.get_arrmeta(),
                                          &dim_size, &stride, &el_tp, &el_arrmeta);

    // Build the "less than" kernel used to order the set of element pointers
    ckernel_builder<kernel_request_host> k;
    ::make_comparison_kernel(&k, 0, el_tp, el_arrmeta, el_tp, el_arrmeta,
                             comparison_type_sorting_less,
                             &eval::default_eval_context);
    expr_single_t fn = k.get()->get_function<expr_single_t>();

    cmp less(fn, k.get());
    set<const char *, cmp> uniques(less);

    for (intptr_t i = 0; i < dim_size; ++i) {
        // std::set::insert leaves the set untouched when an equivalent key
        // is already present, so no separate find() is needed; this avoids
        // walking the tree (and invoking the kernel) twice.
        uniques.insert(values_eval.get_readonly_originptr() + i * stride);
    }

    // Copy the values (now sorted and unique) into a new nd::array
    nd::array categories = make_sorted_categories(uniques, el_tp, el_arrmeta);

    return type(new categorical_type(categories, true), false);
}
// Resolves `json` to a [begin, end) range of bytes to be parsed as JSON.
//
// json:        the input; its value type must be of string kind or bytes
//              kind.
// out_tmp_ref: receives the evaluated (and, for strings that are not ASCII or
//              UTF-8, converted) array that the returned range is read from.
// begin, end:  receive the range reported by the string/bytes type for
//              `out_tmp_ref`.
//
// Throws runtime_error when the value type is neither string nor bytes.
static void json_as_buffer(const nd::array &json, nd::array &out_tmp_ref, const char *&begin, const char *&end)
{
    ndt::type value_tp = json.get_type().value_type();

    switch (value_tp.get_kind()) {
    case string_kind: {
        const ndt::base_string_type *str_tp = value_tp.extended<ndt::base_string_type>();

        switch (str_tp->get_encoding()) {
        case string_encoding_ascii:
        case string_encoding_utf_8:
            // The data is already UTF-8, so use the buffer directly
            out_tmp_ref = json.eval();
            str_tp->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                                     out_tmp_ref.get_readonly_originptr());
            return;
        default:
            break;
        }

        // Any other encoding: cast to a UTF-8 string first, then read the
        // range through the UTF-8 string type
        ndt::type utf8_tp = ndt::string_type::make(string_encoding_utf_8);
        out_tmp_ref = json.ucast(utf8_tp).eval();
        const ndt::base_string_type *utf8_str_tp =
            static_cast<const ndt::base_string_type *>(utf8_tp.extended());
        utf8_str_tp->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                                      out_tmp_ref.get_readonly_originptr());
        return;
    }
    case bytes_kind: {
        out_tmp_ref = json.eval();
        const ndt::base_bytes_type *bytes_tp = value_tp.extended<ndt::base_bytes_type>();
        bytes_tp->get_bytes_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                                  out_tmp_ref.get_readonly_originptr());
        return;
    }
    default:
        break;
    }

    // Neither a string nor bytes: report the offending type
    stringstream ss;
    ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) "
          "or a string, not \""
       << value_tp << "\"";
    throw runtime_error(ss.str());
}