nd::array nd::view(const nd::array& arr, const ndt::type& tp)
{
    // If the types match exactly, simply return 'arr'
    if (arr.get_type() == tp) {
        return arr;
    } else if (arr.get_ndim() == tp.get_ndim()) {
        // Allocate a result array to attempt the view in it
        array result(make_array_memory_block(tp.get_metadata_size()));
        // Copy the fields
        result.get_ndo()->m_data_pointer = arr.get_ndo()->m_data_pointer;
        if (arr.get_ndo()->m_data_reference == NULL) {
            // Embedded data, need a reference to the array itself
            result.get_ndo()->m_data_reference = arr.get_memblock().release();
        } else {
            // Use the same data reference, to avoid producing a chain
            result.get_ndo()->m_data_reference = arr.get_data_memblock().release();
        }
        result.get_ndo()->m_type = ndt::type(tp).release();
        result.get_ndo()->m_flags = arr.get_ndo()->m_flags;
        // Now try to copy the metadata as a view
        if (try_view(arr.get_type(), arr.get_ndo_meta(), tp,
                     result.get_ndo_meta(), arr.get_memblock().get())) {
            // If it succeeded, return it
            return result;
        }
        // Otherwise fall through, let it get destructed, and raise an error
    }

    stringstream ss;
    ss << "Unable to view nd::array of type " << arr.get_type();
    ss << " as type " << tp;
    throw type_error(ss.str());
}
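// Usage sketch (illustrative, not part of the original source): requesting a
// view with the array's own type returns the array unchanged, while a type
// whose metadata cannot be constructed as a view raises type_error. Both
// behaviors follow directly from the branches above; only calls that appear
// elsewhere in this code are used.
//
//   nd::array a = nd::empty(ndt::make_type<int32_t>());
//   nd::array b = nd::view(a, a.get_type());  // exact type match, returns 'a'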
nd::array dynd::format_json(const nd::array& n)
{
    // Create a UTF-8 string
    nd::array result = nd::empty(ndt::make_string());

    // Initialize the output with some memory
    output_data out;
    out.blockref = reinterpret_cast<const string_type_metadata *>(result.get_ndo_meta())->blockref;
    out.api = get_memory_block_pod_allocator_api(out.blockref);
    out.api->allocate(out.blockref, 1024, 1, &out.out_begin, &out.out_capacity_end);
    out.out_end = out.out_begin;

    if (!n.get_type().is_expression()) {
        ::format_json(out, n.get_type(), n.get_ndo_meta(), n.get_readonly_originptr());
    } else {
        nd::array tmp = n.eval();
        ::format_json(out, tmp.get_type(), tmp.get_ndo_meta(), tmp.get_readonly_originptr());
    }

    // Shrink the memory to fit, and set the pointers in the output
    string_type_data *d = reinterpret_cast<string_type_data *>(result.get_readwrite_originptr());
    d->begin = out.out_begin;
    d->end = out.out_capacity_end;
    out.api->resize(out.blockref, out.out_end - out.out_begin, &d->begin, &d->end);

    // Finalize processing and mark the result as immutable
    result.get_type().extended()->metadata_finalize_buffers(result.get_ndo_meta());
    result.flag_as_immutable();

    return result;
}
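// Usage sketch (illustrative, not part of the original source): formats an
// existing array as an immutable UTF-8 string array holding JSON text. Only
// calls that appear in the surrounding code are used; how the input value is
// populated is left out, since the assignment API is not shown here.
//
//   nd::array n = nd::empty(ndt::make_type<int32_t>());
//   // ... fill 'n' with a value ...
//   nd::array json = dynd::format_json(n);  // immutable UTF-8 string array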
void dynd::parse_json(nd::array &out, const char *json_begin, const char *json_end,
                      const eval::eval_context *ectx)
{
    try {
        const char *begin = json_begin, *end = json_end;
        ndt::type tp = out.get_type();
        ::parse_json(tp, out.get_ndo_meta(), out.get_readwrite_originptr(), begin, end, ectx);
        begin = skip_whitespace(begin, end);
        if (begin != end) {
            throw json_parse_error(begin, "unexpected trailing JSON text", tp);
        }
    } catch (const json_parse_error& e) {
        stringstream ss;
        string line_prev, line_cur;
        int line, column;
        get_error_line_column(json_begin, json_end, e.get_position(),
                              line_prev, line_cur, line, column);
        ss << "Error parsing JSON at line " << line << ", column " << column << "\n";
        if (e.get_type().get_type_id() != uninitialized_type_id) {
            ss << "DType: " << e.get_type() << "\n";
        }
        ss << "Message: " << e.get_message() << "\n";
        print_json_parse_error_marker(ss, line_prev, line_cur, line, column);
        throw runtime_error(ss.str());
    }
}
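// Usage sketch (illustrative, not part of the original source): parses a JSON
// literal into a pre-allocated destination array whose type drives the parse.
// Passing &eval::default_eval_context mirrors how an evaluation context is
// supplied elsewhere in this code, but is an assumption for this entry point.
//
//   const char json[] = "123";
//   nd::array out = nd::empty(ndt::make_type<int32_t>());
//   dynd::parse_json(out, json, json + sizeof(json) - 1,
//                    &eval::default_eval_context);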
static void set(const ndt::type& paramtype, char *metadata, char *data, const nd::array& value)
{
    if (paramtype.get_type_id() == void_pointer_type_id) {
        // TODO: switch to a better mechanism for passing nd::array references
        *reinterpret_cast<const array_preamble **>(data) = value.get_ndo();
    } else {
        typed_data_assign(paramtype, metadata, data,
                          value.get_type(), value.get_ndo_meta(),
                          value.get_ndo()->m_data_pointer);
    }
}
static void json_as_buffer(const nd::array& json, nd::array& out_tmp_ref,
                           const char *&begin, const char *&end)
{
    // Check the type of 'json', and get pointers to the begin/end of a UTF-8 buffer
    ndt::type json_type = json.get_type().value_type();
    switch (json_type.get_kind()) {
        case string_kind: {
            const base_string_type *sdt = static_cast<const base_string_type *>(json_type.extended());
            switch (sdt->get_encoding()) {
                case string_encoding_ascii:
                case string_encoding_utf_8:
                    out_tmp_ref = json.eval();
                    // The data is already UTF-8, so use the buffer directly
                    sdt->get_string_range(&begin, &end, out_tmp_ref.get_ndo_meta(),
                                          out_tmp_ref.get_readonly_originptr());
                    break;
                default: {
                    // The data needs to be converted to UTF-8 before parsing
                    ndt::type utf8_tp = ndt::make_string(string_encoding_utf_8);
                    out_tmp_ref = json.ucast(utf8_tp).eval();
                    sdt = static_cast<const base_string_type *>(utf8_tp.extended());
                    sdt->get_string_range(&begin, &end, out_tmp_ref.get_ndo_meta(),
                                          out_tmp_ref.get_readonly_originptr());
                    break;
                }
            }
            break;
        }
        case bytes_kind: {
            out_tmp_ref = json.eval();
            const base_bytes_type *bdt = static_cast<const base_bytes_type *>(json_type.extended());
            bdt->get_bytes_range(&begin, &end, out_tmp_ref.get_ndo_meta(),
                                 out_tmp_ref.get_readonly_originptr());
            break;
        }
        default: {
            stringstream ss;
            ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) or a string, not ";
            ss << json_type;
            throw runtime_error(ss.str());
            break;
        }
    }
}
uint32_t categorical_type::get_value_from_category(const nd::array& category) const
{
    if (category.get_type() == m_category_tp) {
        // If the type is right, get the category value directly
        return get_value_from_category(category.get_ndo_meta(),
                                       category.get_readonly_originptr());
    } else {
        // Otherwise convert to the correct type, then get the category value
        nd::array c = nd::empty(m_category_tp);
        c.val_assign(category);
        return get_value_from_category(c.get_ndo_meta(), c.get_readonly_originptr());
    }
}
// Constructor which creates the output based on the input's broadcast shape
array_iter(const ndt::type& op0_dtype, nd::array& out_op0,
           const nd::array& op1, const nd::array& op2, const nd::array& op3)
{
    create_broadcast_result(op0_dtype, op1, op2, op3, out_op0, m_iter_ndim[0], m_itershape);
    nd::array ops[4] = {out_op0, op1, op2, op3};
    m_array_tp[0] = out_op0.get_type();
    m_array_tp[1] = op1.get_type();
    m_array_tp[2] = op2.get_type();
    m_array_tp[3] = op3.get_type();
    m_itersize = 1;
    m_iter_ndim[1] = m_array_tp[1].get_ndim();
    m_iter_ndim[2] = m_array_tp[2].get_ndim();
    m_iter_ndim[3] = m_array_tp[3].get_ndim();
    // Allocate and initialize the iterdata
    if (m_iter_ndim[0] != 0) {
        m_iterindex.init(m_iter_ndim[0]);
        memset(m_iterindex.get(), 0, sizeof(intptr_t) * m_iter_ndim[0]);
        // The destination iterdata
        size_t iterdata_size = m_array_tp[0].get_iterdata_size(m_iter_ndim[0]);
        m_iterdata[0] = reinterpret_cast<iterdata_common *>(malloc(iterdata_size));
        if (!m_iterdata[0]) {
            throw std::bad_alloc();
        }
        m_metadata[0] = out_op0.get_ndo_meta();
        m_array_tp[0].iterdata_construct(m_iterdata[0], &m_metadata[0],
                                         m_iter_ndim[0], m_itershape.get(), m_uniform_tp[0]);
        m_data[0] = m_iterdata[0]->reset(m_iterdata[0], out_op0.get_readwrite_originptr(),
                                         m_iter_ndim[0]);
        // The op iterdata
        for (int i = 1; i < 4; ++i) {
            iterdata_size = m_array_tp[i].get_broadcasted_iterdata_size(m_iter_ndim[i]);
            m_iterdata[i] = reinterpret_cast<iterdata_common *>(malloc(iterdata_size));
            if (!m_iterdata[i]) {
                throw std::bad_alloc();
            }
            m_metadata[i] = ops[i].get_ndo_meta();
            m_array_tp[i].broadcasted_iterdata_construct(m_iterdata[i], &m_metadata[i],
                                                         m_iter_ndim[i],
                                                         m_itershape.get() + (m_iter_ndim[0] - m_iter_ndim[i]),
                                                         m_uniform_tp[i]);
            m_data[i] = m_iterdata[i]->reset(m_iterdata[i], ops[i].get_ndo()->m_data_pointer,
                                             m_iter_ndim[0]);
        }
        for (size_t i = 0, i_end = m_iter_ndim[0]; i != i_end; ++i) {
            m_itersize *= m_itershape[i];
        }
    } else {
        for (size_t i = 0; i < 4; ++i) {
            m_iterdata[i] = NULL;
            m_uniform_tp[i] = m_array_tp[i];
            m_data[i] = ops[i].get_ndo()->m_data_pointer;
            m_metadata[i] = ops[i].get_ndo_meta();
        }
    }
}
categorical_type::categorical_type(const nd::array& categories, bool presorted)
    : base_type(categorical_type_id, custom_kind, 4, 4, type_flag_scalar, 0, 0)
{
    intptr_t category_count;
    if (presorted) {
        // This is a construction shortcut for the case when the categories are
        // already sorted. No validation of this is done; the caller should have
        // ensured it was correct already, typically by construction.
        m_categories = categories.eval_immutable();
        m_category_tp = m_categories.get_type().at(0);
        category_count = categories.get_dim_size();
        m_value_to_category_index.resize(category_count);
        m_category_index_to_value.resize(category_count);
        for (size_t i = 0; i != (size_t)category_count; ++i) {
            m_value_to_category_index[i] = i;
            m_category_index_to_value[i] = i;
        }
    } else {
        // Process the categories array to make sure it's valid
        const ndt::type& cdt = categories.get_type();
        if (cdt.get_type_id() != strided_dim_type_id) {
            throw runtime_error("categorical_type only supports construction from a strided array of categories");
        }
        m_category_tp = categories.get_type().at(0);
        if (!m_category_tp.is_scalar()) {
            throw runtime_error("categorical_type only supports construction from a 1-dimensional strided array of categories");
        }
        category_count = categories.get_dim_size();
        intptr_t categories_stride = reinterpret_cast<const strided_dim_type_metadata *>(categories.get_ndo_meta())->stride;
        const char *categories_element_metadata = categories.get_ndo_meta() + sizeof(strided_dim_type_metadata);

        comparison_ckernel_builder k;
        ::make_comparison_kernel(&k, 0,
                                 m_category_tp, categories_element_metadata,
                                 m_category_tp, categories_element_metadata,
                                 comparison_type_sorting_less, &eval::default_eval_context);

        cmp less(k.get_function(), k.get());
        set<const char *, cmp> uniques(less);

        m_value_to_category_index.resize(category_count);
        m_category_index_to_value.resize(category_count);

        // Create the mapping from indices of (to be lexicographically sorted)
        // categories to values
        for (size_t i = 0; i != (size_t)category_count; ++i) {
            m_category_index_to_value[i] = i;
            const char *category_value = categories.get_readonly_originptr() + i * categories_stride;
            if (uniques.find(category_value) == uniques.end()) {
                uniques.insert(category_value);
            } else {
                stringstream ss;
                ss << "categories must be unique: category value ";
                m_category_tp.print_data(ss, categories_element_metadata, category_value);
                ss << " appears more than once";
                throw std::runtime_error(ss.str());
            }
        }
        // TODO: Putting everything in a set already caused a sort operation to occur,
        //       there's no reason we should need a second sort.
        std::sort(m_category_index_to_value.begin(), m_category_index_to_value.end(),
                  sorter(categories.get_readonly_originptr(), categories_stride,
                         k.get_function(), k.get()));
        // Invert the m_category_index_to_value permutation
        for (uint32_t i = 0; i < m_category_index_to_value.size(); ++i) {
            m_value_to_category_index[m_category_index_to_value[i]] = i;
        }

        m_categories = make_sorted_categories(uniques, m_category_tp, categories_element_metadata);
    }

    // Use the number of categories to select the underlying integer storage type
    if (category_count <= 256) {
        m_storage_type = ndt::make_type<uint8_t>();
    } else if (category_count <= 65536) {
        m_storage_type = ndt::make_type<uint16_t>();
    } else {
        m_storage_type = ndt::make_type<uint32_t>();
    }
    m_members.data_size = m_storage_type.get_data_size();
    m_members.data_alignment = (uint8_t)m_storage_type.get_data_alignment();
}
array_iter(const nd::array& op0)
{
    init(op0.get_type(), op0.get_ndo_meta(), op0.get_readwrite_originptr());
}
dynd::nd::array pydynd::nd_fields(const nd::array& n, PyObject *field_list)
{
    vector<string> selected_fields;
    pyobject_as_vector_string(field_list, selected_fields);

    // TODO: Move this implementation into dynd
    ndt::type fdt = n.get_dtype();
    if (fdt.get_kind() != struct_kind) {
        stringstream ss;
        ss << "nd.fields must be given a dynd array of 'struct' kind, not ";
        ss << fdt;
        throw runtime_error(ss.str());
    }
    const base_struct_type *bsd = static_cast<const base_struct_type *>(fdt.extended());
    const ndt::type *field_types = bsd->get_field_types();

    if (selected_fields.empty()) {
        throw runtime_error("nd.fields requires at least one field name to be specified");
    }
    // Construct the field mapping and output field types
    vector<intptr_t> selected_index(selected_fields.size());
    vector<ndt::type> selected_ndt_types(selected_fields.size());
    for (size_t i = 0; i != selected_fields.size(); ++i) {
        selected_index[i] = bsd->get_field_index(selected_fields[i]);
        if (selected_index[i] < 0) {
            stringstream ss;
            ss << "field name ";
            print_escaped_utf8_string(ss, selected_fields[i]);
            ss << " does not exist in dynd type " << fdt;
            throw runtime_error(ss.str());
        }
        selected_ndt_types[i] = field_types[selected_index[i]];
    }
    // Create the result udt
    ndt::type rudt = ndt::make_struct(selected_ndt_types, selected_fields);
    ndt::type result_tp = n.get_type().with_replaced_dtype(rudt);
    const base_struct_type *rudt_bsd = static_cast<const base_struct_type *>(rudt.extended());

    // Allocate the new memory block
    size_t metadata_size = result_tp.get_metadata_size();
    nd::array result(make_array_memory_block(metadata_size));
    // Clone the data pointer
    result.get_ndo()->m_data_pointer = n.get_ndo()->m_data_pointer;
    result.get_ndo()->m_data_reference = n.get_ndo()->m_data_reference;
    if (result.get_ndo()->m_data_reference == NULL) {
        result.get_ndo()->m_data_reference = n.get_memblock().get();
    }
    memory_block_incref(result.get_ndo()->m_data_reference);
    // Copy the flags
    result.get_ndo()->m_flags = n.get_ndo()->m_flags;
    // Set the type and transform the metadata
    result.get_ndo()->m_type = ndt::type(result_tp).release();
    // First copy all the array data type metadata
    ndt::type tmp_dt = result_tp;
    char *dst_metadata = result.get_ndo_meta();
    const char *src_metadata = n.get_ndo_meta();
    while (tmp_dt.get_ndim() > 0) {
        if (tmp_dt.get_kind() != uniform_dim_kind) {
            throw runtime_error("nd.fields doesn't support dimensions with pointers yet");
        }
        const base_uniform_dim_type *budd = static_cast<const base_uniform_dim_type *>(tmp_dt.extended());
        size_t offset = budd->metadata_copy_construct_onedim(dst_metadata, src_metadata,
                                                             n.get_memblock().get());
        dst_metadata += offset;
        src_metadata += offset;
        tmp_dt = budd->get_element_type();
    }
    // Then create the metadata for the new struct
    const size_t *metadata_offsets = bsd->get_metadata_offsets();
    const size_t *result_metadata_offsets = rudt_bsd->get_metadata_offsets();
    const size_t *data_offsets = bsd->get_data_offsets(src_metadata);
    size_t *result_data_offsets = reinterpret_cast<size_t *>(dst_metadata);
    for (size_t i = 0; i != selected_fields.size(); ++i) {
        const ndt::type& dt = selected_ndt_types[i];
        // Copy the data offset
        result_data_offsets[i] = data_offsets[selected_index[i]];
        // Copy the metadata for this field
        if (dt.get_metadata_size() > 0) {
            dt.extended()->metadata_copy_construct(dst_metadata + result_metadata_offsets[i],
                                                   src_metadata + metadata_offsets[selected_index[i]],
                                                   n.get_memblock().get());
        }
    }

    return result;
}