/**
 * Produces a view of `self` with any leading pointer types removed.
 *
 * Each pointer level is followed once: the data address is replaced by the
 * pointed-to address plus the offset stored in that level's arrmeta, and the
 * memory block recorded in that arrmeta becomes the owner of the data. The
 * flags of `self` are carried over to the result unchanged.
 *
 * \param self  The array to dereference.
 * \returns  A new array whose type is the first non-pointer target type.
 */
static nd::array array_function_dereference(const nd::array &self)
{
  ndt::type tp = self.get_type();
  const char *src_arrmeta = self.get()->metadata();
  char *data_ptr = self.get()->data;
  const uint64_t src_flags = self.get()->flags;

  // The data is kept alive by the explicit owner if there is one, otherwise
  // by the array's own memory block.
  memory_block_data *data_owner = self.get()->owner.get();
  if (!data_owner) {
    data_owner = self.get();
  }

  // Peel off one pointer level per iteration
  for (; tp.get_type_id() == pointer_type_id; src_arrmeta += sizeof(pointer_type_arrmeta)) {
    const pointer_type_arrmeta *ptr_md = reinterpret_cast<const pointer_type_arrmeta *>(src_arrmeta);
    tp = tp.extended<ndt::pointer_type>()->get_target_type();
    data_ptr = *reinterpret_cast<char **>(data_ptr) + ptr_md->offset;
    data_owner = ptr_md->blockref.get();
  }

  // Build the pointer-free array, copying whatever arrmeta remains after the
  // pointer levels that were skipped.
  nd::array result(make_array_memory_block(tp.get_arrmeta_size()));
  if (!tp.is_builtin()) {
    tp.extended()->arrmeta_copy_construct(result.get()->metadata(), src_arrmeta, self);
  }
  result.get()->type = tp.release();
  result.get()->data = data_ptr;
  result.get()->owner = data_owner;
  result.get()->flags = src_flags;
  return result;
}
/**
 * Validates the type of `json` and exposes its contents as a byte range.
 *
 * String inputs whose encoding is ASCII or UTF-8 are evaluated and used
 * directly; strings in any other encoding are first cast to the default
 * string type and evaluated. Bytes inputs are evaluated and used as-is.
 *
 * \param json         The array holding the JSON text.
 * \param out_tmp_ref  Receives the evaluated array that backs the returned
 *                     range; `begin`/`end` are obtained from its data.
 * \param begin        Receives the start of the buffer.
 * \param end          Receives the end of the buffer.
 *
 * \throws runtime_error  if the value type of `json` is neither of string
 *                        kind nor of bytes kind.
 */
static void json_as_buffer(const nd::array &json, nd::array &out_tmp_ref, const char *&begin, const char *&end)
{
  ndt::type json_type = json.get_type().value_type();

  switch (json_type.get_kind()) {
  case string_kind: {
    const ndt::base_string_type *str_tp = json_type.extended<ndt::base_string_type>();
    switch (str_tp->get_encoding()) {
    case string_encoding_ascii:
    case string_encoding_utf_8:
      // Already UTF-8 compatible: point straight into the evaluated buffer
      out_tmp_ref = json.eval();
      str_tp->get_string_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
      return;
    default: {
      // Any other encoding gets converted to UTF-8 before it is parsed
      ndt::type utf8_tp = ndt::string_type::make();
      out_tmp_ref = json.ucast(utf8_tp).eval();
      str_tp = static_cast<const ndt::base_string_type *>(utf8_tp.extended());
      str_tp->get_string_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
      return;
    }
    }
  }
  case bytes_kind: {
    out_tmp_ref = json.eval();
    const ndt::base_bytes_type *bytes_tp = json_type.extended<ndt::base_bytes_type>();
    bytes_tp->get_bytes_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
    return;
  }
  default:
    break;
  }

  // Neither a string nor bytes: report the offending type
  stringstream ss;
  ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) "
        "or a string, not \""
     << json_type << "\"";
  throw runtime_error(ss.str());
}
/**
 * Parses the JSON text in [json_begin, json_end) into the already-allocated
 * array `out`, using the type of `out` to drive the parse.
 *
 * After the value has been parsed, skip_whitespace() is applied to what
 * remains; anything still left over is reported as trailing text.
 *
 * \param out         The array to fill.
 * \param json_begin  Start of the JSON buffer.
 * \param json_end    End of the JSON buffer.
 * \param ectx        The evaluation context forwarded to the parser.
 *
 * \throws invalid_argument  for any json_parse_error or parse::parse_error
 *                           raised while parsing. The message carries the
 *                           line and column of the failure, the original
 *                           error text, and a marker printed by
 *                           print_json_parse_error_marker().
 */
void dynd::parse_json(nd::array &out, const char *json_begin, const char *json_end, const eval::eval_context *ectx)
{
  try {
    const char *cursor = json_begin;
    const char *limit = json_end;
    ndt::type out_tp = out.get_type();
    ::parse_json(out_tp, out.get()->metadata(), out.data(), cursor, limit, ectx);
    cursor = skip_whitespace(cursor, limit);
    if (cursor != limit) {
      throw json_parse_error(cursor, "unexpected trailing JSON text", out_tp);
    }
  }
  catch (const json_parse_error &e) {
    // This error kind also knows which DyND type was being parsed
    int line, column;
    std::string line_prev, line_cur;
    get_error_line_column(json_begin, json_end, e.get_position(), line_prev, line_cur, line, column);

    stringstream msg;
    msg << "Error parsing JSON at line " << line << ", column " << column << "\n";
    msg << "DyND Type: " << e.get_type() << "\n";
    msg << "Message: " << e.what() << "\n";
    print_json_parse_error_marker(msg, line_prev, line_cur, line, column);
    throw invalid_argument(msg.str());
  }
  catch (const parse::parse_error &e) {
    int line, column;
    std::string line_prev, line_cur;
    get_error_line_column(json_begin, json_end, e.get_position(), line_prev, line_cur, line, column);

    stringstream msg;
    msg << "Error parsing JSON at line " << line << ", column " << column << "\n";
    msg << "Message: " << e.what() << "\n";
    print_json_parse_error_marker(msg, line_prev, line_cur, line, column);
    throw invalid_argument(msg.str());
  }
}
/**
 * Creates a view of `n` whose struct dtype contains only the named fields,
 * in the order given.
 *
 * The result shares the data pointer of `n` (no element data is copied); only
 * the arrmeta is rebuilt so that the selected fields' data offsets and
 * per-field arrmeta are carried over from the source struct.
 *
 * \param n           The source array; its dtype must be of struct kind.
 * \param field_list  Python object converted to a list of field names via
 *                    pyobject_as_vector_string().
 *
 * \throws runtime_error  if the dtype is not a struct, if no field names were
 *                        given, if a name is not a field of the struct, or if
 *                        one of the array dimensions is not of dim kind.
 */
dynd::nd::array pydynd::nd_fields(const nd::array &n, PyObject *field_list)
{
  vector<std::string> selected_fields;
  pyobject_as_vector_string(field_list, selected_fields);

  // TODO: Move this implementation into dynd
  ndt::type fdt = n.get_dtype();
  if (fdt.get_kind() != struct_kind) {
    stringstream ss;
    ss << "nd.fields must be given a dynd array of 'struct' kind, not ";
    ss << fdt;
    throw runtime_error(ss.str());
  }
  if (selected_fields.empty()) {
    throw runtime_error("nd.fields requires at least one field name to be specified");
  }
  const ndt::struct_type *src_struct = fdt.extended<ndt::struct_type>();
  const size_t field_count = selected_fields.size();

  // Resolve every requested name to its index and type in the source struct
  vector<intptr_t> selected_index(field_count);
  vector<ndt::type> selected__types(field_count);
  for (size_t i = 0; i < field_count; ++i) {
    const intptr_t src_index = src_struct->get_field_index(selected_fields[i]);
    selected_index[i] = src_index;
    if (src_index < 0) {
      stringstream ss;
      ss << "field name ";
      print_escaped_utf8_string(ss, selected_fields[i]);
      ss << " does not exist in dynd type " << fdt;
      throw runtime_error(ss.str());
    }
    selected__types[i] = src_struct->get_field_type(src_index);
  }

  // The narrowed struct type, wrapped in the same dimensions as the source
  ndt::type rudt = ndt::struct_type::make(selected_fields, selected__types);
  ndt::type result_tp = n.get_type().with_replaced_dtype(rudt);
  const ndt::struct_type *dst_struct = rudt.extended<ndt::struct_type>();

  // Allocate the memory block for the result
  size_t arrmeta_size = result_tp.get_arrmeta_size();
  nd::array result(reinterpret_cast<array_preamble *>(make_array_memory_block(arrmeta_size).get()), true);

  // Share the data with the source; when the source has no separate owner,
  // the source array itself is recorded as the owner.
  result.get()->data = n.get()->data;
  result.get()->owner = n.get()->owner;
  if (!result.get()->owner) {
    result.get()->owner = n.get();
  }
  result.get()->flags = n.get()->flags;
  result.get()->tp = result_tp;

  // Reference to the source array handed to every arrmeta copy below
  const intrusive_ptr<memory_block_data> src_ref(n.get(), true);

  // Copy the arrmeta of each array dimension, walking down to the dtype
  char *dst_arrmeta = result.get()->metadata();
  const char *src_arrmeta = n.get()->metadata();
  for (ndt::type level_tp = result_tp; level_tp.get_ndim() > 0;) {
    if (level_tp.get_kind() != dim_kind) {
      throw runtime_error("nd.fields doesn't support dimensions with pointers yet");
    }
    const ndt::base_dim_type *dim_tp = level_tp.extended<ndt::base_dim_type>();
    size_t consumed = dim_tp->arrmeta_copy_construct_onedim(dst_arrmeta, src_arrmeta, src_ref);
    dst_arrmeta += consumed;
    src_arrmeta += consumed;
    level_tp = dim_tp->get_element_type();
  }

  // Fill in the struct arrmeta: the data offsets are stored at the start of
  // the destination struct arrmeta, and each field's own arrmeta sits at the
  // offset given by the result struct type.
  const size_t *src_field_arrmeta_offsets = src_struct->get_arrmeta_offsets_raw();
  const size_t *dst_field_arrmeta_offsets = dst_struct->get_arrmeta_offsets_raw();
  const size_t *src_data_offsets = src_struct->get_data_offsets(src_arrmeta);
  size_t *dst_data_offsets = reinterpret_cast<size_t *>(dst_arrmeta);
  for (size_t i = 0; i < field_count; ++i) {
    const intptr_t src_index = selected_index[i];
    const ndt::type &field_tp = selected__types[i];
    dst_data_offsets[i] = src_data_offsets[src_index];
    // Fields without arrmeta have nothing further to copy
    if (field_tp.get_arrmeta_size() > 0) {
      field_tp.extended()->arrmeta_copy_construct(dst_arrmeta + dst_field_arrmeta_offsets[i],
                                                  src_arrmeta + src_field_arrmeta_offsets[src_index], src_ref);
    }
  }

  return result;
}
/**
 * Constructs a categorical type from an array of category values.
 *
 * \param categories  The category values. Unless `presorted` is set, this
 *                    must be a fixed-dim array of unique scalar values.
 * \param presorted   When true, `categories` is taken as already sorted and
 *                    is used as-is with identity index mappings; nothing is
 *                    validated in that case.
 *
 * \throws dynd::type_error     if `categories` is not a fixed-dim array or
 *                              its element type is not scalar (non-presorted
 *                              path only).
 * \throws std::runtime_error   if a category value occurs more than once
 *                              (non-presorted path only).
 *
 * The storage type is chosen from the category count: uint8 for up to 256
 * categories, uint16 for up to 65536, and uint32 beyond that.
 */
ndt::categorical_type::categorical_type(const nd::array &categories, bool presorted)
    : base_type(categorical_id, 4, 4, type_flag_none, 0, 0, 0)
{
  intptr_t category_count;

  if (presorted) {
    // Construction shortcut for categories that are already sorted. The
    // caller is responsible for having ensured that, typically by
    // construction; it is not checked here.
    m_categories = categories.eval_immutable();
    m_category_tp = m_categories.get_type().at(0);
    category_count = categories.get_dim_size();
    // Sorted input means both mappings are the identity permutation
    m_value_to_category_index = nd::range(category_count);
    m_value_to_category_index.flag_as_immutable();
    m_category_index_to_value = m_value_to_category_index;
  }
  else {
    // Validate the shape of the categories array
    if (categories.get_type().get_id() != fixed_dim_id) {
      throw dynd::type_error("categorical_type only supports construction from "
                             "a fixed-dim array of categories");
    }
    m_category_tp = categories.get_type().at(0);
    if (!m_category_tp.is_scalar()) {
      throw dynd::type_error("categorical_type only supports construction from "
                             "a 1-dimensional strided array of categories");
    }

    category_count = categories.get_dim_size();
    const char *dim_arrmeta = categories.get()->metadata();
    intptr_t categories_stride = reinterpret_cast<const fixed_dim_type_arrmeta *>(dim_arrmeta)->stride;
    const char *categories_element_arrmeta = dim_arrmeta + sizeof(fixed_dim_type_arrmeta);

    // NOTE(review): the kernel builder is default-constructed and no kernel
    // is visibly instantiated into it before its function is fetched --
    // confirm this yields a usable comparison for `cmp` and `sorter`.
    nd::kernel_builder k;
    kernel_single_t fn = k.get()->get_function<kernel_single_t>();

    cmp less(fn, k.get());
    set<const char *, cmp> uniques(less);

    m_value_to_category_index = nd::empty(category_count, make_type<intptr_t>());
    m_category_index_to_value = nd::empty(category_count, make_type<intptr_t>());

    // Start the index-to-value mapping as the identity (it is sorted below),
    // rejecting any category value that has been seen before.
    for (size_t i = 0; i < (size_t)category_count; ++i) {
      unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, i) = i;

      const char *category_value = categories.cdata() + i * categories_stride;
      const bool newly_added = uniques.insert(category_value).second;
      if (!newly_added) {
        stringstream ss;
        ss << "categories must be unique: category value ";
        m_category_tp.print_data(ss, categories_element_arrmeta, category_value);
        ss << " appears more than once";
        throw std::runtime_error(ss.str());
      }
    }

    // TODO: Putting everything in a set already caused a sort operation to
    //       occur, there's no reason we should need a second sort.
    intptr_t *perm_begin = &unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, 0);
    intptr_t *perm_end = &unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, category_count);
    std::sort(perm_begin, perm_end, sorter(categories.cdata(), categories_stride, fn, k.get()));

    // The value-to-index mapping is the inverse of the sorted permutation
    for (intptr_t i = 0; i < category_count; ++i) {
      const intptr_t value = unchecked_fixed_dim_get<intptr_t>(m_category_index_to_value, i);
      unchecked_fixed_dim_get_rw<intptr_t>(m_value_to_category_index, value) = i;
    }

    m_categories = make_sorted_categories(uniques, m_category_tp, categories_element_arrmeta);
  }

  // Pick the narrowest unsigned integer storage that fits the category count
  if (category_count > 65536) {
    m_storage_type = make_type<uint32_t>();
  }
  else if (category_count > 256) {
    m_storage_type = make_type<uint16_t>();
  }
  else {
    m_storage_type = make_type<uint8_t>();
  }
  this->data_size = m_storage_type.get_data_size();
  this->data_alignment = static_cast<uint8_t>(m_storage_type.get_data_alignment());
}