static nd::array array_function_dereference(const nd::array &self)
{
    // Follow the pointers to eliminate them
    ndt::type dt = self.get_type();
    const char *arrmeta = self.get()->metadata();
    char *data = self.get()->data;
    memory_block_data *dataref = self.get()->owner.get();
    if (dataref == NULL) {
        dataref = self.get();
    }
    uint64_t flags = self.get()->flags;

    while (dt.get_type_id() == pointer_type_id) {
        const pointer_type_arrmeta *md = reinterpret_cast<const pointer_type_arrmeta *>(arrmeta);
        dt = dt.extended<ndt::pointer_type>()->get_target_type();
        arrmeta += sizeof(pointer_type_arrmeta);
        data = *reinterpret_cast<char **>(data) + md->offset;
        dataref = md->blockref.get();
    }

    // Create an array without the pointers
    nd::array result(make_array_memory_block(dt.get_arrmeta_size()));
    if (!dt.is_builtin()) {
        dt.extended()->arrmeta_copy_construct(result.get()->metadata(), arrmeta, self);
    }
    result.get()->type = dt.release();
    result.get()->data = data;
    result.get()->owner = dataref;
    result.get()->flags = flags;
    return result;
}
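// For reference, a hedged sketch of the arrmeta record the dereference loop
// above reads. The actual struct is defined in dynd's pointer type headers and
// may differ in detail; only the two fields the loop consumes are mirrored.
struct pointer_type_arrmeta_sketch {
    intrusive_ptr<memory_block_data> blockref; // owns the pointed-to data
    intptr_t offset;                           // added to the dereferenced pointer
};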
static void json_as_buffer(const nd::array &json, nd::array &out_tmp_ref, const char *&begin, const char *&end)
{
  // Check the type of 'json', and get pointers to the begin/end of a UTF-8
  // buffer
  ndt::type json_type = json.get_type().value_type();
  switch (json_type.get_kind()) {
  case string_kind: {
    const ndt::base_string_type *sdt = json_type.extended<ndt::base_string_type>();
    switch (sdt->get_encoding()) {
    case string_encoding_ascii:
    case string_encoding_utf_8:
      out_tmp_ref = json.eval();
      // The data is already UTF-8, so use the buffer directly
      sdt->get_string_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
      break;
    default: {
      // The data needs to be converted to UTF-8 before parsing
      ndt::type utf8_tp = ndt::string_type::make();
      out_tmp_ref = json.ucast(utf8_tp).eval();
      sdt = static_cast<const ndt::base_string_type *>(utf8_tp.extended());
      sdt->get_string_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
      break;
    }
    }
    break;
  }
  case bytes_kind: {
    out_tmp_ref = json.eval();
    const ndt::base_bytes_type *bdt = json_type.extended<ndt::base_bytes_type>();
    bdt->get_bytes_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
    break;
  }
  default: {
    stringstream ss;
    ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) "
          "or a string, not \"" << json_type << "\"";
    throw runtime_error(ss.str());
    break;
  }
  }
}
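// Hedged sketch of a call site for the helper above (same translation unit,
// since it is static). Constructing an nd::array directly from a C string
// literal is an assumption about dynd's array constructors.
static void json_as_buffer_example()
{
  nd::array json_doc("{\"a\": [1, 2]}");
  nd::array tmp_ref;
  const char *begin = NULL, *end = NULL;
  json_as_buffer(json_doc, tmp_ref, begin, end);
  // [begin, end) now delimits the UTF-8 text; tmp_ref must outlive any use of it.
}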
void dynd::parse_json(nd::array &out, const char *json_begin, const char *json_end, const eval::eval_context *ectx)
{
  try
  {
    const char *begin = json_begin, *end = json_end;
    ndt::type tp = out.get_type();
    ::parse_json(tp, out.get()->metadata(), out.data(), begin, end, ectx);
    begin = skip_whitespace(begin, end);
    if (begin != end) {
      throw json_parse_error(begin, "unexpected trailing JSON text", tp);
    }
  }
  catch (const json_parse_error &e)
  {
    stringstream ss;
    std::string line_prev, line_cur;
    int line, column;
    get_error_line_column(json_begin, json_end, e.get_position(), line_prev, line_cur, line, column);
    ss << "Error parsing JSON at line " << line << ", column " << column << "\n";
    ss << "DyND Type: " << e.get_type() << "\n";
    ss << "Message: " << e.what() << "\n";
    print_json_parse_error_marker(ss, line_prev, line_cur, line, column);
    throw invalid_argument(ss.str());
  }
  catch (const parse::parse_error &e)
  {
    stringstream ss;
    std::string line_prev, line_cur;
    int line, column;
    get_error_line_column(json_begin, json_end, e.get_position(), line_prev, line_cur, line, column);
    ss << "Error parsing JSON at line " << line << ", column " << column << "\n";
    ss << "Message: " << e.what() << "\n";
    print_json_parse_error_marker(ss, line_prev, line_cur, line, column);
    throw invalid_argument(ss.str());
  }
}
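// Hedged usage sketch for dynd::parse_json: allocate an output array of a
// known type and parse a small UTF-8 buffer into it. The type-from-string
// constructor and eval::default_eval_context are assumptions about the
// surrounding dynd API, not part of the snippet above.
static void parse_json_example()
{
  const char json[] = "[1, 2, 3]";
  nd::array out = nd::empty(ndt::type("3 * int32"));
  dynd::parse_json(out, json, json + sizeof(json) - 1, &eval::default_eval_context);
  // Trailing non-whitespace input would surface as std::invalid_argument.
}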
dynd::nd::array pydynd::nd_fields(const nd::array &n, PyObject *field_list)
{
  vector<std::string> selected_fields;
  pyobject_as_vector_string(field_list, selected_fields);

  // TODO: Move this implementation into dynd
  ndt::type fdt = n.get_dtype();
  if (fdt.get_kind() != struct_kind) {
    stringstream ss;
    ss << "nd.fields must be given a dynd array of 'struct' kind, not ";
    ss << fdt;
    throw runtime_error(ss.str());
  }
  const ndt::struct_type *bsd = fdt.extended<ndt::struct_type>();

  if (selected_fields.empty()) {
    throw runtime_error(
        "nd.fields requires at least one field name to be specified");
  }
  // Construct the field mapping and output field types
  vector<intptr_t> selected_index(selected_fields.size());
  vector<ndt::type> selected_types(selected_fields.size());
  for (size_t i = 0; i != selected_fields.size(); ++i) {
    selected_index[i] = bsd->get_field_index(selected_fields[i]);
    if (selected_index[i] < 0) {
      stringstream ss;
      ss << "field name ";
      print_escaped_utf8_string(ss, selected_fields[i]);
      ss << " does not exist in dynd type " << fdt;
      throw runtime_error(ss.str());
    }
    selected_types[i] = bsd->get_field_type(selected_index[i]);
  }
  // Create the result udt
  ndt::type rudt = ndt::struct_type::make(selected_fields, selected__types);
  ndt::type result_tp = n.get_type().with_replaced_dtype(rudt);
  const ndt::struct_type *rudt_bsd = rudt.extended<ndt::struct_type>();

  // Allocate the new memory block.
  size_t arrmeta_size = result_tp.get_arrmeta_size();
  nd::array result(reinterpret_cast<array_preamble *>(
                       make_array_memory_block(arrmeta_size).get()),
                   true);

  // Share the data pointer and memory owner with the source array
  result.get()->data = n.get()->data;
  result.get()->owner = n.get()->owner;
  if (!result.get()->owner) {
    result.get()->owner = n.get();
  }

  // Copy the flags
  result.get()->flags = n.get()->flags;

  // Set the type and transform the arrmeta
  result.get()->tp = result_tp;
  // First copy all the array data type arrmeta
  ndt::type tmp_dt = result_tp;
  char *dst_arrmeta = result.get()->metadata();
  const char *src_arrmeta = n.get()->metadata();
  while (tmp_dt.get_ndim() > 0) {
    if (tmp_dt.get_kind() != dim_kind) {
      throw runtime_error(
          "nd.fields doesn't support dimensions with pointers yet");
    }
    const ndt::base_dim_type *budd = tmp_dt.extended<ndt::base_dim_type>();
    size_t offset = budd->arrmeta_copy_construct_onedim(
        dst_arrmeta, src_arrmeta,
        intrusive_ptr<memory_block_data>(n.get(), true));
    dst_arrmeta += offset;
    src_arrmeta += offset;
    tmp_dt = budd->get_element_type();
  }
  // Then create the arrmeta for the new struct
  const size_t *arrmeta_offsets = bsd->get_arrmeta_offsets_raw();
  const size_t *result_arrmeta_offsets = rudt_bsd->get_arrmeta_offsets_raw();
  const size_t *data_offsets = bsd->get_data_offsets(src_arrmeta);
  size_t *result_data_offsets = reinterpret_cast<size_t *>(dst_arrmeta);
  for (size_t i = 0; i != selected_fields.size(); ++i) {
    const ndt::type &dt = selected_types[i];
    // Copy the data offset
    result_data_offsets[i] = data_offsets[selected_index[i]];
    // Copy the arrmeta for this field
    if (dt.get_arrmeta_size() > 0) {
      dt.extended()->arrmeta_copy_construct(
          dst_arrmeta + result_arrmeta_offsets[i],
          src_arrmeta + arrmeta_offsets[selected_index[i]],
          intrusive_ptr<memory_block_data>(n.get(), true));
    }
  }

  return result;
}
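// Hedged sketch of a call site for pydynd::nd_fields: the field list is a
// Python list of names built with the CPython API, and 'arr' stands in for a
// struct-typed nd::array (both illustrative assumptions).
static nd::array nd_fields_example(const nd::array &arr)
{
  PyObject *field_list = PyList_New(2);
  PyList_SET_ITEM(field_list, 0, PyUnicode_FromString("x"));
  PyList_SET_ITEM(field_list, 1, PyUnicode_FromString("y"));
  nd::array xy = pydynd::nd_fields(arr, field_list);
  Py_DECREF(field_list);
  return xy;
}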
ndt::categorical_type::categorical_type(const nd::array &categories, bool presorted)
    : base_type(categorical_id, 4, 4, type_flag_none, 0, 0, 0)
{
  intptr_t category_count;
  if (presorted) {
    // This is a construction shortcut for the case when the categories are
    // already sorted. No validation is done; the caller should have ensured
    // the order is correct already, typically by construction.
    m_categories = categories.eval_immutable();
    m_category_tp = m_categories.get_type().at(0);

    category_count = categories.get_dim_size();
    m_value_to_category_index = nd::range(category_count);
    m_value_to_category_index.flag_as_immutable();
    m_category_index_to_value = m_value_to_category_index;
  }
  else {
    // Process the categories array to make sure it's valid
    const type &cdt = categories.get_type();
    if (cdt.get_id() != fixed_dim_id) {
      throw dynd::type_error("categorical_type only supports construction from "
                             "a fixed-dim array of categories");
    }
    m_category_tp = categories.get_type().at(0);
    if (!m_category_tp.is_scalar()) {
      throw dynd::type_error("categorical_type only supports construction from "
                             "a 1-dimensional strided array of categories");
    }

    category_count = categories.get_dim_size();
    intptr_t categories_stride = reinterpret_cast<const fixed_dim_type_arrmeta *>(categories.get()->metadata())->stride;

    const char *categories_element_arrmeta = categories.get()->metadata() + sizeof(fixed_dim_type_arrmeta);
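    // Comparison kernel over category values: 'less' is used below to reject
    // duplicate categories, and the same kernel drives the lexicographic sort.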
    nd::kernel_builder k;
    kernel_single_t fn = k.get()->get_function<kernel_single_t>();

    cmp less(fn, k.get());
    set<const char *, cmp> uniques(less);

    m_value_to_category_index = nd::empty(category_count, make_type<intptr_t>());
    m_category_index_to_value = nd::empty(category_count, make_type<intptr_t>());

    // create the mapping from indices of (to be lexicographically sorted)
    // categories to values
    for (size_t i = 0; i != (size_t)category_count; ++i) {
      unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, i) = i;
      const char *category_value = categories.cdata() + i * categories_stride;

      if (uniques.find(category_value) == uniques.end()) {
        uniques.insert(category_value);
      }
      else {
        stringstream ss;
        ss << "categories must be unique: category value ";
        m_category_tp.print_data(ss, categories_element_arrmeta, category_value);
        ss << " appears more than once";
        throw std::runtime_error(ss.str());
      }
    }
    // TODO: Putting everything in a set already caused a sort operation to
    //       occur; there's no reason we should need a second sort.
    std::sort(&unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, 0),
              &unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, category_count),
              sorter(categories.cdata(), categories_stride, fn, k.get()));

    // invert the m_category_index_to_value permutation
    for (intptr_t i = 0; i < category_count; ++i) {
      unchecked_fixed_dim_get_rw<intptr_t>(m_value_to_category_index,
                                           unchecked_fixed_dim_get<intptr_t>(m_category_index_to_value, i)) = i;
    }

    m_categories = make_sorted_categories(uniques, m_category_tp, categories_element_arrmeta);
  }

  // Choose the underlying integer storage type based on the number of categories
  if (category_count <= 256) {
    m_storage_type = make_type<uint8_t>();
  }
  else if (category_count <= 65536) {
    m_storage_type = make_type<uint16_t>();
  }
  else {
    m_storage_type = make_type<uint32_t>();
  }
  this->data_size = m_storage_type.get_data_size();
  this->data_alignment = (uint8_t)m_storage_type.get_data_alignment();
}
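// For clarity, a self-contained restatement (a sketch, not dynd code) of the
// storage-width rule at the end of the constructor: the index type only needs
// to hold category_count distinct values (0..count-1), so exactly 256
// categories still fit in a uint8_t.
inline size_t categorical_storage_size(intptr_t category_count)
{
  if (category_count <= 256) {
    return sizeof(uint8_t);
  }
  else if (category_count <= 65536) {
    return sizeof(uint16_t);
  }
  return sizeof(uint32_t);
}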