/**
 * Applies ndt::substitute to every type stored in a contiguous array of
 * types and collects the results, in order, into a std::vector.
 *
 * The data of 'type_array' is read as a packed sequence of ndt::type values
 * whose length is type_array.get_dim_size(). 'typevars' and 'concrete' are
 * forwarded unchanged to ndt::substitute for each element.
 */
static std::vector<ndt::type> substitute_type_array(const nd::array &type_array,
                                                    const std::map<std::string, ndt::type> &typevars, bool concrete) {
  const intptr_t count = type_array.get_dim_size();
  const ndt::type *src_types = reinterpret_cast<const ndt::type *>(type_array.cdata());

  std::vector<ndt::type> substituted;
  substituted.reserve(count);
  for (intptr_t idx = 0; idx < count; ++idx) {
    substituted.push_back(ndt::substitute(src_types[idx], typevars, concrete));
  }
  return substituted;
}
/**
 * Exposes the contents of 'json' as a [begin, end) character range.
 *
 * - bytes kind: 'json' is evaluated and its byte range is used directly.
 * - string kind with ASCII or UTF-8 encoding: 'json' is evaluated and its
 *   string range is used directly.
 * - string kind with any other encoding: 'json' is first cast to the type
 *   returned by ndt::string_type::make(), then evaluated.
 *
 * The evaluated array is stored in 'out_tmp_ref'; 'begin' and 'end' are
 * computed from that array's arrmeta and data.
 *
 * Throws runtime_error when the value type of 'json' is neither of string
 * kind nor of bytes kind.
 */
static void json_as_buffer(const nd::array &json, nd::array &out_tmp_ref, const char *&begin, const char *&end)
{
  ndt::type value_tp = json.get_type().value_type();

  switch (value_tp.get_kind()) {
  case bytes_kind: {
    out_tmp_ref = json.eval();
    const ndt::base_bytes_type *bytes_tp = value_tp.extended<ndt::base_bytes_type>();
    bytes_tp->get_bytes_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
    return;
  }
  case string_kind: {
    const ndt::base_string_type *str_tp = value_tp.extended<ndt::base_string_type>();
    switch (str_tp->get_encoding()) {
    case string_encoding_ascii:
    case string_encoding_utf_8: {
      // No transcoding needed: the evaluated buffer can be parsed as-is.
      out_tmp_ref = json.eval();
      str_tp->get_string_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
      return;
    }
    default: {
      // Any other encoding is converted to UTF-8 before parsing; the range is
      // then read through the UTF-8 string type rather than the source type.
      ndt::type utf8_tp = ndt::string_type::make();
      out_tmp_ref = json.ucast(utf8_tp).eval();
      str_tp = static_cast<const ndt::base_string_type *>(utf8_tp.extended());
      str_tp->get_string_range(&begin, &end, out_tmp_ref.get()->metadata(), out_tmp_ref.cdata());
      return;
    }
    }
  }
  default:
    break;
  }

  // Reached only for kinds other than string and bytes.
  stringstream ss;
  ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) "
        "or a string, not \""
     << value_tp << "\"";
  throw runtime_error(ss.str());
}
/**
 * Applies ndt::substitute to every type stored in a contiguous array of
 * types, writing the results into a newly created nd::array of the same
 * length.
 *
 * The data of 'type_array' is read as a packed sequence of ndt::type values
 * whose length is type_array.get_dim_size(). 'typevars' and 'concrete' are
 * forwarded unchanged to ndt::substitute for each element.
 */
static nd::array substitute_type_array(const nd::array &type_array, const std::map<std::string, ndt::type> &typevars,
                                       bool concrete)
{
  const intptr_t count = type_array.get_dim_size();
  const ndt::type *src_types = reinterpret_cast<const ndt::type *>(type_array.cdata());

  // Destination array; its data is written through a raw ndt::type pointer.
  nd::array substituted(nd::empty(count, ndt::make_type()));
  ndt::type *dst_types = reinterpret_cast<ndt::type *>(substituted.data());

  for (intptr_t idx = 0; idx < count; ++idx) {
    dst_types[idx] = ndt::substitute(src_types[idx], typevars, concrete);
  }
  return substituted;
}
Exemple #4
0
/**
 * Constructs a callable from an nd::array.
 *
 * A null 'rhs' is accepted and leaves m_value unassigned by this constructor.
 * Otherwise 'rhs' must have type id callable_type_id and its
 * callable_type_data must carry a non-NULL 'instantiate' function.
 *
 * Throws type_error if 'rhs' is not of callable type, and invalid_argument
 * if its 'instantiate' function is NULL.
 */
nd::callable::callable(const nd::array &rhs)
{
  if (rhs.is_null()) {
    return;
  }

  if (rhs.get_type().get_type_id() != callable_type_id) {
    stringstream ss;
    ss << "Cannot implicitly convert nd::array of type " << rhs.get_type().value_type() << " to  callable";
    throw type_error(ss.str());
  }

  const callable_type_data *callable_data = reinterpret_cast<const callable_type_data *>(rhs.cdata());
  if (callable_data->instantiate == NULL) {
    throw invalid_argument("Require a non-empty callable, provided callable has NULL instantiate function");
  }

  // Callable type with an instantiate function: adopt the array.
  m_value = rhs;
}
Exemple #5
0
/**
 * Constructs a categorical type from an array of category values.
 *
 * @param categories  Array holding the category values. On the non-presorted
 *                    path its type id must be fixed_dim_id, the type obtained
 *                    via at(0) must be scalar, and every value must be
 *                    distinct.
 * @param presorted   When true, no validation or sorting is performed: the
 *                    categories are stored via eval_immutable() and both index
 *                    mappings are set to the same nd::range(category_count)
 *                    array.
 *
 * @throws dynd::type_error    (non-presorted path) if 'categories' is not a
 *                             fixed-dim array or its element type is not
 *                             scalar.
 * @throws std::runtime_error  (non-presorted path) if a category value appears
 *                             more than once.
 *
 * After the categories are processed, the storage type is chosen from the
 * category count (uint8_t up to 256, uint16_t up to 65536, uint32_t
 * otherwise), and data_size / data_alignment are taken from that storage type.
 */
ndt::categorical_type::categorical_type(const nd::array &categories, bool presorted)
    // NOTE(review): the literal sizes passed to base_type here look like
    // placeholders, since data_size and data_alignment are overwritten at the
    // end of this constructor -- confirm against base_type's signature.
    : base_type(categorical_id, 4, 4, type_flag_none, 0, 0, 0)
{
  intptr_t category_count;
  if (presorted) {
    // This is a construction shortcut for the case when the categories are
    // already sorted. No validation of this is done; the caller should have
    // ensured it was correct already, typically by construction.
    m_categories = categories.eval_immutable();
    m_category_tp = m_categories.get_type().at(0);

    // Both mappings share one nd::range(category_count) array, which is
    // flagged immutable before being shared.
    category_count = categories.get_dim_size();
    m_value_to_category_index = nd::range(category_count);
    m_value_to_category_index.flag_as_immutable();
    m_category_index_to_value = m_value_to_category_index;
  }
  else {
    // Process the categories array to make sure it's valid
    const type &cdt = categories.get_type();
    if (cdt.get_id() != fixed_dim_id) {
      throw dynd::type_error("categorical_type only supports construction from "
                             "a fixed-dim array of categories");
    }
    m_category_tp = categories.get_type().at(0);
    if (!m_category_tp.is_scalar()) {
      throw dynd::type_error("categorical_type only supports construction from "
                             "a 1-dimensional strided array of categories");
    }

    // The stride between consecutive categories is read from the
    // fixed_dim_type_arrmeta at the start of the array's arrmeta.
    category_count = categories.get_dim_size();
    intptr_t categories_stride = reinterpret_cast<const fixed_dim_type_arrmeta *>(categories.get()->metadata())->stride;

    // The element arrmeta follows immediately after the fixed_dim_type_arrmeta.
    const char *categories_element_arrmeta = categories.get()->metadata() + sizeof(fixed_dim_type_arrmeta);
    // NOTE(review): nothing visible here builds a comparison kernel into 'k'
    // before its function pointer is fetched -- confirm that 'fn' is valid
    // for use by 'cmp' and 'sorter' below.
    nd::kernel_builder k;
    kernel_single_t fn = k.get()->get_function<kernel_single_t>();

    // Set of pointers to category values, ordered by the 'cmp' comparator;
    // used below to detect duplicate categories.
    cmp less(fn, k.get());
    set<const char *, cmp> uniques(less);

    m_value_to_category_index = nd::empty(category_count, make_type<intptr_t>());
    m_category_index_to_value = nd::empty(category_count, make_type<intptr_t>());

    // create the mapping from indices of (to be lexicographically sorted)
    // categories to values
    for (size_t i = 0; i != (size_t)category_count; ++i) {
      // Start from the identity permutation; it is sorted after this loop.
      unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, i) = i;
      const char *category_value = categories.cdata() + i * categories_stride;

      // Reject any value already present in the set.
      if (uniques.find(category_value) == uniques.end()) {
        uniques.insert(category_value);
      }
      else {
        stringstream ss;
        ss << "categories must be unique: category value ";
        m_category_tp.print_data(ss, categories_element_arrmeta, category_value);
        ss << " appears more than once";
        throw std::runtime_error(ss.str());
      }
    }
    // TODO: Putting everything in a set already caused a sort operation to
    //       occur, there's no reason we should need a second sort.
    // Sort the index permutation in place over [0, category_count) using the
    // 'sorter' comparator, which is given the category data and stride.
    std::sort(&unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, 0),
              &unchecked_fixed_dim_get_rw<intptr_t>(m_category_index_to_value, category_count),
              sorter(categories.cdata(), categories_stride, fn, k.get()));

    // invert the m_category_index_to_value permutation
    for (intptr_t i = 0; i < category_count; ++i) {
      unchecked_fixed_dim_get_rw<intptr_t>(m_value_to_category_index,
                                           unchecked_fixed_dim_get<intptr_t>(m_category_index_to_value, i)) = i;
    }

    // Build the stored categories array from the set of unique values.
    m_categories = make_sorted_categories(uniques, m_category_tp, categories_element_arrmeta);
  }

  // Use the number of categories to set which underlying integer storage to use
  if (category_count <= 256) {
    m_storage_type = make_type<uint8_t>();
  }
  else if (category_count <= 65536) {
    m_storage_type = make_type<uint16_t>();
  }
  else {
    m_storage_type = make_type<uint32_t>();
  }
  // Replace the values given to the base_type constructor with the ones from
  // the selected storage type.
  this->data_size = m_storage_type.get_data_size();
  this->data_alignment = (uint8_t)m_storage_type.get_data_alignment();
}