ndt::type dynd::ndt::factor_categorical(const nd::array& values)
{
    // Do the factor operation on a concrete version of the values
    // TODO: Some cases where we don't want to do this?
    nd::array values_eval = values.eval();
    array_iter<0, 1> iter(values_eval);

    comparison_ckernel_builder k;
    ::make_comparison_kernel(&k, 0,
                    iter.get_uniform_dtype(), iter.arrmeta(),
                    iter.get_uniform_dtype(), iter.arrmeta(),
                    comparison_type_sorting_less, &eval::default_eval_context);

    cmp less(k.get_function(), k.get());
    set<const char *, cmp> uniques(less);

    if (!iter.empty()) {
        do {
            if (uniques.find(iter.data()) == uniques.end()) {
                uniques.insert(iter.data());
            }
        } while (iter.next());
    }

    // Copy the values (now sorted and unique) into a new nd::array
    nd::array categories = make_sorted_categories(uniques,
                    iter.get_uniform_dtype(), iter.arrmeta());

    return ndt::type(new categorical_type(categories, true), false);
}
Beispiel #2
0
nd::array dynd::format_json(const nd::array &n, bool struct_as_list)
{
  // Create a UTF-8 string
  nd::array result = nd::empty(ndt::string_type::make());

  // Initialize the output with some memory
  output_data out;
  out.out_string.resize(1024);
  out.out_begin = out.out_string.begin();
  out.out_capacity_end = out.out_string.end();
  out.out_end = out.out_begin;
  out.struct_as_list = struct_as_list;

  if (!n.get_type().is_expression()) {
    ::format_json(out, n.get_type(), n.get_arrmeta(), n.get_readonly_originptr());
  } else {
    nd::array tmp = n.eval();
    ::format_json(out, tmp.get_type(), tmp.get_arrmeta(), tmp.get_readonly_originptr());
  }

  // Shrink the memory to fit, and set the pointers in the output
  string *d = reinterpret_cast<string *>(result.get_readwrite_originptr());
  d->assign(out.out_string.data(), out.out_end - out.out_begin);

  // Finalize processing and mark the result as immutable
  result.get_type().extended()->arrmeta_finalize_buffers(result.get_arrmeta());
  result.flag_as_immutable();

  return result;
}
Beispiel #3
0
nd::array dynd::format_json(const nd::array& n)
{
    // Create a UTF-8 string
    nd::array result = nd::empty(ndt::make_string());

    // Initialize the output with some memory
    output_data out;
    out.blockref = reinterpret_cast<const string_type_metadata *>(result.get_ndo_meta())->blockref;
    out.api = get_memory_block_pod_allocator_api(out.blockref);
    out.api->allocate(out.blockref, 1024, 1, &out.out_begin, &out.out_capacity_end);
    out.out_end = out.out_begin;

    if (!n.get_type().is_expression()) {
        ::format_json(out, n.get_type(), n.get_ndo_meta(), n.get_readonly_originptr());
    } else {
        nd::array tmp = n.eval();
        ::format_json(out, tmp.get_type(), tmp.get_ndo_meta(), tmp.get_readonly_originptr());
    }

    // Shrink the memory to fit, and set the pointers in the output
    string_type_data *d = reinterpret_cast<string_type_data *>(result.get_readwrite_originptr());
    d->begin = out.out_begin;
    d->end = out.out_capacity_end;
    out.api->resize(out.blockref, out.out_end - out.out_begin, &d->begin, &d->end);

    // Finalize processing and mark the result as immutable
    result.get_type().extended()->metadata_finalize_buffers(result.get_ndo_meta());
    result.flag_as_immutable();

    return result;
}
Beispiel #4
0
ndt::type ndt::factor_categorical(const nd::array &values)
{
  // Do the factor operation on a concrete version of the values
  // TODO: Some cases where we don't want to do this?
  nd::array values_eval = values.eval();

  intptr_t dim_size, stride;
  type el_tp;
  const char *el_arrmeta;
  values_eval.get_type().get_as_strided(values_eval.get()->metadata(), &dim_size, &stride, &el_tp, &el_arrmeta);

  nd::kernel_builder k;
  kernel_single_t fn = k.get()->get_function<kernel_single_t>();

  cmp less(fn, k.get());
  set<const char *, cmp> uniques(less);

  for (intptr_t i = 0; i < dim_size; ++i) {
    const char *data = values_eval.cdata() + i * stride;
    if (uniques.find(data) == uniques.end()) {
      uniques.insert(data);
    }
  }

  // Copy the values (now sorted and unique) into a new nd::array
  nd::array categories = make_sorted_categories(uniques, el_tp, el_arrmeta);

  return type(new categorical_type(categories, true), false);
}
Beispiel #5
0
ndt::type ndt::factor_categorical(const nd::array &values)
{
  // Do the factor operation on a concrete version of the values
  // TODO: Some cases where we don't want to do this?
  nd::array values_eval = values.eval();

  intptr_t dim_size, stride;
  type el_tp;
  const char *el_arrmeta;
  values_eval.get_type().get_as_strided(values_eval.get_arrmeta(), &dim_size,
                                        &stride, &el_tp, &el_arrmeta);

  ckernel_builder<kernel_request_host> k;
  ::make_comparison_kernel(&k, 0, el_tp, el_arrmeta, el_tp, el_arrmeta,
                           comparison_type_sorting_less,
                           &eval::default_eval_context);
  expr_single_t fn = k.get()->get_function<expr_single_t>();

  cmp less(fn, k.get());
  set<const char *, cmp> uniques(less);

  for (intptr_t i = 0; i < dim_size; ++i) {
    const char *data = values_eval.get_readonly_originptr() + i * stride;
    if (uniques.find(data) == uniques.end()) {
      uniques.insert(data);
    }
  }

  // Copy the values (now sorted and unique) into a new nd::array
  nd::array categories = make_sorted_categories(uniques, el_tp, el_arrmeta);

  return type(new categorical_type(categories, true), false);
}
Beispiel #6
0
static void json_as_buffer(const nd::array &json, nd::array &out_tmp_ref,
                           const char *&begin, const char *&end)
{
  // Check the type of 'json', and get pointers to the begin/end of a UTF-8
  // buffer
  ndt::type json_type = json.get_type().value_type();
  switch (json_type.get_kind()) {
  case string_kind: {
    const ndt::base_string_type *sdt =
        json_type.extended<ndt::base_string_type>();
    switch (sdt->get_encoding()) {
    case string_encoding_ascii:
    case string_encoding_utf_8:
      out_tmp_ref = json.eval();
      // The data is already UTF-8, so use the buffer directly
      sdt->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                            out_tmp_ref.get_readonly_originptr());
      break;
    default: {
      // The data needs to be converted to UTF-8 before parsing
      ndt::type utf8_tp = ndt::string_type::make(string_encoding_utf_8);
      out_tmp_ref = json.ucast(utf8_tp).eval();
      sdt = static_cast<const ndt::base_string_type *>(utf8_tp.extended());
      sdt->get_string_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                            out_tmp_ref.get_readonly_originptr());
      break;
    }
    }
    break;
  }
  case bytes_kind: {
    out_tmp_ref = json.eval();
    const ndt::base_bytes_type *bdt =
        json_type.extended<ndt::base_bytes_type>();
    bdt->get_bytes_range(&begin, &end, out_tmp_ref.get_arrmeta(),
                         out_tmp_ref.get_readonly_originptr());
    break;
  }
  default: {
    stringstream ss;
    ss << "Input for JSON parsing must be either bytes (interpreted as UTF-8) "
          "or a string, not \"" << json_type << "\"";
    throw runtime_error(ss.str());
    break;
  }
  }
}