Esempio n. 1
0
// Allocates memory
void factor_vm::primitive_get_samples() {
  if (atomic::load(&sampling_profiler_p) || samples.empty()) {
    ctx->push(false_object);
  } else {
    data_root<array> samples_array(allot_array(samples.size(), false_object),
                                   this);
    std::vector<profiling_sample>::const_iterator from_iter = samples.begin();
    cell to_i = 0;

    for (; from_iter != samples.end(); ++from_iter, ++to_i) {
      data_root<array> sample(allot_array(7, false_object), this);

      set_array_nth(sample.untagged(), 0,
                    tag_fixnum(from_iter->counts.sample_count));
      set_array_nth(sample.untagged(), 1,
                    tag_fixnum(from_iter->counts.gc_sample_count));
      set_array_nth(sample.untagged(), 2,
                    tag_fixnum(from_iter->counts.jit_sample_count));
      set_array_nth(sample.untagged(), 3,
                    tag_fixnum(from_iter->counts.foreign_sample_count));
      set_array_nth(sample.untagged(), 4,
                    tag_fixnum(from_iter->counts.foreign_thread_sample_count));

      set_array_nth(sample.untagged(), 5, from_iter->thread);

      cell callstack_size =
          from_iter->callstack_end - from_iter->callstack_begin;
      data_root<array> callstack(allot_array(callstack_size, false_object),
                                 this);

      std::vector<cell>::const_iterator callstacks_begin =
                                            sample_callstacks.begin(),
                                        c_from_iter =
                                            callstacks_begin +
                                            from_iter->callstack_begin,
                                        c_from_iter_end =
                                            callstacks_begin +
                                            from_iter->callstack_end;
      cell c_to_i = 0;

      for (; c_from_iter != c_from_iter_end; ++c_from_iter, ++c_to_i)
        set_array_nth(callstack.untagged(), c_to_i, *c_from_iter);

      set_array_nth(sample.untagged(), 6, callstack.value());

      set_array_nth(samples_array.untagged(), to_i, sample.value());
    }
    ctx->push(samples_array.value());
  }
}
Esempio n. 2
0
/// Python-callable wrapper around kmeans_cuda().
///
/// Keyword arguments (see kwlist): "samples" and "clusters" are required;
/// "tolerance" (default 0), "kmpp" (bool, default False), "yinyang_t"
/// (default 0.1), "seed" (default: current time), "device" (default 0) and
/// "verbosity" (default 0) are optional.
///
/// "samples" is converted to a 2D float32 array. On success a new reference
/// to a tuple (centroids, assignments) is returned, where centroids is a
/// float32 array of shape (clusters, features) and assignments is a uint32
/// array with one entry per sample. On failure a Python exception is set and
/// NULL is returned.
static PyObject *py_kmeans_cuda(PyObject *self, PyObject *args, PyObject *kwargs) {
  uint32_t clusters_size = 0, seed = static_cast<uint32_t>(time(NULL)), device = 0;
  int32_t verbosity = 0;
  float tolerance = .0, yinyang_t = .1;
  PyObject *kmpp = Py_False;
  PyObject *samples_obj;
  static const char *kwlist[] = {"samples", "clusters", "tolerance", "kmpp",
                                 "yinyang_t", "seed", "device", "verbosity", NULL};

  /* Parse the input tuple */
  if (!PyArg_ParseTupleAndKeywords(
      args, kwargs, "OI|fO!fIIi", const_cast<char**>(kwlist),
      &samples_obj, &clusters_size, &tolerance, &PyBool_Type, &kmpp, &yinyang_t,
      &seed, &device, &verbosity)) {
    return NULL;
  }
  if (clusters_size < 2 || clusters_size == UINT32_MAX) {
    PyErr_SetString(PyExc_ValueError, "\"clusters\" must be greater than 1 and "
                                      "less than (1 << 32) - 1");
    return NULL;
  }
  pyobj samples_array(PyArray_FROM_OTF(samples_obj, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY));
  if (samples_array == NULL) {
    PyErr_SetString(PyExc_TypeError, "\"samples\" must be a 2D numpy array");
    return NULL;
  }
  PyArrayObject *samples_ndarray =
      reinterpret_cast<PyArrayObject*>(samples_array.get());
  auto ndims = PyArray_NDIM(samples_ndarray);
  if (ndims != 2) {
    PyErr_SetString(PyExc_ValueError, "\"samples\" must be a 2D numpy array");
    return NULL;
  }
  auto dims = PyArray_DIMS(samples_ndarray);
  // Validate the dimensions BEFORE narrowing them: casting first would let an
  // oversize shape wrap around and slip past the checks.
  if (static_cast<uint64_t>(dims[0]) > UINT32_MAX) {
    PyErr_SetString(PyExc_ValueError,
                    "\"samples\": the number of samples must fit in 32 bits");
    return NULL;
  }
  if (static_cast<uint64_t>(dims[1]) > UINT16_MAX) {
    // Report the actual limit rather than the offending feature count.
    PyErr_Format(PyExc_ValueError,
                 "\"samples\": more than %u features is not supported",
                 static_cast<unsigned int>(UINT16_MAX));
    return NULL;
  }
  uint32_t samples_size = static_cast<uint32_t>(dims[0]);
  uint32_t features_size = static_cast<uint32_t>(dims[1]);
  float *samples = reinterpret_cast<float*>(PyArray_DATA(samples_ndarray));

  // Allocate both outputs and check them before touching their data pointers;
  // PyArray_EMPTY returns NULL with an exception set on failure.
  npy_intp centroid_dims[] = {static_cast<npy_intp>(clusters_size),
                              static_cast<npy_intp>(features_size), 0};
  PyObject *centroids_array = PyArray_EMPTY(2, centroid_dims, NPY_FLOAT32, false);
  if (centroids_array == NULL) {
    return NULL;
  }
  npy_intp assignments_dims[] = {static_cast<npy_intp>(samples_size), 0};
  PyObject *assignments_array =
      PyArray_EMPTY(1, assignments_dims, NPY_UINT32, false);
  if (assignments_array == NULL) {
    Py_DECREF(centroids_array);
    return NULL;
  }
  float *centroids = reinterpret_cast<float*>(PyArray_DATA(
      reinterpret_cast<PyArrayObject*>(centroids_array)));
  uint32_t *assignments = reinterpret_cast<uint32_t*>(PyArray_DATA(
      reinterpret_cast<PyArrayObject*>(assignments_array)));

  int result;
  Py_BEGIN_ALLOW_THREADS
  result = kmeans_cuda(
      kmpp == Py_True, tolerance, yinyang_t, samples_size,
      static_cast<uint16_t>(features_size), clusters_size, seed, device,
      verbosity, -1, samples, centroids, assignments);
  Py_END_ALLOW_THREADS

  if (result == kmcudaSuccess) {
    // "N" transfers our references into the tuple. The previous "OO" added a
    // second reference to each array, so both leaked on every call.
    return Py_BuildValue("NN", centroids_array, assignments_array);
  }

  // Every remaining path is an error: the output arrays are no longer needed.
  Py_DECREF(centroids_array);
  Py_DECREF(assignments_array);

  switch (result) {
    case kmcudaInvalidArguments:
      PyErr_SetString(PyExc_ValueError,
                      "Invalid arguments were passed to kmeans_cuda");
      break;
    case kmcudaNoSuchDevice:
      PyErr_SetString(PyExc_ValueError, "No such CUDA device exists");
      break;
    case kmcudaMemoryAllocationFailure:
      PyErr_SetString(PyExc_MemoryError,
                      "Failed to allocate memory on GPU");
      break;
    case kmcudaMemoryCopyError:
      PyErr_SetString(PyExc_RuntimeError, "cudaMemcpy failed");
      break;
    case kmcudaRuntimeError:
      PyErr_SetString(PyExc_AssertionError, "kmeans_cuda failure (bug?)");
      break;
    default:
      PyErr_SetString(PyExc_AssertionError,
                      "Unknown error code returned from kmeans_cuda");
      break;
  }
  return NULL;
}