// Allocates memory void factor_vm::primitive_get_samples() { if (atomic::load(&sampling_profiler_p) || samples.empty()) { ctx->push(false_object); } else { data_root<array> samples_array(allot_array(samples.size(), false_object), this); std::vector<profiling_sample>::const_iterator from_iter = samples.begin(); cell to_i = 0; for (; from_iter != samples.end(); ++from_iter, ++to_i) { data_root<array> sample(allot_array(7, false_object), this); set_array_nth(sample.untagged(), 0, tag_fixnum(from_iter->counts.sample_count)); set_array_nth(sample.untagged(), 1, tag_fixnum(from_iter->counts.gc_sample_count)); set_array_nth(sample.untagged(), 2, tag_fixnum(from_iter->counts.jit_sample_count)); set_array_nth(sample.untagged(), 3, tag_fixnum(from_iter->counts.foreign_sample_count)); set_array_nth(sample.untagged(), 4, tag_fixnum(from_iter->counts.foreign_thread_sample_count)); set_array_nth(sample.untagged(), 5, from_iter->thread); cell callstack_size = from_iter->callstack_end - from_iter->callstack_begin; data_root<array> callstack(allot_array(callstack_size, false_object), this); std::vector<cell>::const_iterator callstacks_begin = sample_callstacks.begin(), c_from_iter = callstacks_begin + from_iter->callstack_begin, c_from_iter_end = callstacks_begin + from_iter->callstack_end; cell c_to_i = 0; for (; c_from_iter != c_from_iter_end; ++c_from_iter, ++c_to_i) set_array_nth(callstack.untagged(), c_to_i, *c_from_iter); set_array_nth(sample.untagged(), 6, callstack.value()); set_array_nth(samples_array.untagged(), to_i, sample.value()); } ctx->push(samples_array.value()); } }
// Python binding for kmeans_cuda().
//
// Python-level arguments (see kwlist):
//   samples    - object convertible to a 2D float32 array (rows = samples,
//                columns = features); required
//   clusters   - number of clusters, 2 <= clusters < UINT32_MAX; required
//   tolerance  - float, default 0
//   kmpp       - bool, default False
//   yinyang_t  - float, default 0.1
//   seed       - unsigned int, default time(NULL)
//   device     - unsigned int, default 0
//   verbosity  - int, default 0
//
// Returns a new reference to a 2-tuple (centroids, assignments) where
// centroids is a float32 array of shape (clusters, features) and assignments
// is a uint32 array of shape (samples,). On failure returns NULL with a
// Python exception set.
static PyObject *py_kmeans_cuda(PyObject *self, PyObject *args,
                                PyObject *kwargs) {
  uint32_t clusters_size = 0;
  uint32_t seed = static_cast<uint32_t>(time(NULL));
  uint32_t device = 0;
  int32_t verbosity = 0;
  float tolerance = .0;
  float yinyang_t = .1;
  PyObject *kmpp = Py_False;
  PyObject *samples_obj;
  static const char *kwlist[] = {"samples", "clusters", "tolerance", "kmpp",
                                 "yinyang_t", "seed", "device", "verbosity",
                                 NULL};

  /* Parse the input tuple */
  if (!PyArg_ParseTupleAndKeywords(
          args, kwargs, "OI|fO!fIIi", const_cast<char**>(kwlist), &samples_obj,
          &clusters_size, &tolerance, &PyBool_Type, &kmpp, &yinyang_t, &seed,
          &device, &verbosity)) {
    return NULL;
  }
  if (clusters_size < 2 || clusters_size == UINT32_MAX) {
    PyErr_SetString(PyExc_ValueError,
                    "\"clusters\" must be greater than 1 and "
                    "less than (1 << 32) - 1");
    return NULL;
  }

  pyobj samples_array(
      PyArray_FROM_OTF(samples_obj, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY));
  if (samples_array == NULL) {
    PyErr_SetString(PyExc_TypeError, "\"samples\" must be a 2D numpy array");
    return NULL;
  }
  auto samples_nd = reinterpret_cast<PyArrayObject*>(samples_array.get());
  auto ndims = PyArray_NDIM(samples_nd);
  if (ndims != 2) {
    PyErr_SetString(PyExc_ValueError, "\"samples\" must be a 2D numpy array");
    return NULL;
  }

  // Validate the shape on the original npy_intp values, *before* narrowing,
  // so that oversized dimensions cannot wrap around and slip through.
  auto dims = PyArray_DIMS(samples_nd);
  if (static_cast<uint64_t>(dims[0]) > UINT32_MAX) {
    char msg[128];
    snprintf(msg, sizeof(msg),
             "\"samples\": more than %" PRIu32 " samples is not supported",
             static_cast<uint32_t>(UINT32_MAX));
    PyErr_SetString(PyExc_ValueError, msg);
    return NULL;
  }
  if (dims[1] > UINT16_MAX) {
    char msg[128];
    // Report the limit itself, not the offending feature count.
    snprintf(msg, sizeof(msg),
             "\"samples\": more than %" PRIu32 " features is not supported",
             static_cast<uint32_t>(UINT16_MAX));
    PyErr_SetString(PyExc_ValueError, msg);
    return NULL;
  }
  uint32_t samples_size = static_cast<uint32_t>(dims[0]);
  uint32_t features_size = static_cast<uint32_t>(dims[1]);
  float *samples = reinterpret_cast<float*>(PyArray_DATA(samples_nd));

  // The output arrays are held in pyobj so that every early return below
  // drops them. NOTE(review): this assumes pyobj releases its reference on
  // destruction, as its use for samples_array above implies - confirm.
  npy_intp centroid_dims[] = {static_cast<npy_intp>(clusters_size),
                              static_cast<npy_intp>(features_size), 0};
  pyobj centroids_array(PyArray_EMPTY(2, centroid_dims, NPY_FLOAT32, false));
  if (centroids_array == NULL) {
    // Allocation failed; the exception has already been set by NumPy.
    return NULL;
  }
  float *centroids = reinterpret_cast<float*>(PyArray_DATA(
      reinterpret_cast<PyArrayObject*>(centroids_array.get())));

  npy_intp assignments_dims[] = {static_cast<npy_intp>(samples_size), 0};
  pyobj assignments_array(
      PyArray_EMPTY(1, assignments_dims, NPY_UINT32, false));
  if (assignments_array == NULL) {
    return NULL;
  }
  uint32_t *assignments = reinterpret_cast<uint32_t*>(PyArray_DATA(
      reinterpret_cast<PyArrayObject*>(assignments_array.get())));

  // Run the computation with the GIL released; only raw buffers are touched.
  int result;
  Py_BEGIN_ALLOW_THREADS
  result = kmeans_cuda(
      kmpp == Py_True, tolerance, yinyang_t, samples_size,
      static_cast<uint16_t>(features_size), clusters_size, seed, device,
      verbosity, -1, samples, centroids, assignments);
  Py_END_ALLOW_THREADS

  switch (result) {
    case kmcudaInvalidArguments:
      PyErr_SetString(PyExc_ValueError,
                      "Invalid arguments were passed to kmeans_cuda");
      return NULL;
    case kmcudaNoSuchDevice:
      PyErr_SetString(PyExc_ValueError, "No such CUDA device exists");
      return NULL;
    case kmcudaMemoryAllocationFailure:
      PyErr_SetString(PyExc_MemoryError, "Failed to allocate memory on GPU");
      return NULL;
    case kmcudaMemoryCopyError:
      PyErr_SetString(PyExc_RuntimeError, "cudaMemcpy failed");
      return NULL;
    case kmcudaRuntimeError:
      PyErr_SetString(PyExc_AssertionError, "kmeans_cuda failure (bug?)");
      return NULL;
    case kmcudaSuccess:
      // "O" gives the tuple its own reference to each array; the creation
      // references are the ones held (and later dropped) by the wrappers.
      return Py_BuildValue("OO", centroids_array.get(),
                           assignments_array.get());
    default:
      PyErr_SetString(PyExc_AssertionError,
                      "Unknown error code returned from kmeans_cuda");
      return NULL;
  }
}