// Builds a tuple containing one Python object per entry of
// `self->saved_variables`, converting each unpacked variable with `unpack_fn`.
//
// - Asserts (THPUtils_assert with ERR_BACKWARD_TWICE) that
//   `self->has_freed_buffers` is not set.
// - Returns a new empty tuple when nothing was saved.
// - An unpacked variable that is not defined is stored as None.
// - Returns NULL when the tuple cannot be allocated or when `unpack_fn`
//   yields NULL for a defined variable.
static PyObject *unpack_saved_variables(
    THPFunction *self,
    std::function<PyObject*(const Variable&)> unpack_fn)
{
  THPUtils_assert(!self->has_freed_buffers, ERR_BACKWARD_TWICE);
  auto& saved_variables = self->saved_variables;
  if (saved_variables.empty())
    return PyTuple_New(0);

  int num_saved = saved_variables.size();
  THPObjectPtr saved(PyTuple_New(num_saved));
  if (!saved)
    return NULL;
  auto saved_for = THPFunction_asFunction(self);
  for (int i = 0; i < num_saved; i++) {
    auto unpacked_var = saved_variables[i].unpack(saved_for);
    THPObjectPtr value;
    if (!unpacked_var.defined()) {
      Py_INCREF(Py_None);
      value = Py_None;
    } else {
      value = unpack_fn(unpacked_var);
      // A NULL result means the conversion failed (presumably with a Python
      // exception set by unpack_fn -- TODO confirm). Bail out instead of
      // handing the caller a tuple with an empty slot; `saved` releases the
      // partially filled tuple when it goes out of scope.
      if (!value)
        return NULL;
    }
    // PyTuple_SET_ITEM steals the reference released by `value`.
    PyTuple_SET_ITEM(saved.get(), i, value.release());
  }
  return saved.release();
}
// Ensures `raw_grad_output` contains no None entries.
//
// If the tuple has no None it is left untouched. Otherwise a new tuple of the
// same length is built in which every None is replaced by the object created
// from `self->output_info[i].zeros(gpu_guard).data()`, and `raw_grad_output`
// is re-pointed at that new tuple.
//
// Throws python_error when the new tuple or a replacement object cannot be
// created.
static void _prepare_grad_output(THPFunction *self, THPObjectPtr& raw_grad_output)
{
  AutoGPU gpu_guard(-1);
  PyObject *original = raw_grad_output.get();
  int count = PyTuple_GET_SIZE(original);

  // Fast path: stop scanning as soon as one None is seen; if there is none at
  // all, the incoming tuple is already usable as-is.
  bool found_none = false;
  for (int idx = 0; idx < count && !found_none; ++idx) {
    found_none = (PyTuple_GET_ITEM(original, idx) == Py_None);
  }
  if (!found_none)
    return;

  THPObjectPtr filled(PyTuple_New(count));
  if (!filled) throw python_error();

  // Copy every entry over, substituting freshly created buffers for Nones.
  auto& output_info = self->output_info;
  for (int idx = 0; idx < count; ++idx) {
    PyObject *entry = PyTuple_GET_ITEM(original, idx);
    if (entry != Py_None) {
      // Borrowed from the old tuple; the new tuple needs its own reference.
      Py_INCREF(entry);
    } else {
      entry = createPyObject(output_info[idx].zeros(gpu_guard).data());
      if (!entry) throw python_error();
    }
    PyTuple_SET_ITEM(filled.get(), idx, entry);
  }
  raw_grad_output = filled.release();
}
// Computes the sizes of a (possibly nested) Python sequence by repeatedly
// taking the length of the current sequence and then descending into its
// first element, for as long as that element is itself a sequence.
//
// Returns one length per nesting level visited. Descent stops at the first
// empty sequence or the first non-sequence element.
//
// Throws python_error when the length or the first element cannot be
// obtained, and ValueError when more than MAX_DIMS levels are found.
static std::vector<int64_t> compute_sizes(PyObject* seq) {
  std::vector<int64_t> sizes;
  // Owns the first element of the level currently being inspected so that
  // `seq` stays valid until the next level replaces it.
  THPObjectPtr handle;
  do {
    auto length = PySequence_Length(seq);
    if (length < 0) throw python_error();
    sizes.push_back(length);
    if (sizes.size() > MAX_DIMS) {
      throw ValueError("too many dimensions '%s'", Py_TYPE(seq)->tp_name);
    }
    if (length == 0) break;
    handle = THPObjectPtr(PySequence_GetItem(seq, 0));
    // PySequence_GetItem returns NULL on failure; without this check the NULL
    // would be handed to PySequence_Check in the loop condition.
    if (!handle) throw python_error();
    seq = handle.get();
  } while (PySequence_Check(seq));

  return sizes;
}
// Runs the forward pass of the function class `cls` on the tuple `inputs`.
//
// Steps, in order:
//   1. Instantiate `cls._backward_cls` with no arguments; the instance is used
//      as the context object (`ctx`).
//   2. Unpack `inputs` with unpack_input<false> and copy the resulting flags,
//      needs_input_grad and is_variable_input onto `ctx`.
//   3. Call `cls.forward(ctx, *unpacked_inputs)`.
//   4. Hand the raw outputs to process_outputs and return its result.
//
// Returns NULL when any of the Python calls made directly here fails.
// Exceptions are handled by the HANDLE_TH_ERRORS / END_HANDLE_TH_ERRORS pair.
PyObject *THPFunction_apply(PyObject *cls, PyObject *inputs)
{
  HANDLE_TH_ERRORS
  torch::autograd::profiler::RecordFunction record(((PyTypeObject*)cls)->tp_name);

  THPObjectPtr backward_cls(PyObject_GetAttrString(cls, "_backward_cls"));
  if (!backward_cls) return NULL;
  THPObjectPtr ctx_obj(PyObject_CallFunctionObjArgs(backward_cls, NULL));
  if (!ctx_obj) return NULL;
  THPFunction* ctx = (THPFunction*)ctx_obj.get();

  // Prepare inputs and allocate context (grad fn)
  auto info_pair = unpack_input<false>(inputs);
  UnpackedInput& unpacked_input = info_pair.first;
  InputFlags& input_info = info_pair.second;

  // Initialize backward function (and ctx)
  // is_volatile is read before the flags are moved into ctx.
  bool is_volatile = input_info.flags.is_volatile;
  ctx->cdata.set_flags(std::move(input_info.flags));
  ctx->needs_input_grad = input_info.needs_input_grad.release();
  ctx->is_variable_input = std::move(input_info.is_variable_input);

  // Prepend ctx to tensor_input, in preparation for static method call
  auto num_args = PyTuple_GET_SIZE(inputs);
  THPObjectPtr ctx_tensor_input(PyTuple_New(num_args + 1));
  // PyTuple_SET_ITEM does no error checking, so a failed allocation must be
  // caught before the tuple is written to.
  if (!ctx_tensor_input) return NULL;
  // The tuple takes over the reference to ctx; the raw `ctx` pointer stays
  // usable for as long as ctx_tensor_input is alive.
  PyTuple_SET_ITEM(ctx_tensor_input.get(), 0, ctx_obj.release());
  // NOTE(review): assumes unpacked_input.tensor_input has the same length as
  // `inputs` -- confirm against unpack_input.
  for (Py_ssize_t i = 0; i < num_args; ++i) {
    PyObject *arg = PyTuple_GET_ITEM(unpacked_input.tensor_input.get(), i);
    Py_INCREF(arg);
    PyTuple_SET_ITEM(ctx_tensor_input.get(), i + 1, arg);
  }

  // Call forward
  THPObjectPtr forward_fn(PyObject_GetAttrString(cls, "forward"));
  if (!forward_fn) return NULL;
  THPObjectPtr tensor_outputs(PyObject_CallObject(forward_fn, ctx_tensor_input));
  if (!tensor_outputs) return NULL;

  THPObjectPtr outputs {process_outputs(cls, ctx, unpacked_input, inputs,
                                        std::move(tensor_outputs), is_volatile)};
  return outputs.release();
  END_HANDLE_TH_ERRORS
}
// Drops surplus trailing entries from `grad_input` when all of them are None.
//
// The tuple is compared against the number of entries in
// `self->cdata.next_functions`. If it is longer and every extra entry is None,
// `grad_input` is replaced by a slice holding only the leading entries.
// In every other case the tuple is left unchanged.
//
// Throws python_error when the slice cannot be created.
static void _trim_grad_input(THPFunction *self, THPObjectPtr& grad_input)
{
  int grad_count = PyTuple_GET_SIZE(grad_input.get());
  int expected = self->cdata.next_functions.size();

  // Nothing to trim unless there are more grads than next functions.
  if (grad_count <= expected)
    return;

  // Any non-None value among the extras means the tuple must stay as it is.
  for (int pos = expected; pos < grad_count; ++pos) {
    if (PyTuple_GET_ITEM(grad_input.get(), pos) != Py_None)
      return;
  }

  // All extras are None: keep only the first `expected` entries.
  grad_input = PyTuple_GetSlice(grad_input.get(), 0, expected);
  if (!grad_input) throw python_error();
}
// Produces the PreTraceInfo for a Python-defined op.
//
// Looks up the "apply" attribute of `pyobj` and passes makePreTraceInfo a
// callback that creates the graph node via Graph::createPythonOp, giving it
// that attribute together with `arg_types` and `scalar_args`.
//
// Throws python_error when the attribute lookup fails.
PreTraceInfo preRecordPythonTrace(THPObjectPtr pyobj,
                                  std::string arg_types,
                                  at::ArrayRef<Variable> inputs,
                                  pyobj_list scalar_args) {
  THPObjectPtr apply_fn(PyObject_GetAttrString(pyobj.get(), "apply"));
  if (!apply_fn) {
    throw python_error();
  }

  // The callback captures locals by reference and moves out of two of them.
  // NOTE(review): this presumes makePreTraceInfo invokes it at most once and
  // before this function returns -- confirm against makePreTraceInfo.
  auto build_node = [&](const std::shared_ptr<TracingState>& /*state*/, Graph& graph) {
    return graph.createPythonOp(
        std::move(apply_fn),
        arg_types,
        std::move(scalar_args));
  };
  return makePreTraceInfo(inputs, std::move(build_node));
}
// tp_new implementation for the storage type.
//
// Accepted call forms (mirroring the list passed to THPUtils_invalidArguments
// at the end of the function):
//   - no arguments
//   - (int size)
//   - (Sequence data)
//   - (view_source), (view_source, int offset), (view_source, int offset, int size)
//
// Keyword arguments handled internally:
//   - allocator: integer holding a THAllocator pointer; removed from `kwargs`
//     once read and used by the no-argument and (size) forms.
//   - cdata: integer holding a THStorage pointer; only honoured when it is the
//     sole remaining keyword and there are no positional arguments.
//
// Returns the new object, or NULL with a Python error set when the arguments
// do not match any form or construction fails.
static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
  HANDLE_TH_ERRORS
  Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;

  THPStoragePtr self((THPStorage *)type->tp_alloc(type, 0));
  THPUtils_assert(self, "failed to allocate a " THPStorageStr " object");
  THAllocator* allocator = NULL;

  // Internally we allow constructing with a keyword-only argument cdata
  if (kwargs != NULL) {
    PyObject *allocator_ptr = PyDict_GetItemString(kwargs, "allocator");
    if (allocator_ptr) {
      THPUtils_assert(THPUtils_checkLong(allocator_ptr), "invalid allocator");
      allocator = (THAllocator*) PyLong_AsVoidPtr(allocator_ptr);
      // Remove it so that the keyword count below only sees what is left.
      PyDict_DelItemString(kwargs, "allocator");
    }

    Py_ssize_t num_kwargs = PyDict_Size(kwargs);
    if (num_args == 0) {
      PyObject *cdata_ptr = PyDict_GetItemString(kwargs, "cdata");
      if (num_kwargs == 1 && cdata_ptr && THPUtils_checkLong(cdata_ptr)) {
        // Wrap the existing THStorage pointer directly.
        THStorage *ptr = (THStorage*)PyLong_AsVoidPtr(cdata_ptr);
        self->cdata = ptr;
        return (PyObject*)self.release();
      }
    }
    THPUtils_assert(num_kwargs == 0, THPStorageStr "(): invalid keyword arguments");
  }

  // torch.Storage()
  if (num_args == 0) {
    if (allocator) {
      self->cdata = THPStorage_(newWithAllocator)(0, allocator);
    } else {
      self->cdata = THStorage_(new)(LIBRARY_STATE_NOARGS);
    }
    return (PyObject*)self.release();
  }

  PyObject *first_arg = PyTuple_GET_ITEM(args, 0);

  // torch.Storage(size)
  if (num_args == 1 && THPUtils_checkLong(first_arg)) {
    int64_t size = THPUtils_unpackLong(first_arg);
    if (allocator) {
      self->cdata = THPStorage_(newWithAllocator)(size, allocator);
    } else {
      self->cdata = THStorage_(newWithSize)(LIBRARY_STATE size);
    }
    return (PyObject*)self.release();
  }

  // torch.Storage(view_source, [offset, [size]])
  if (num_args < 4 && THPStorage_(Check)(first_arg)) {
#ifdef THD_GENERIC_FILE
    THPUtils_setError("distributed storages don't support storage views");
    return NULL;
#else
    THPStorage *storage_arg = (THPStorage *)first_arg;
    int64_t numel = storage_arg->cdata->size;
    int64_t offset = 0;

    if (num_args >= 2) {
      PyObject *second_arg = PyTuple_GET_ITEM(args, 1);
      if (!THPUtils_checkLong(second_arg))
        goto invalid_arguments;
      offset = THPUtils_unpackLong(second_arg);
    }

    // Default: view everything from `offset` to the end of the source.
    int64_t size = numel - offset;
    if (num_args >= 3) {
      PyObject *third_arg = PyTuple_GET_ITEM(args, 2);
      if (!THPUtils_checkLong(third_arg))
        goto invalid_arguments;
      size = THPUtils_unpackLong(third_arg);
    }

    THPUtils_assert(offset >= 0 && offset <= numel, "specified an offset of "
        "%" PRId64 ", but the viewed storage has only %" PRId64 " element(s)", offset, numel);
    THPUtils_assert(size >= 1 && size <= numel - offset, "specified a size of "
        "%" PRId64 ", but the viewed storage has only %" PRId64 " element(s) after offset %" PRId64,
        size, numel - offset, offset);

    real *data_ptr = THStorage_(data)(LIBRARY_STATE storage_arg->cdata) + offset;
    THStoragePtr storage(THStorage_(newWithData)(LIBRARY_STATE data_ptr, size));
    storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_VIEW;
    storage->view = storage_arg->cdata;
    // The view records the source storage in `view`, so retain the source.
    THStorage_(retain)(LIBRARY_STATE storage_arg->cdata);
    self->cdata = storage.release();
    return (PyObject*)self.release();
#endif
  }

  // torch.Storage(sequence)
  if (num_args == 1 && PySequence_Check(first_arg)) {
#ifdef THD_GENERIC_FILE
    THPUtils_setError("distributed storages don't support construction from a sequence");
    // Return here so the specific message above is not replaced by the
    // generic invalid-arguments error at the end of the function.
    return NULL;
#else
    Py_ssize_t length = PySequence_Length(first_arg);
    THPUtils_assert(length >= 0, "couldn't obtain the length of %s",
        THPUtils_typename(first_arg));
    self->cdata = THStorage_(newWithSize)(LIBRARY_STATE length);
    THPObjectPtr item;
    try {
      for (Py_ssize_t i = 0; i < length; i++) {
        item = PySequence_GetItem(first_arg, i);
        // PySequence_GetItem returns NULL on failure; propagate that error
        // rather than passing NULL on to unpackReal.
        if (!item) return NULL;
        real value = THPUtils_(unpackReal)(item.get());
#if !defined(THC_GENERIC_FILE)
        self->cdata->unsafe_data<real>()[i] = value;
#else
        // TODO: this might be slow - consider batched updates?
        THCStorage_(set)(LIBRARY_STATE
            self->cdata, i, value);
#endif
      }
    } catch (std::runtime_error &e) {
      THPUtils_setError("tried to construct a storage from a sequence (%s), "
          "but one of the items was of type %s instead of %s",
          THPUtils_typename(first_arg),
          THPUtils_typename(item.get()),
          THPUtils_typeTraits<real>::python_type_str);
      return NULL;
    }
    return (PyObject*)self.release();
#endif
  }

  // The label is only referenced from the non-distributed view branch above.
#ifndef THD_GENERIC_FILE
invalid_arguments:
#endif
  THPUtils_invalidArguments(args, kwargs, THPStorageStr " constructor", 6,
          "no arguments",
          "(int size)",
          "(Sequence data)",
          "(" THPStorageStr " view_source)",
          "(" THPStorageStr " view_source, int offset)",
          "(" THPStorageStr " view_source, int offset, int size)");
  return NULL;
  END_HANDLE_TH_ERRORS
}