// Bilinearly upsamples a contiguous 4D tensor (nbatch, channels, inputHeight,
// inputWidth) into `output` of spatial size (outputHeight, outputWidth).
// CPU implementation: iterates over output pixels and, for each pixel, walks
// across all batch*channel planes with a fixed plane stride.
//
// Bug fixed vs. original: the equal-size fast path returned without freeing
// the contiguous copy made by THTensor_(newContiguous), leaking `input`.
void THNN_(SpatialUpSamplingBilinear_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    int outputHeight,
    int outputWidth) {
  int nbatch = THTensor_(size)(input, 0);
  int channels = THTensor_(size)(input, 1);
  int inputHeight = THTensor_(size)(input, 2);
  int inputWidth = THTensor_(size)(input, 3);
  THNN_(SpatialUpSamplingBilinear_shapeCheck)
    (input, NULL, nbatch, channels,
     inputHeight, inputWidth, outputHeight, outputWidth);

  // Owned contiguous copy; must be freed on EVERY exit path below.
  input = THTensor_(newContiguous)(input);
  THTensor_(resize4d)(output,
                      THTensor_(size)(input, 0),
                      THTensor_(size)(input, 1),
                      outputHeight,
                      outputWidth);
  THTensor_(zero)(output);
  real *idata = THTensor_(data)(input);
  real *odata = THTensor_(data)(output);
  // Fold batch into channels: planes are contiguous, so advancing by one
  // plane stride (H*W) visits every (batch, channel) pair in turn.
  channels = nbatch * channels;
  THAssert(inputHeight > 0 && inputWidth > 0 &&
           outputHeight > 0 && outputWidth > 0);

  // Special case: same spatial size — plain element copy, no interpolation.
  if (inputHeight == outputHeight && inputWidth == outputWidth) {
    for (int h2 = 0; h2 < outputHeight; ++h2) {
      const int h1 = h2;
      for (int w2 = 0; w2 < outputWidth; ++w2) {
        const int w1 = w2;
        const real* pos1 = &idata[h1 * inputWidth + w1];
        real* pos2 = &odata[h2 * outputWidth + w2];
        for (int c = 0; c < channels; ++c) {
          pos2[0] = pos1[0];
          pos1 += inputWidth * inputHeight;
          pos2 += outputWidth * outputHeight;
        }
      }
    }
    // BUGFIX: release the contiguous copy before the early return;
    // the original leaked `input` on this path.
    THTensor_(free)(input);
    return;
  }

  // align_corners-style scaling: map output index range [0, out-1] onto
  // input index range [0, in-1]; degenerate 1-wide/1-high outputs map to 0.
  const float rheight = (outputHeight > 1)
      ? (float)(inputHeight - 1) / (outputHeight - 1) : 0.f;
  const float rwidth = (outputWidth > 1)
      ? (float)(inputWidth - 1) / (outputWidth - 1) : 0.f;
  for (int h2 = 0; h2 < outputHeight; ++h2) {
    const float h1r = rheight * h2;
    const int h1 = h1r;                              // floor (h1r >= 0)
    const int h1p = (h1 < inputHeight - 1) ? 1 : 0;  // row step, clamped at edge
    const real h1lambda = h1r - h1;                  // vertical interpolation weight
    const real h0lambda = (real)1. - h1lambda;
    for (int w2 = 0; w2 < outputWidth; ++w2) {
      const float w1r = rwidth * w2;
      const int w1 = w1r;                            // floor (w1r >= 0)
      const int w1p = (w1 < inputWidth - 1) ? 1 : 0; // column step, clamped at edge
      const real w1lambda = w1r - w1;                // horizontal interpolation weight
      const real w0lambda = (real)1. - w1lambda;
      const real* pos1 = &idata[h1 * inputWidth + w1];
      real* pos2 = &odata[h2 * outputWidth + w2];
      for (int c = 0; c < channels; ++c) {
        // Weighted average of the 2x2 neighborhood around (h1, w1).
        pos2[0] = h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p])
                + h1lambda * (w0lambda * pos1[h1p * inputWidth]
                            + w1lambda * pos1[h1p * inputWidth + w1p]);
        pos1 += inputWidth * inputHeight;
        pos2 += outputWidth * outputHeight;
      }
    }
  }
  THTensor_(free)(input);
}
// Backward pass of the class negative-log-likelihood criterion: writes
// -weight[target] (optionally normalized by *total_weight) into gradInput at
// each sample's target index.  Targets are 1-based (Lua convention) — note
// the `- 1` when indexing.  Supports 1D (single sample) and 2D (batch)
// inputs only; multi-dimensional targets are rejected.
void THNN_(ClassNLLCriterion_updateGradInput)(THNNState *state, THTensor *input, THIndexTensor *target, THTensor *gradInput, bool sizeAverage, THTensor *weights, THTensor *total_weight) { int n_dims = THTensor_(nDimension)(input); int n_classes = THTensor_(size)(input, n_dims - 1); if (!THTensor_(isContiguous)(gradInput)) { THError("gradInput must be contiguous"); } real *total_weight_data = THTensor_(data)(total_weight); /* total_weight <= 0 means the forward pass accumulated no weight; gradient stays zero. */ if (!(*total_weight_data > 0)) { return; } if (THIndexTensor_(nDimension)(target) > 1) { THError("multi-target not supported"); } if (THTensor_(nDimension)(input) > 2) { THError("input tensor should be 1D or 2D"); } /* Owned contiguous copies; freed before returning. */ target = THIndexTensor_(newContiguous)(target); weights = weights ? THTensor_(newContiguous)(weights) : NULL; THIndex_t *target_data = THIndexTensor_(data)(target); real *weights_data = weights ? THTensor_(data)(weights) : NULL; real *gradInput_data = THTensor_(data)(gradInput); if (THTensor_(nDimension)(input) == 1) { int cur_target = target_data[0] - 1; THAssert(cur_target >= 0 && cur_target < n_classes); /* NOTE(review): with sizeAverage the unnormalized -weight would be divided by total_weight, which presumably equals that same weight for a single sample — hence the -1 shortcut; confirm against the forward pass. */ gradInput_data[cur_target] = (!sizeAverage && weights) ? -weights_data[cur_target] : -1; } else if (THTensor_(nDimension)(input) == 2) { int batch_size = THTensor_(size)(input, 0); THAssert(THIndexTensor_(size)(target, 0) == batch_size); int n_target = THTensor_(size)(input, 1); int i; for (i = 0; i < batch_size; i++){ int cur_target = target_data[i] - 1; THAssert(cur_target >= 0 && cur_target < n_classes); /* One nonzero entry per row, at the target class. */ gradInput_data[i * n_target + cur_target] = -(weights ? weights_data[cur_target] : 1.0f); /* Normalize by the summed weight when averaging over the batch. */ if (sizeAverage && *total_weight_data) { gradInput_data[i * n_target + cur_target] /= *total_weight_data; } } } THIndexTensor_(free)(target); if (weights) { THTensor_(free)(weights); } }
void THCStorage_resize(THCState *state, THCStorage *self, ptrdiff_t size) { THArgCheck(size >= 0, 2, "invalid size"); THAssert(self->allocator != NULL); int device; THCudaCheck(cudaGetDevice(&device)); if(!(self->flag & TH_STORAGE_RESIZABLE)) THError("Trying to resize storage that is not resizable"); size_t elementSize = at::elementSize(self->scalar_type); if (self->allocator->realloc) { void * data_ptr = self->data_ptr; cudaError_t err = (*self->allocator->realloc)( self->allocatorContext, (void**)&(data_ptr), self->size * elementSize, size * elementSize, THCState_getCurrentStreamOnDevice(state, device)); if (err != cudaSuccess) { THCudaCheck(err); } self->size = size; self->device = device; return; } if(size == 0) { if(self->flag & TH_STORAGE_FREEMEM) { THCudaCheck( (*self->allocator->free)(self->allocatorContext, self->data_ptr)); } self->data_ptr = NULL; self->size = 0; self->device = device; } else { void *data = NULL; cudaError_t err = (*self->allocator->malloc)(self->allocatorContext, (void**)&(data), size * elementSize, THCState_getCurrentStreamOnDevice(state, device)); THCudaCheck(err); if (self->data_ptr) { // Enable p2p access when the memcpy is across devices THCState_getPeerToPeerAccess(state, device, self->device); THCudaCheck(cudaMemcpyAsync(data, self->data_ptr, THMin(self->size, size) * elementSize, cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); if(self->flag & TH_STORAGE_FREEMEM) { THCudaCheck( (*self->allocator->free)(self->allocatorContext, self->data_ptr)); } } self->data_ptr = data; self->size = size; self->device = device; } }