/* Backward pass of softmax along an arbitrary dimension `dim`.
 *
 * Computes gradInput = output * (gradOutput - sum(gradOutput * output, dim))
 * where `output` is the forward softmax result.  The tensor is viewed as
 * (outer_size, dim_size, inner_size); each (outer, inner) slice is reduced
 * independently, parallelized across slices with OpenMP.
 *
 * state      - library state (unused here)
 * input      - forward input (unused; shape checks use `output`)
 * gradOutput - gradient w.r.t. the softmax output; must match output's shape
 * gradInput  - result tensor, resized to output's shape
 * output     - saved forward softmax output
 * dim        - dimension the softmax was taken over; must satisfy
 *              0 <= dim < output->nDimension
 */
void THNN_(SoftMax_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output,
          int64_t dim)
{
  THNN_CHECK_SHAPE(output, gradOutput);
  /* Fix: `dim` is int64_t but the message uses %d — passing a 64-bit value
   * through a %d vararg slot is undefined behavior.  The value is only used
   * in the diagnostic, so a cast to int is safe here. */
  THArgCheck(dim >= 0 && dim < output->nDimension, 6,
             "dim out of range (got %d, but input has %d dims)",
             (int) dim, output->nDimension);

  /* Collapse the tensor around `dim`: sizes before it multiply into
   * outer_size, sizes after it into inner_size. */
  uint64_t outer_size = 1;
  uint64_t dim_size = output->size[dim];
  uint64_t inner_size = 1;
  for (int64_t i = 0; i < dim; ++i)
    outer_size *= output->size[i];
  for (int64_t i = dim + 1; i < output->nDimension; ++i)
    inner_size *= output->size[i];

  /* Contiguous copies so flat pointer arithmetic below is valid. */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  output = THTensor_(newContiguous)(output);
  THTensor_(resizeAs)(gradInput, output);
  real *gradInput_data_base = THTensor_(data)(gradInput);
  real *output_data_base = THTensor_(data)(output);
  real *gradOutput_data_base = THTensor_(data)(gradOutput);

  uint64_t dim_stride = inner_size;
  uint64_t outer_stride = dim_size * dim_stride;

  SOFTMAX_SIZE_TYPE i, d;
#pragma omp parallel for private(i, d)
  for (i = 0; i < SOFTMAX_CAST_TYPE (outer_size * inner_size); i++)
  {
    uint64_t outer_idx = i / inner_size;
    uint64_t inner_idx = i % inner_size;
    real *gradInput_data = gradInput_data_base + outer_idx * outer_stride + inner_idx;
    real *output_data = output_data_base + outer_idx * outer_stride + inner_idx;
    real *gradOutput_data = gradOutput_data_base + outer_idx * outer_stride + inner_idx;

    /* sum_k gradOutput[k] * output[k] along the softmax dimension,
     * accumulated in accreal for precision. */
    accreal sum = 0;
    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++)
      sum += ((accreal)gradOutput_data[d * dim_stride])
           * ((accreal)output_data[d * dim_stride]);

    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++)
      gradInput_data[d * dim_stride] =
          output_data[d * dim_stride] * (gradOutput_data[d * dim_stride] - sum);
  }

  /* Release the contiguous copies (reference-counted). */
  THTensor_(free)(gradOutput);
  THTensor_(free)(output);
}
/* Forward pass of the SmoothL1 (Huber-style, threshold 1) criterion.
 *
 * For each element z = |input - target|:
 *   loss = 0.5 * z^2   if z < 1
 *   loss = z - 0.5     otherwise
 *
 * When reduce is false, the elementwise losses are written into `output`
 * (resized to input's shape).
 *
 * NOTE(review): this definition appears truncated in this view — the
 * `reduce == true` path (and the closing braces of the `if` and the
 * function) are not visible here.  Confirm against the full source before
 * relying on this listing.
 */
void THNN_(SoftMax_updateGradInput)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THTensor *output) { THNN_CHECK_SHAPE(input, gradOutput); real *gradInput_data, *gradOutput_data, *output_data; ptrdiff_t nframe = 0, dim = 0, stride = 0; ptrdiff_t t; if (output->nDimension == 1) { nframe = 1; dim = output->size[0]; stride = 1; } else if (output->nDimension == 2) { nframe = output->size[0]; dim = output->size[1]; stride = 1; } else if (output->nDimension == 3) { nframe = 1; dim = output->size[0]; stride = output->size[1]*output->size[2]; } else if (output->nDimension == 4) { nframe = output->size[0]; dim = output->size[1]; stride = output->size[2]*output->size[3]; } else { THError("1D, 2D, 3D or 4D tensor expected"); } gradOutput = THTensor_(newContiguous)(gradOutput); output = THTensor_(newContiguous)(output); THTensor_(resizeAs)(gradInput, output); gradInput_data = THTensor_(data)(gradInput); output_data = THTensor_(data)(output); gradOutput_data = THTensor_(data)(gradOutput); #pragma omp parallel for private(t) for (t = 0; t < stride*nframe; t++) { real *gradInput_ptr = gradInput_data + (t/stride)*dim*stride + t % stride; real *output_ptr = output_data + (t/stride)*dim*stride + t % stride; real *gradOutput_ptr = gradOutput_data + (t/stride)*dim*stride + t % stride; ptrdiff_t d; accreal sum = 0; for (d = 0; d < dim; d++) sum += (accreal)gradOutput_ptr[d*stride] * output_ptr[d*stride]; for (d = 0; d < dim; d++) gradInput_ptr[d*stride] = output_ptr[d*stride] * (gradOutput_ptr[d*stride] - sum); } THTensor_(free)(gradOutput); THTensor_(free)(output); }
void THNN_(SoftMax_updateGradInput)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THTensor *output) { THNN_CHECK_SHAPE(input, gradOutput); real *gradInput_data, *gradOutput_data, *output_data; ptrdiff_t nframe = 0, dim = 0, stride = 0; ptrdiff_t t; if (output->nDimension == 1) { nframe = 1; dim = output->size[0]; stride = 1; } else if (output->nDimension == 2) { nframe = output->size[0]; dim = output->size[1]; stride = 1; } else if (output->nDimension == 3) { nframe = 1; dim = output->size[0]; stride = output->size[1]*output->size[2]; } else if (output->nDimension == 4) { nframe = output->size[0]; dim = output->size[1]; stride = output->size[2]*output->size[3]; } else { THError("1D, 2D, 3D or 4D tensor expected"); } gradOutput = THTensor_(newContiguous)(gradOutput); output = THTensor_(newContiguous)(output); THTensor_(resizeAs)(gradInput, output); gradInput_data = THTensor_(data)(gradInput); output_data = THTensor_(data)(output); gradOutput_data = THTensor_(data)(gradOutput); #pragma omp parallel for private(t) for (t = 0; t < stride*nframe; t++) { real *gradInput_ptr = gradInput_data + (t/stride)*dim*stride + t % stride; real *output_ptr = output_data + (t/stride)*dim*stride + t % stride; real *gradOutput_ptr = gradOutput_data + (t/stride)*dim*stride + t % stride; ptrdiff_t d; accreal sum = 0; for (d = 0; d < dim; d++) sum += (accreal)gradOutput_ptr[d*stride] * output_ptr[d*stride]; for (d = 0; d < dim; d++) gradInput_ptr[d*stride] = output_ptr[d*stride] * (gradOutput_ptr[d*stride] - sum); } THTensor_(free)(gradOutput); THTensor_(free)(output); }