/* Computes log-softmax over the last dimension of a 1D (single frame) or
 * 2D (nframe x dim) contiguous tensor, writing the result into `output`.
 * Uses the max-shifted log-sum-exp form for numerical stability.
 * Frames are independent, so they are processed in parallel. */
void THNN_(LogSoftMax_updateOutput)(THNNState *state, THTensor *input, THTensor *output)
{
  long nframe = 0, dim = 0;
  long t;

  if (input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
  }
  else if (input->nDimension == 2)
  {
    nframe = input->size[0];
    dim = input->size[1];
  }
  else
  {
    THArgCheck(0, 2, "vector or matrix expected");
  }

  /* Work on a contiguous copy so plain pointer arithmetic is valid. */
  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  real *in_base = THTensor_(data)(input);
  real *out_base = THTensor_(data)(output);

#pragma omp parallel for private(t)
  for (t = 0; t < nframe; t++)
  {
    /* Per-frame state lives inside the loop body, so each thread gets
     * its own copy without needing an explicit private() listing. */
    real *in = in_base + t*dim;
    real *out = out_base + t*dim;
    real maxval = -THInf;
    accreal sum = 0;
    long d;

    for (d = 0; d < dim; d++)
      maxval = THMax(maxval, in[d]);

    /* sum of exp(x - max), accumulated in higher precision */
    for (d = 0; d < dim; d++)
      sum += THExpMinusApprox(maxval - in[d]);

    /* log(sum exp(x)) = max + log(sum exp(x - max)) */
    accreal shift = maxval + log(sum);
    for (d = 0; d < dim; d++)
      out[d] = in[d] - shift;
  }

  THTensor_(free)(input);
}
/* Lua binding: log-softmax forward pass over a 1D vector or a 2D batch.
 * Reads the input tensor from stack slot 2, writes into the module's
 * `output` field (stack slot 1), and returns 1 (the output tensor). */
static int nn_(LogSoftMax_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  long nframe = 0, dim = 0;
  long t, d;

  if (input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
  }
  else if (input->nDimension == 2)
  {
    nframe = input->size[0];
    dim = input->size[1];
  }
  else
    THArgCheck(0, 2, "vector or matrix expected");

  /* Ensure contiguous storage before walking it with raw pointers. */
  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  real *in = THTensor_(data)(input);
  real *out = THTensor_(data)(output);

  for (t = 0; t < nframe; t++)
  {
    real maxval = -THInf;
    accreal sum = 0;

    for (d = 0; d < dim; d++)
      maxval = THMax(maxval, in[d]);

    /* Stable log-sum-exp: subtract the max before exponentiating. */
    for (d = 0; d < dim; d++)
      sum += THExpMinusApprox(maxval - in[d]);

    accreal shift = maxval + log(sum);
    for (d = 0; d < dim; d++)
      out[d] = in[d] - shift;

    /* Advance both cursors to the next frame. */
    in += dim;
    out += dim;
  }

  THTensor_(free)(input);
  return 1;
}
/* Computes softmax over the feature dimension of a 1D/2D/3D/4D contiguous
 * tensor. For 3D/4D input the softmax dimension is strided: each of the
 * nframe*stride independent "columns" holds `dim` values spaced `stride`
 * elements apart. The max is subtracted before exponentiating for
 * numerical stability, and all columns are processed in parallel. */
void THNN_(SoftMax_updateOutput)(THNNState *state, THTensor *input, THTensor *output)
{
  long nframe = 0, dim = 0, stride = 0;
  long t;

  /* Decode the layout: (frames, softmax dim, spatial stride). */
  switch (input->nDimension)
  {
    case 1:
      nframe = 1;
      dim = input->size[0];
      stride = 1;
      break;
    case 2:
      nframe = input->size[0];
      dim = input->size[1];
      stride = 1;
      break;
    case 3:
      nframe = 1;
      dim = input->size[0];
      stride = input->size[1]*input->size[2];
      break;
    case 4:
      nframe = input->size[0];
      dim = input->size[1];
      stride = input->size[2]*input->size[3];
      break;
    default:
      THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
  }

  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  real *in_base = THTensor_(data)(input);
  real *out_base = THTensor_(data)(output);

#pragma omp parallel for private(t)
  for (t = 0; t < stride*nframe; t++)
  {
    long frame = t / stride;
    long offset = t % stride;
    real *in = in_base + frame*dim*stride + offset;
    real *out = out_base + frame*dim*stride + offset;
    real maxval = -THInf;
    accreal sum = 0;
    long d;

    for (d = 0; d < dim; d++)
    {
      real v = in[d*stride];
      if (v >= maxval)
        maxval = v;
    }

    /* exp(x - max) per element; accumulate the normalizer in accreal. */
    for (d = 0; d < dim; d++)
    {
      real e = THExpMinusApprox(maxval - in[d*stride]);
      out[d*stride] = e;
      sum += e;
    }

    /* Normalize; keep the original `*= 1/sum` form so the floating-point
     * result is bit-identical to the previous implementation. */
    for (d = 0; d < dim; d++)
      out[d*stride] *= 1/sum;
  }

  THTensor_(free)(input);
}