/*
 * Matrix-vector product on a column-major matrix:
 *
 *   y := alpha * op(A) * x + beta * y
 *
 * where op(A) is A when trans is neither 't' nor 'T', and the transpose of A
 * otherwise.
 *
 *   trans      'T'/'t' selects the transposed product; anything else is
 *              handled as "not transposed" by the fallback code.
 *   m, n       number of rows / columns of A as stored.
 *   alpha      scale applied to the product.
 *   a, lda     matrix data; column j starts at a + j*lda.
 *   x, incx    input vector and its element stride (n elements when not
 *              transposed, m elements when transposed).
 *   beta       scale applied to the previous contents of y.
 *   y, incy    output vector and its element stride (m elements when not
 *              transposed, n elements when transposed).
 *
 * When built with USE_BLAS for float/double and every size/stride fits in a
 * positive int, the call is forwarded to sgemv_/dgemv_. Otherwise a plain C
 * fallback is used.
 */
void THBlas_(gemv)(char trans, int64_t m, int64_t n, real alpha, real *a, int64_t lda, real *x, int64_t incx, real beta, real *y, int64_t incy)
{
  /* With a single column the leading dimension is never used to step between
     columns, so normalise it to m. */
  if(n == 1)
    lda = m;

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  if( (m <= INT_MAX) && (n <= INT_MAX) &&
      (lda <= INT_MAX) &&
      (incx > 0) && (incx <= INT_MAX) &&
      (incy > 0) && (incy <= INT_MAX) )
  {
    int i_m = (int)m;
    int i_n = (int)n;
    int i_lda = (int)lda;
    int i_incx = (int)incx;
    int i_incy = (int)incy;

    /* The message uses %d, so pass the int copies: handing int64_t values to
       a %d conversion through varargs is a type mismatch. */
    THArgCheck(lda >= THMax(1, m), 6, "lda should be at least max(1, m=%d), but have %d", i_m, i_lda);

#if defined(TH_REAL_IS_DOUBLE)
    dgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy);
#else
    sgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy);
#endif
    return;
  }
#endif

  {
    int64_t i, j;

    if( (trans == 'T') || (trans == 't') )
    {
      /* y[i] = alpha * dot(column i of A, x) + beta * y[i] */
      for(i = 0; i < n; i++)
      {
        real sum = 0;
        real *row_ = a+lda*i;
        for(j = 0; j < m; j++)
          sum += x[j*incx]*row_[j];
        /* beta == 0 means "ignore the old y", so never read it. */
        if (beta == 0)
          y[i*incy] = alpha*sum;
        else
          y[i*incy] = beta*y[i*incy] + alpha*sum;
      }
    }
    else
    {
      /* First form y := beta*y. For beta == 0 write zeros explicitly instead
         of multiplying, so NaN/Inf left in y cannot leak into the result;
         this mirrors the beta == 0 handling of the transposed branch. */
      if(beta == 0)
      {
        for(i = 0; i < m; i++)
          y[i*incy] = 0;
      }
      else if(beta != 1)
        THBlas_(scal)(m, beta, y, incy);

      /* Then accumulate alpha * x[j] * (column j of A) into y. */
      for(j = 0; j < n; j++)
      {
        real *column_ = a+lda*j;
        real z = alpha*x[j*incx];
        for(i = 0; i < m; i++)
          y[i*incy] += z*column_[i];
      }
    }
  }
}
/*
 * Forward pass of log-softmax.
 *
 * Accepts a 1-D tensor (treated as one frame) or a 2-D tensor (one frame per
 * row); any other rank fails the argument check. `output` is resized to match
 * a contiguous copy of `input`, and for each frame
 *
 *   output[d] = input[d] - (max + log(sum_d THExpMinusApprox(max - input[d])))
 *
 * where max is the largest element of the frame. `state` is not used here.
 */
void THNN_(LogSoftMax_updateOutput)(THNNState *state, THTensor *input, THTensor *output)
{
  long nframe = 0, dim = 0;
  long t;

  switch (input->nDimension)
  {
    case 1:
      nframe = 1;
      dim = input->size[0];
      break;
    case 2:
      nframe = input->size[0];
      dim = input->size[1];
      break;
    default:
      THArgCheck(0, 2, "vector or matrix expected");
      break;
  }

  /* Work on a contiguous view so frame t starts at offset dim*t. */
  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  real *input_data0 = THTensor_(data)(input);
  real *output_data0 = THTensor_(data)(output);

  /* Everything declared inside the loop body is per-iteration, so only the
     loop counter needs to be listed as private. */
#pragma omp parallel for private(t)
  for (t = 0; t < nframe; t++)
  {
    real *frame_in = input_data0 + dim*t;
    real *frame_out = output_data0 + dim*t;
    real maxInput = -THInf;
    accreal logsum = 0;
    long d;

    /* Pass 1: largest element of the frame. */
    for (d = 0; d < dim; d++)
      maxInput = THMax(maxInput, frame_in[d]);

    /* Pass 2: accumulate the (approximate) exponentials relative to the max. */
    for (d = 0; d < dim; d++)
      logsum += THExpMinusApprox(maxInput-frame_in[d]);
    logsum = maxInput + log(logsum);

    /* Pass 3: subtract the log-normaliser from every element. */
    for (d = 0; d < dim; d++)
      frame_out[d] = frame_in[d] - logsum;
  }

  /* Drop the reference obtained from newContiguous. */
  THTensor_(free)(input);
}
/*
 * Lua entry point for the log-softmax forward pass.
 *
 * Reads the input tensor from stack slot 2 and the "output" tensor field from
 * the object in slot 1. The input must be 1-D (one frame) or 2-D (one frame
 * per row); otherwise the argument check fails. For every frame:
 *
 *   output[d] = input[d] - (max + log(sum_d THExpMinusApprox(max - input[d])))
 *
 * Returns 1, i.e. reports one result to Lua (presumably the output tensor left
 * on the stack by luaT_getfieldcheckudata -- confirm against luaT).
 */
static int nn_(LogSoftMax_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  real *input_data, *output_data;
  long nframe = 0, dim = 0;
  long t, d;

  switch(input->nDimension)
  {
    case 1:
      nframe = 1;
      dim = input->size[0];
      break;
    case 2:
      nframe = input->size[0];
      dim = input->size[1];
      break;
    default:
      THArgCheck(0, 2, "vector or matrix expected");
      break;
  }

  /* Use a contiguous view so frame t lives at offset t*dim. */
  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);
  input_data = THTensor_(data)(input);
  output_data = THTensor_(data)(output);

  for(t = 0; t < nframe; t++)
  {
    real *frame_in = input_data + t*dim;
    real *frame_out = output_data + t*dim;
    accreal logsum = 0;
    real maxInput = -THInf;

    /* Largest element of the frame. */
    for(d = 0; d < dim; d++)
      maxInput = THMax(maxInput, frame_in[d]);

    /* Sum of (approximate) exponentials relative to the max. */
    for(d = 0; d < dim; d++)
      logsum += THExpMinusApprox(maxInput-frame_in[d]);
    logsum = maxInput + log(logsum);

    /* Subtract the log-normaliser from every element. */
    for(d = 0; d < dim; d++)
      frame_out[d] = frame_in[d] - logsum;
  }

  /* Drop the reference obtained from newContiguous. */
  THTensor_(free)(input);

  return 1;
}
/*
 * Rank-1 update of a column-major matrix:
 *
 *   A := A + alpha * x * y^T
 *
 *   m, n       number of rows / columns of A.
 *   alpha      scale applied to the outer product.
 *   x, incx    vector of m elements and its element stride.
 *   y, incy    vector of n elements and its element stride.
 *   a, lda     matrix data, updated in place; column j starts at a + j*lda.
 *
 * When built with USE_BLAS for float/double and every size/stride fits in a
 * positive int, the call is forwarded to sger_/dger_. Otherwise a plain C
 * fallback is used.
 */
void THBlas_(ger)(int64_t m, int64_t n, real alpha, real *x, int64_t incx, real *y, int64_t incy, real *a, int64_t lda)
{
  /* With a single column the leading dimension is never used to step between
     columns, so normalise it to m. */
  if(n == 1)
    lda = m;

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  if( (m <= INT_MAX) && (n <= INT_MAX) &&
      (lda <= INT_MAX) &&
      (incx > 0) && (incx <= INT_MAX) &&
      (incy > 0) && (incy <= INT_MAX) )
  {
    int i_m = (int)m;
    int i_n = (int)n;
    int i_lda = (int)lda;
    int i_incx = (int)incx;
    int i_incy = (int)incy;

    /* The message uses %d, so pass the int copies: handing int64_t values to
       a %d conversion through varargs is a type mismatch. */
    THArgCheck(lda >= THMax(1, m), 9, "lda should be at least max(1, m=%d), but have %d", i_m, i_lda);

#if defined(TH_REAL_IS_DOUBLE)
    dger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);
#else
    sger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);
#endif
    return;
  }
#endif

  {
    int64_t i, j;

    /* Column j of A receives (alpha * y[j]) * x. */
    for(j = 0; j < n; j++)
    {
      real *column_ = a+j*lda;
      real z = alpha*y[j*incy];
      for(i = 0; i < m; i++)
        column_[i] += z*x[i*incx];
    }
  }
}
/*
 * Matrix-matrix product on column-major matrices:
 *
 *   C := alpha * op(A) * op(B) + beta * C
 *
 * where op(X) is the transpose of X when the matching trans flag is 't' or
 * 'T', and X itself otherwise. C is m-by-n and the inner dimension is k.
 *
 *   transa, transb   transpose selectors for A and B.
 *   m, n, k          rows of C, columns of C, inner (summed) dimension.
 *   alpha            scale applied to the product.
 *   a, lda           A data and its leading dimension.
 *   b, ldb           B data and its leading dimension.
 *   beta             scale applied to the previous contents of C; when it is
 *                    0 the fallback never reads C.
 *   c, ldc           C data (updated in place); element (i, j) is c[j*ldc+i].
 *
 * When built with USE_BLAS for float/double and all sizes fit in an int, the
 * call is forwarded to sgemm_/dgemm_. Otherwise a plain C fallback is used.
 */
void THBlas_(gemm)(char transa, char transb, int64_t m, int64_t n, int64_t k, real alpha, real *a, int64_t lda, real *b, int64_t ldb, real beta, real *c, int64_t ldc)
{
  int transa_ = ((transa == 't') || (transa == 'T'));
  int transb_ = ((transb == 't') || (transb == 'T'));

  /* When a matrix has a single stored column its leading dimension is never
     used to step between columns, so normalise it to the stored row count
     (presumably so arbitrary caller strides still pass the checks below). */
  if(n == 1)
    ldc = m;

  if(transa_)
  {
    if(m == 1)
      lda = k;
  }
  else
  {
    if(k == 1)
      lda = m;
  }

  if(transb_)
  {
    if(k == 1)
      ldb = n;
  }
  else
  {
    if(n == 1)
      ldb = k;
  }

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) &&
      (lda <= INT_MAX) && (ldb <= INT_MAX) && (ldc <= INT_MAX) )
  {
    int i_m = (int)m;
    int i_n = (int)n;
    int i_k = (int)k;
    int i_lda = (int)lda;
    int i_ldb = (int)ldb;
    int i_ldc = (int)ldc;
    /* Stored row counts of A and B, as int, for the %d diagnostics below. */
    int i_a_rows = transa_ ? i_k : i_m;
    int i_b_rows = transb_ ? i_n : i_k;

    /* The messages use %d, so pass int values: handing int64_t values to a
       %d conversion through varargs is a type mismatch. */
    THArgCheck(lda >= THMax(1, (transa_ ? k : m)), 8, "lda should be at least max(1, %d), but have %d", i_a_rows, i_lda);
    THArgCheck(ldb >= THMax(1, (transb_ ? n : k)), 10, "ldb should be at least max(1, %d), but have %d", i_b_rows, i_ldb);
    THArgCheck(ldc >= THMax(1, m), 13, "ldc should be at least max(1, m=%d), but have %d", i_m, i_ldc);

#if defined(TH_REAL_IS_DOUBLE)
    dgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);
#else
    sgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);
#endif
    return;
  }
#endif

  {
    int64_t i, j, l;

    if(!transa_ && !transb_)
    {
      /* A(i,l) = a[i + l*lda], B(l,j) = b[l + j*ldb] */
      real *a_ = a;
      for(i = 0; i < m; i++)
      {
        real *b_ = b;
        for(j = 0; j < n; j++)
        {
          real sum = 0;
          for(l = 0; l < k; l++)
            sum += a_[l*lda]*b_[l];
          b_ += ldb;
          if (beta == 0)
            c[j*ldc+i] = alpha*sum;
          else
            c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
        }
        a_++;
      }
    }
    else if(transa_ && !transb_)
    {
      /* A^T(i,l) = a[l + i*lda], B(l,j) = b[l + j*ldb] */
      real *a_ = a;
      for(i = 0; i < m; i++)
      {
        real *b_ = b;
        for(j = 0; j < n; j++)
        {
          real sum = 0;
          for(l = 0; l < k; l++)
            sum += a_[l]*b_[l];
          b_ += ldb;
          if (beta == 0)
            c[j*ldc+i] = alpha*sum;
          else
            c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
        }
        a_ += lda;
      }
    }
    else if(!transa_ && transb_)
    {
      /* A(i,l) = a[i + l*lda], B^T(l,j) = b[j + l*ldb] */
      real *a_ = a;
      for(i = 0; i < m; i++)
      {
        real *b_ = b;
        for(j = 0; j < n; j++)
        {
          real sum = 0;
          for(l = 0; l < k; l++)
            sum += a_[l*lda]*b_[l*ldb];
          b_++;
          if (beta == 0)
            c[j*ldc+i] = alpha*sum;
          else
            c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
        }
        a_++;
      }
    }
    else
    {
      /* A^T(i,l) = a[l + i*lda], B^T(l,j) = b[j + l*ldb] */
      real *a_ = a;
      for(i = 0; i < m; i++)
      {
        real *b_ = b;
        for(j = 0; j < n; j++)
        {
          real sum = 0;
          for(l = 0; l < k; l++)
            sum += a_[l]*b_[l*ldb];
          b_++;
          if (beta == 0)
            c[j*ldc+i] = alpha*sum;
          else
            c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
        }
        a_ += lda;
      }
    }
  }
}