void normalizeBackward_cpu(T* normalized, T const* data, T const* dzdy, size_t width, size_t height, size_t depth, size_t normDepth, T kappa, T alpha, T beta) { int m1 = ((signed)normDepth-1)/2 ; int m2 = normDepth - m1 - 1 ; int offset = width*height ; T ab2 = 2*alpha*beta ; for (int h = 0 ; h < height ; ++h) { for (int w = 0 ; w < width ; ++w) { int t, q ; T const* x = data + w + h * width ; T* y = normalized + w + h * width ; T const* z = dzdy + w + h * width ; T acc = 0 ; for (t = 0 ; t < (signed)depth ; ++t) { yat(t) = 0 ; } for (t = -m2 ; t < (signed)depth ; ++t) { int q1 = t-m1 ; int q2 = t+m2 ; T ap = 0 ; T am = 0 ; if (t-m1-1 >= 0) { am = xat(t-m1-1) ; } else { q1 = 0 ; } if (t+m2 < depth) { ap = xat(t+m2) ; } else { q2 = depth - 1 ; } acc += ap*ap - am*am ; T L = kappa + alpha * acc ; T Lbeta = pow(L, -beta) ; T Lbeta1 = Lbeta / L ; if (0 <= t && t < depth) { yat(t) += zat(t) * Lbeta ; for (q = q1 ; q <= q2 ; ++ q) { yat(q) -= zat(t) * xat(t) * xat(q) * ab2 * Lbeta1 ; } } } } } }
static vl::ErrorCode backward(type * output, type const* data, type const* derOutput, size_t width, size_t height, size_t depth, size_t num, size_t normDepth, type kappa, type alpha, type beta) { int m1 = ((signed)normDepth-1)/2 ; int m2 = (int)normDepth - m1 - 1 ; int offset = (int)width*(int)height ; type ab2 = 2*alpha*beta ; int t, q ; #ifndef VL_NNNORMALIZE_FAST for (int k = 0 ; k < num ; ++k) { for (int h = 0 ; h < height ; ++h) { for (int w = 0 ; w < width ; ++w) { type const* x = data + w + h * width ; T* y = output + w + h * width ; type const* z = derOutput + w + h * width ; type acc = 0 ; for (t = 0 ; t < (signed)depth ; ++t) { yat(t) = 0 ; } for (t = -m2 ; t < (signed)depth ; ++t) { int q1 = t-m1 ; int q2 = t+m2 ; type ap = 0 ; type am = 0 ; if (t-m1-1 >= 0) { am = xat(t-m1-1) ; } else { q1 = 0 ; } if (t+m2 < depth) { ap = xat(t+m2) ; } else { q2 = depth - 1 ; } acc += ap*ap - am*am ; type L = kappa + alpha * acc ; type Lbeta = fast_pow(L, -beta) ; type Lbeta1 = Lbeta / L ; if (0 <= t && t < depth) { yat(t) += zat(t) * Lbeta ; for (q = q1 ; q <= q2 ; ++ q) { yat(q) -= zat(t) * xat(t) * xat(q) * ab2 * Lbeta1 ; } } } } } data += width*height*depth ; output += width*height*depth ; derOutput += width*height*depth ; } #else type * restrict acc = (type*) malloc(sizeof(type) * width*height) ; type * restrict acc2 = (type*) malloc(sizeof(type) * width*height*depth) ; for (int k = 0 ; k < num ; ++k) { memset(acc, 0, sizeof(type) * width*height) ; for (t = -m2 ; t < (signed)depth ; ++t) { /* Compue the square of the input data x.^2 summed in the normalization window. This is done incrementally, by updating the previous normalization window sum. */ { int const tm = t - m1 - 1 ; int const tp = t + m2 ; type const* restrict datam_ = data + offset * tm ; type const* restrict datap_ = data + offset * tp ; type *end = acc + width*height ; if (0 <= tm && tp < depth) { for(type * restrict acc_ = acc ; acc_ != end ; ++acc_, ++datap_, ++datam_) { type am = *datam_ ; type ap = *datap_ ; *acc_ += ap*ap - am*am ; } } else if (0 > tm && tp < depth) { for(type * restrict acc_ = acc ; acc_ != end ; ++acc_, ++datap_) { type ap = *datap_ ; *acc_ += ap*ap ; } } else if (0 <= tm && tp >= depth) { for(type * restrict acc_ = acc ; acc_ != end ; ++acc_, ++datam_) { type am = *datam_ ; *acc_ -= am*am ; } } } /* Compute the arguments of the summation in the derivative expression, storing them into acc2. */ if (0 <= t && t < depth) { type const* restrict data_ = data + offset * t ; type const* restrict derOutput_ = derOutput + offset * t ; type * restrict output_ = output + offset * t ; type * restrict acc2_ = acc2 + offset * t ; type * end = acc + width*height ; for(type * restrict acc_ = acc ; acc_ != end ; ++acc_, ++acc2_, ++data_, ++derOutput_, ++output_) { type L = kappa + alpha * (*acc_) ; type Lbeta = fast_pow(L, -beta) ; type temp1 = (*derOutput_) * Lbeta ; type temp2 = (*data_) * ab2 * temp1 / L ; *output_ = temp1 ; *acc2_ = temp2 ; } } } /* Integrate along feature channels in acc2, summing plane t-1 to plane t. */ for (t = 1 ; t < (signed)depth ; ++t) { type * restrict acc2_ = acc2 + t * offset ; type const* restrict src_ = acc2_ - offset ; type const* end = acc2_ + offset ; for( ; acc2_ != end ; ++acc2_, ++src_) { *acc2_ += *src_ ; } } /* Compute summation in the derivative expression from the integral just obtained. */ for (t = 0 ; t < (signed)depth ; ++t) { int q1 = t - m2 - 1 ; int q2 = ((t + m1) <= (depth - 1)) ? t + m1 : depth - 1 ; type const* restrict acc22_ = acc2 + offset * q2 ; type const* restrict acc21_ = acc2 + offset * q1 ; type const* restrict data_ = data + offset * t ; type const* restrict end = data_ + width*height ; type * restrict output_ = output + offset * t ; if (q1 >= 0) { for( ; data_ != end ; ++data_, ++acc22_, ++acc21_, ++output_) { *output_ -= (*acc22_ - *acc21_) * (*data_) ; } } else { for( ; data_ != end ; ++data_, ++acc22_, ++output_) { *output_ -= (*acc22_) * (*data_) ; } } } data += width*height*depth ; output += width*height*depth ; derOutput += width*height*depth ; }