void LRNLayer<Dtype>::CrossChannelForward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Cross-channel LRN forward pass (direct window-accumulation variant):
  //   top[c] = bottom[c] * (k + alpha/size * sum_{i in window(c)} bottom[i]^2)^(-beta)
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  Dtype alpha_over_size = alpha_ / size_;  // the alpha/n factor of the formula
  // Use top_data as scratch: square every bottom element once up front.
  caffe_sqr(num_ * channels_ * height_ * width_, bottom_data, top_data);
  // Seed every scale entry with the constant k.
  caffe_set(num_ * channels_ * height_ * width_, Dtype(k_), scale_data);
#ifdef _OPENMP
  // Each (n, c) pair writes only its own scale_.offset(n, c) slice and reads
  // the already-computed squares from top_data, so the collapsed nest is
  // race-free.
#pragma omp parallel for collapse(2)
#endif
  for (int n = 0; n < num_; n++) {
    for (int c = 0; c < channels_; c++) {
      // Accumulate the squared activations of the size_-channel window that
      // starts pre_pad_ channels before c.  Iterating exactly size_ channels
      // (rather than c - pre_pad_ .. c + pre_pad_) is identical for odd
      // size_ (where pre_pad_ == (size_ - 1) / 2) and remains correct if an
      // even size_ is ever allowed.
      for (int i = c - pre_pad_; i < c - pre_pad_ + size_; i++) {
        if ((i >= 0) && (i < channels_)) {
          caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
              top_data + scale_.offset(n, i),
              scale_data + scale_.offset(n, c));
        }
      }
    }
  }
  // Finish: top = bottom .* scale^(-beta)  (elementwise).
  caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, top_data);
  caffe_mul<Dtype>(scale_.count(), top_data, bottom_data, top_data);
}
void LRNLayer<Dtype>::CrossChannelForward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Cross-channel LRN forward pass (sliding-window variant):
  //   top[c] = bottom[c] * (k + alpha/size * sum_{i in window(c)} bottom[i]^2)^(-beta)
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  // Every scale entry starts at the constant k.
  caffe_set(scale_.count(), Dtype(k_), scale_data);
  // Scratch blob for one image's squared activations, padded with
  // size_ - 1 extra (zero) channels so every window read stays in bounds.
  Blob<Dtype> padded_square(1, channels_ + size_ - 1, height_, width_);
  Dtype* padded_square_data = padded_square.mutable_cpu_data();
  caffe_set(padded_square.count(), Dtype(0), padded_square_data);
  const Dtype alpha_over_size = alpha_ / size_;  // the alpha/n factor
  // Process one image at a time.
  for (int n = 0; n < num_; ++n) {
    // Square this image's activations into the middle of the padded buffer;
    // the leading and trailing pre_pad_ channels remain zero.
    caffe_sqr(channels_ * height_ * width_,
        bottom_data + bottom[0]->offset(n),
        padded_square_data + padded_square.offset(0, pre_pad_));
    // Build the scale for channel 0 by summing the first size_ padded
    // channels, i.e. k + alpha/n * sum of squares over channel 0's window.
    for (int c = 0; c < size_; ++c) {
      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
          padded_square_data + padded_square.offset(0, c),
          scale_data + scale_.offset(n, 0));
    }
    // Slide the window FIFO-style across the remaining channels: start from
    // the previous channel's sum, add the new head channel, then subtract
    // the channel that just left the window.
    for (int c = 1; c < channels_; ++c) {
      // copy previous scale
      caffe_copy<Dtype>(height_ * width_,
          scale_data + scale_.offset(n, c - 1),
          scale_data + scale_.offset(n, c));
      // add head
      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
          padded_square_data + padded_square.offset(0, c + size_ - 1),
          scale_data + scale_.offset(n, c));
      // subtract tail
      caffe_axpy<Dtype>(height_ * width_, -alpha_over_size,
          padded_square_data + padded_square.offset(0, c - 1),
          scale_data + scale_.offset(n, c));
    }
  }
  // Finish: top = bottom .* scale^(-beta)  (elementwise).
  caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, top_data);
  caffe_mul<Dtype>(scale_.count(), top_data, bottom_data, top_data);
}
void LRNLayer<Dtype>::CrossChannelForward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  // Legacy-API cross-channel LRN forward pass (top passed by pointer,
  // scale seeded with the constant 1 rather than a configurable k):
  //   top[c] = bottom[c] * (1 + alpha/size * sum_{i in window(c)} bottom[i]^2)^(-beta)
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  // Every scale entry starts at the constant 1.
  caffe_set(scale_.count(), Dtype(1), scale_data);
  // Scratch blob for one image's squared activations, padded with
  // size_ - 1 extra (zero) channels so every window read stays in bounds.
  Blob<Dtype> padded_square(1, channels_ + size_ - 1, height_, width_);
  Dtype* padded_square_data = padded_square.mutable_cpu_data();
  caffe_set(padded_square.count(), Dtype(0), padded_square_data);
  const Dtype alpha_over_size = alpha_ / size_;  // the alpha/n factor
  // Process one image at a time.
  for (int n = 0; n < num_; ++n) {
    // Square this image's activations into the middle of the padded buffer;
    // the leading and trailing pre_pad_ channels remain zero.
    caffe_sqr(channels_ * height_ * width_,
        bottom_data + bottom[0]->offset(n),
        padded_square_data + padded_square.offset(0, pre_pad_));
    // Build the scale for channel 0 by summing the first size_ padded
    // channels.
    for (int c = 0; c < size_; ++c) {
      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
          padded_square_data + padded_square.offset(0, c),
          scale_data + scale_.offset(n, 0));
    }
    // Slide the window FIFO-style across the remaining channels: start from
    // the previous channel's sum, add the new head channel, then subtract
    // the channel that just left the window.
    for (int c = 1; c < channels_; ++c) {
      // copy previous scale
      caffe_copy<Dtype>(height_ * width_,
          scale_data + scale_.offset(n, c - 1),
          scale_data + scale_.offset(n, c));
      // add head
      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
          padded_square_data + padded_square.offset(0, c + size_ - 1),
          scale_data + scale_.offset(n, c));
      // subtract tail
      caffe_axpy<Dtype>(height_ * width_, -alpha_over_size,
          padded_square_data + padded_square.offset(0, c - 1),
          scale_data + scale_.offset(n, c));
    }
  }
  // Finish: top = bottom .* scale^(-beta)  (elementwise).
  caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, top_data);
  caffe_mul<Dtype>(scale_.count(), top_data, bottom_data, top_data);
}