void EltwiseLayer<Dtype>::forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
    // Element-wise combination of all bottom blobs into top[0].
    // PROD: running product, SUM: coefficient-weighted sum,
    // MAX: element-wise maximum plus an argmax mask for backward.
    const int count = top[0]->count();
    Dtype* output = top[0]->mutable_cpu_data();
    int* winner_mask = NULL;
    const Dtype* input_a = NULL;
    const Dtype* input_b = NULL;
    switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD:
        // Seed the product with the first two inputs, then fold in the rest.
        dragon_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), output);
        for (int blob = 2; blob < bottom.size(); ++blob)
            dragon_mul(count, output, bottom[blob]->cpu_data(), output);
        break;
    case EltwiseParameter_EltwiseOp_SUM:
        // output = sum_i coeffs_[i] * bottom[i], accumulated via axpy.
        dragon_set(count, Dtype(0), output);
        // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
        for (int blob = 0; blob < bottom.size(); ++blob)
            dragon_axpy(count, coeffs_[blob], bottom[blob]->cpu_data(), output);
        break;
    case EltwiseParameter_EltwiseOp_MAX:
        // Reset the argmax mask and the running maxima.
        winner_mask = max_idx_.mutable_cpu_data();
        dragon_set(count, -1, winner_mask);
        dragon_set(count, Dtype(-FLT_MAX), output);
        // Seed with the first pair of inputs (ties go to blob 1).
        input_a = bottom[0]->cpu_data();
        input_b = bottom[1]->cpu_data();
        for (int idx = 0; idx < count; ++idx) {
            if (input_b[idx] >= input_a[idx]) {
                output[idx] = input_b[idx];   // max value
                winner_mask[idx] = 1;         // winning blob index
            } else {
                output[idx] = input_a[idx];   // max value
                winner_mask[idx] = 0;         // winning blob index
            }
        }
        // Fold in the remaining inputs; a later blob wins only on strict '>'.
        for (int blob = 2; blob < bottom.size(); ++blob) {
            input_b = bottom[blob]->cpu_data();
            for (int idx = 0; idx < count; ++idx) {
                if (input_b[idx] > output[idx]) {
                    output[idx] = input_b[idx];  // max value
                    winner_mask[idx] = blob;     // winning blob index
                }
            }
        }
        break;
    default:
        LOG(FATAL) << "Unknown elementwise operation.";
    }
}
void Net<Dtype>::clearParamDiffs(){
    // Zero the gradient buffer of every learnable parameter so that the
    // next backward pass accumulates fresh diffs.
    for (int i = 0; i < learnable_params.size(); i++){
        Blob<Dtype>* blob = learnable_params[i];
        switch (Dragon::get_mode()){
        case Dragon::CPU:
            dragon_set(blob->count(), (Dtype)0, blob->mutable_cpu_diff());
            break;
        case Dragon::GPU:
#ifndef CPU_ONLY
            dragon_gpu_set(blob->count(), (Dtype)0, blob->mutable_gpu_diff());
#else
            // Fix: previously a CPU_ONLY build compiled this case to an empty
            // fall-through, silently leaving stale diffs in place. Fail loudly
            // instead of training with un-cleared gradients.
            LOG(FATAL) << "Cannot use GPU mode in a CPU_ONLY build.";
#endif
            break;
        default:
            LOG(FATAL) << "Unknown running mode.";
        }
    }
}
// Forward pass for spatial pooling (MAX / AVG; STOCHASTIC not implemented).
// For MAX pooling the index of the winning bottom unit is recorded — either
// into top[1] (when a second top blob is supplied) or into the internal
// max_idx blob — so backward_cpu can route gradients to the contributors.
// Note: bottom_data/top_data/mask pointers are advanced by one channel's
// offset at the end of each channel loop, so all window indices below are
// relative to the current (n, c) plane.
void PoolingLayer<Dtype>::forward_cpu(const vector<Blob<Dtype>*> &bottom, const vector<Blob<Dtype>*> &top){
    PoolingParameter pool_param = param.pooling_param();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();
    const int top_count = top[0]->count();
    // a second top blob, when present, exports the argmax mask as data
    const bool use_top_mask = top.size() > 1;
    int *mask = NULL;
    Dtype *top_mask = NULL;
    switch (pool_param.method()){
    case PoolingParameter_Method_MAX:
        if (use_top_mask) top_mask = top[1]->mutable_cpu_data();
        else mask = max_idx.mutable_cpu_data();
        for (int n = 0; n < bottom[0]->num(); n++){
            for (int c = 0; c < channels; c++){
                for (int ph = 0; ph < pooling_height; ph++){
                    for (int pw = 0; pw < pooling_width; pw++){
                        // compute the start position of the window
                        int start_h = ph*stride_h - pad_h;
                        int start_w = pw*stride_w - pad_w;
                        // compute the end position,
                        // clipped because of padding at the end
                        int end_h = min(start_h + kernel_h, height);
                        int end_w = min(start_w + kernel_w, width);
                        // clip the start because of padding at the beginning
                        start_h = max(start_h, 0);
                        start_w = max(start_w, 0);
                        // pool_idx is the linear index of this output unit
                        const int pool_idx = ph*pooling_width + pw;
                        // for a fixed sample and channel, scan the window for
                        // the max value and log its index for diff computing;
                        // note that bottom_data/top_data are offset per channel
                        Dtype max_val = -FLT_MAX;
                        int max_idx = -1;
                        for (int h = start_h; h < end_h; h++){
                            for (int w = start_w; w < end_w; w++){
                                // idx: linear index (within this plane) of the
                                // input unit feeding this output unit
                                const int idx = h*width + w;
                                if (bottom_data[idx]>max_val){
                                    max_val = bottom_data[idx];
                                    max_idx = idx;
                                }
                            } // end w
                        } // end h
                        top_data[pool_idx] = max_val;
                        if (use_top_mask) top_mask[pool_idx] = max_idx;
                        else mask[pool_idx] = max_idx;
                    } // end pw
                } // end ph
                // advance all pointers by one channel
                bottom_data += bottom[0]->offset(0, 1);
                top_data += top[0]->offset(0, 1);
                if (use_top_mask) top_mask += top[0]->offset(0, 1);
                else mask += top[0]->offset(0, 1);
            } // end c
        } // end n
        break;
    case PoolingParameter_Method_AVG:
        // outputs are accumulated with '+=', so clear them first
        dragon_set(top_count, Dtype(0), top_data);
        for (int n = 0; n < bottom[0]->num(); n++){
            for (int c = 0; c < channels; c++){
                for (int ph = 0; ph < pooling_height; ph++){
                    for (int pw = 0; pw < pooling_width; pw++){
                        int start_h = ph*stride_h - pad_h;
                        int start_w = pw*stride_w - pad_w;
                        int end_h = min(start_h + kernel_h, height + pad_h);
                        int end_w = min(start_w + kernel_w, width + pad_w);
                        // before clipping, compute the pooling area (including
                        // padding) used as the averaging denominator
                        int pool_area = (end_h - start_h)*(end_w - start_w);
                        // clip the window to the valid input region
                        end_h = min(end_h, height);
                        end_w = min(end_w, width);
                        start_h = max(start_h, 0);
                        start_w = max(start_w, 0);
                        const int pool_idx = ph*pooling_width + pw;
                        // sum up all units in the window
                        for (int h = start_h; h < end_h; h++){
                            for (int w = start_w; w < end_w; w++){
                                const int idx = h*width + w;
                                top_data[pool_idx] += bottom_data[idx];
                            }
                        }
                        // do the average; note that AVG pooling does not need
                        // to log an index for diff computing
                        top_data[pool_idx] /= pool_area;
                    } //end pw
                } //end ph
                // advance both pointers by one channel
                bottom_data += bottom[0]->offset(0, 1);
                top_data += top[0]->offset(0, 1);
            } //end c
        } //end n
        break;
    case PoolingParameter_Method_STOCHASTIC:
        NOT_IMPLEMENTED;
        break;
    default:
        LOG(FATAL) << "Unknown pooling method.";
    }
}
// Backward pass for spatial pooling. Pooling has no learnable parameters,
// so only the data gradient (bottom_diff) is computed.
// MAX: each top gradient is routed to the single bottom unit recorded in the
// argmax mask during forward. AVG: each top gradient is spread uniformly over
// its (pre-clip) pooling area.
void PoolingLayer<Dtype>::backward_cpu(const vector<Blob<Dtype>*> &top, const vector<bool> &data_need_bp, const vector<Blob<Dtype>*> &bottom){
    // pooling layer only computes data_diff
    if (!data_need_bp[0]) return;
    PoolingParameter pool_param = param.pooling_param();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    // gradients are accumulated with '+=', so zero the buffer first
    dragon_set(bottom[0]->count(), Dtype(0), bottom_diff);
    const bool use_top_mask = top.size() > 1;
    const int* mask = NULL;
    const Dtype* top_mask = NULL;
    switch (pool_param.method()){
    case PoolingParameter_Method_MAX:
        if (use_top_mask) top_mask = top[1]->cpu_data();
        else mask = max_idx.cpu_data();
        for (int n = 0; n < bottom[0]->num(); n++){
            for (int c = 0; c < channels; c++){
                for (int ph = 0; ph < pooling_height; ph++){
                    for (int pw = 0; pw < pooling_width; pw++){
                        const int pool_idx = ph*pooling_width + pw;
                        // idx selects the bottom unit that won the forward max
                        const int idx = use_top_mask ? top_mask[pool_idx] : mask[pool_idx];
                        // bottom_diff += delta_(layer+1)
                        // we allow overlapping pooling, so several top units
                        // may route their gradient to the same bottom unit;
                        // hence '+=' instead of '='. Non-contributing bottom
                        // units keep the zero set by dragon_set() above.
                        bottom_diff[idx] += top_diff[pool_idx];
                    } // end pw
                } // end ph
                // advance all pointers by one channel
                bottom_diff += bottom[0]->offset(0, 1);
                top_diff += top[0]->offset(0, 1);
                if (use_top_mask) top_mask += top[0]->offset(0, 1);
                else mask += top[0]->offset(0, 1);
            } // end c
        } // end n
        break;
    case PoolingParameter_Method_AVG:
        for (int n = 0; n < bottom[0]->num(); n++){
            for (int c = 0; c < channels; c++){
                for (int ph = 0; ph < pooling_height; ph++){
                    for (int pw = 0; pw < pooling_width; pw++){
                        int start_h = ph*stride_h - pad_h;
                        int start_w = pw*stride_w - pad_w;
                        int end_h = min(start_h + kernel_h, height + pad_h);
                        int end_w = min(start_w + kernel_w, width + pad_w);
                        // before clipping, compute the pooling area used as
                        // the averaging denominator (must match forward_cpu)
                        int pool_area = (end_h - start_h)*(end_w - start_w);
                        // clip the window to the valid input region
                        end_h = min(end_h, height);
                        // Bug fix: was 'min(end_w + kernel_w, width)', which
                        // added kernel_w a second time and under-clipped the
                        // window at the right border, spreading gradient to
                        // columns outside the forward pooling window.
                        end_w = min(end_w, width);
                        start_h = max(start_h, 0);
                        start_w = max(start_w, 0);
                        const int pool_idx = ph*pooling_width + pw;
                        // top_data = (1/pool_area) * sum(bottom_data)
                        // => d(top_data)/d(bottom_data) = 1/pool_area
                        // combined with the upstream gradient this gives
                        // 'top_diff[pool_idx] / pool_area' per window unit
                        for (int h = start_h; h < end_h; h++){
                            for (int w = start_w; w < end_w; w++){
                                const int idx = h*width + w;
                                bottom_diff[idx] += (top_diff[pool_idx] / pool_area);
                            }
                        }
                    } // end pw
                } // end ph
                // advance both pointers by one channel
                bottom_diff += bottom[0]->offset(0, 1);
                top_diff += top[0]->offset(0, 1);
            } // end c
        } // end n
        break;
    case PoolingParameter_Method_STOCHASTIC:
        NOT_IMPLEMENTED;
        break;
    default:
        LOG(FATAL) << "Unknown pooling method.";
    }
}