// CPU backward pass: zeroes the bottom diff and then accumulates the matching
// rows of the top diff into it.
//
// When pad_pos_ is set, each bottom row (n, c, h) receives width_in_ values
// read from the top diff at (n, c, h + pad_beg_, pad_beg_). Otherwise each top
// row (n, c, h) contributes width_out_ values to the bottom diff at
// (n, c, h - pad_beg_, -pad_beg_). Nothing happens when propagate_down[0] is
// false.
void PaddingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  Blob<Dtype>* const grad_in = bottom[0];
  Blob<Dtype>* const grad_out = top[0];
  // Start from a clean slate; rows are accumulated with axpy below.
  caffe_set(grad_in->count(), Dtype(0), grad_in->mutable_cpu_diff());
  for (int n = 0; n < num_; ++n) {
    for (int c = 0; c < channels_; ++c) {
      if (pad_pos_) {
        // Iterate over bottom rows and read from the shifted top position.
        for (int row = 0; row < height_in_; ++row) {
          caffe_axpy(width_in_, (Dtype)1.,
              grad_out->cpu_diff(n, c, row + pad_beg_, pad_beg_),
              grad_in->mutable_cpu_diff(n, c, row));
        }
      } else {
        // Iterate over top rows and write to the shifted bottom position.
        for (int row = 0; row < height_out_; ++row) {
          caffe_axpy(width_out_, (Dtype)1.,
              grad_out->cpu_diff(n, c, row),
              grad_in->mutable_cpu_diff(n, c, row - pad_beg_, - pad_beg_));
        }
      }
    }
  }
}
// Adds the regularization gradient of one learnable parameter to its diff.
//
// The effective decay is the solver-wide weight_decay times the per-parameter
// multiplier; a zero effective decay leaves the diff untouched. "L2" adds
// local_decay * data to the diff. "L1" writes the output of the sign routine
// on the data into temp_[param_id] and adds local_decay times that buffer.
// Any other regularization type, or an unknown Caffe mode, is fatal.
//
// param_id: index into the net's learnable parameters.
void SGDSolver<Dtype>::Regularize(int param_id) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_weight_decay =
      this->net_->params_weight_decay();
  const Dtype weight_decay = this->param_.weight_decay();
  const string regularization_type = this->param_.regularization_type();
  const Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
  switch (Caffe::mode()) {
  case Caffe::CPU: {
    if (!local_decay) {
      break;
    }
    Blob<Dtype>* const param = net_params[param_id];
    if (regularization_type == "L2") {
      // diff += local_decay * data
      caffe_axpy(param->count(), local_decay, param->cpu_data(),
          param->mutable_cpu_diff());
    } else if (regularization_type == "L1") {
      // diff += local_decay * sign(data), via the scratch blob
      caffe_cpu_sign(param->count(), param->cpu_data(),
          temp_[param_id]->mutable_cpu_data());
      caffe_axpy(param->count(), local_decay, temp_[param_id]->cpu_data(),
          param->mutable_cpu_diff());
    } else {
      LOG(FATAL) << "Unknown regularization type: " << regularization_type;
    }
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    if (!local_decay) {
      break;
    }
    Blob<Dtype>* const param = net_params[param_id];
    if (regularization_type == "L2") {
      // diff += local_decay * data
      caffe_gpu_axpy(param->count(), local_decay, param->gpu_data(),
          param->mutable_gpu_diff());
    } else if (regularization_type == "L1") {
      // diff += local_decay * sign(data), via the scratch blob
      caffe_gpu_sign(param->count(), param->gpu_data(),
          temp_[param_id]->mutable_gpu_data());
      caffe_gpu_axpy(param->count(), local_decay, temp_[param_id]->gpu_data(),
          param->mutable_gpu_diff());
    } else {
      LOG(FATAL) << "Unknown regularization type: " << regularization_type;
    }
#else
    NO_GPU;
#endif
    break;
  }
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// CPU forward pass: top = bottom .* mask, plus an optional additive bias.
//
// The mask is this->blobs_[0]. When bias_term_ is set, this->blobs_[1] is
// added element-wise to the product. Both operations run over
// top[0]->count() elements.
void MaskingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const output = top[0];
  // Element-wise product of the input and the mask.
  caffe_mul(output->count(), bottom[0]->cpu_data(),
      this->blobs_[0]->cpu_data(), output->mutable_cpu_data());
  if (!bias_term_) {
    return;
  }
  // y = 1 * bias + y
  caffe_axpy(output->count(), (Dtype)1.0, this->blobs_[1]->cpu_data(),
      output->mutable_cpu_data());
}
// CPU backward pass for the unrolled mean-field layer.
//
// Runs Backward_cpu on every mean-field iteration from the last to the first,
// back-propagates through the internal split layer, and finally rebuilds the
// diff of each of this layer's parameter blobs (those with
// param_propagate_down_ set) as the sum of the corresponding blob diffs of all
// iterations.
void MultiStageMeanfieldLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Visit the iterations in reverse order: num_iterations_ - 1 down to 0.
  int stage = num_iterations_;
  while (stage-- > 0) {
    meanfield_iterations_[stage]->Backward_cpu();
  }

  vector<bool> split_layer_propagate_down(1, true);
  split_layer_->Backward(split_layer_top_vec_, split_layer_propagate_down,
      split_layer_bottom_vec_);

  // Accumulate diffs from mean field iterations.
  for (int blob_id = 0; blob_id < this->blobs_.size(); ++blob_id) {
    if (!this->param_propagate_down_[blob_id]) {
      continue;
    }
    Blob<Dtype>* const shared_param = this->blobs_[blob_id].get();
    // Reset first so the result is exactly the sum over iterations.
    caffe_set(shared_param->count(), Dtype(0),
        shared_param->mutable_cpu_diff());
    for (int it = 0; it < num_iterations_; ++it) {
      const Dtype* iteration_diff =
          meanfield_iterations_[it]->blobs()[blob_id]->cpu_diff();
      caffe_axpy(shared_param->count(), Dtype(1.), iteration_diff,
          shared_param->mutable_cpu_diff());
    }
  }
}
// CPU backward pass.
//
// Steps, in order:
//   1. weight_two_ is scaled in place by 2.
//   2. For every sample n of the batch, top_diff[n] .* weight_two_ .* bottom[n]
//      is written to data_meta_ and accumulated into the diff of
//      this->blobs_[0].
//   3. If propagate_down[0] is set, bottom_diff[n] = top_diff[n] .* weight_two_.
//
// NOTE(review): step 1 mutates weight_two_ permanently, so step 3 also sees the
// doubled values, and calling Backward twice without an intervening Forward
// would double it again. Presumably Forward recomputes weight_two_ every time -
// confirm, and confirm that the doubled factor is intended for the bottom
// gradient.
// NOTE(review): the weight gradient is computed regardless of
// param_propagate_down_ - confirm that is intended.
void WeightPlusLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  caffe_scal(dim_, Dtype(2.0), weight_two_.mutable_cpu_data());

  // gradient with respect to weight
  for (int n = 0; n < batch_; ++n) {
    const int offset = n * dim_;
    caffe_mul(dim_, weight_two_.cpu_data(), bottom_data + offset,
        data_meta_.mutable_cpu_data() + offset);
    caffe_mul(dim_, top_diff + offset, data_meta_.cpu_data() + offset,
        data_meta_.mutable_cpu_data() + offset);
    // blobs_ lives in the dependent base class, so it must be reached through
    // this-> (as the rest of the layer code does).
    caffe_axpy(dim_, Dtype(1.0), data_meta_.cpu_data() + offset,
        this->blobs_[0]->mutable_cpu_diff());
  }

  // gradient with respect to bottom data
  if (propagate_down[0]) {
    for (int n = 0; n < batch_; ++n) {
      const int offset = n * dim_;
      caffe_mul(dim_, top_diff + offset, weight_two_.cpu_data(),
          bottom_diff + offset);
    }
  }
}
// CPU backward pass for the embedding lookup.
//
// Back-propagation to the input is rejected by the CHECK. When the weight
// blob requests a gradient, row n of the top diff (N_ values) is added to the
// weight-diff row selected by the n-th input value. When a bias is present and
// requests a gradient, the transposed M_ x N_ top diff times
// bias_multiplier_ is accumulated into the bias diff.
void EmbedLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  CHECK(!propagate_down[0]) << "Can't backpropagate to EmbedLayer input.";

  if (this->param_propagate_down_[0]) {
    const Dtype* out_grad = top[0]->cpu_diff();
    const Dtype* lookup_ids = bottom[0]->cpu_data();
    // Gradient with respect to weight
    Dtype* weight_grad = this->blobs_[0]->mutable_cpu_diff();
    for (int n = 0; n < M_; ++n) {
      const int row = static_cast<int>(lookup_ids[n]);
      // Debug-only sanity checks: the id must be an integer in [0, K_).
      DCHECK_GE(row, 0);
      DCHECK_LT(row, K_);
      DCHECK_EQ(static_cast<Dtype>(row), lookup_ids[n]) << "non-integer input";
      caffe_axpy(N_, Dtype(1), out_grad + n * N_, weight_grad + row * N_);
    }
  }

  const bool wants_bias_grad = bias_term_ && this->param_propagate_down_[1];
  if (wants_bias_grad) {
    const Dtype* out_grad = top[0]->cpu_diff();
    Dtype* bias_grad = this->blobs_[1]->mutable_cpu_diff();
    // beta = 1: accumulate into the existing bias diff.
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, Dtype(1), out_grad,
        bias_multiplier_.cpu_data(), Dtype(1), bias_grad);
  }
}
// CPU backward pass for the island loss.
//
// Center gradient (only when param_propagate_down_[0] is set):
//   * center-loss part: for each of the N_ centers, the negated distance rows
//     of the samples carrying that label are summed into variation_sum_ and
//     added to the center diff, scaled by 1 / (count + 1);
//   * island part: for each ordered pair of distinct centers (n, i) a
//     pairwise term built from center_module_ and center_dot_ is accumulated
//     into row n of the center diff.
// Bottom gradient (when propagate_down[0] is set): distance_ scaled by
// top_diff[0] / M_. Back-propagating to the labels (bottom[1]) is fatal.
//
// Fix: the island part used to assign (=) into center_diff, which threw away
// the center-loss gradient accumulated just above and kept only the term of
// the last i. It now accumulates (+=).
void IslandLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Gradient with respect to centers
  if (this->param_propagate_down_[0]) {
    const Dtype* label = bottom[1]->cpu_data();
    Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
    const Dtype* center_data = this->blobs_[0]->cpu_data();
    Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
    const Dtype* distance_data = distance_.cpu_data();

    // \sum_{y_i==j}
    caffe_set(N_ * K_, (Dtype)0., variation_sum_.mutable_cpu_data());
    for (int n = 0; n < N_; n++) {
      int count = 0;
      for (int m = 0; m < M_; m++) {
        const int label_value = static_cast<int>(label[m]);
        if (label_value == n) {
          count++;
          caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_,
              variation_sum_data + n * K_);
        }
      }
      // The +1 in the denominator keeps the scale finite for unused centers.
      caffe_axpy(K_, (Dtype)1. / (count + (Dtype)1.),
          variation_sum_data + n * K_, center_diff + n * K_);
    }

    // Island (pairwise center) loss backward
    for (int n = 0; n < N_; ++n) {
      const Dtype double_center_module_n =
          center_module_[n] * center_module_[n];
      for (int i = 0; i < N_; ++i) {
        if (i == n) {
          continue;
        }
        // Contribution of center i to the gradient of center n.
        // NOTE(review): if center_module_ holds the L2 norms, the gradient of
        // the cosine similarity would use c_i / alpha rather than alpha * c_i -
        // confirm against the derivation before changing.
        const Dtype alpha = center_module_[n] * center_module_[i];
        const Dtype belta = center_dot_[n][i] / (alpha * double_center_module_n);
        // alpha*c_i - beta*c_n
        for (int k = 0; k < K_; ++k) {
          // Each pair is counted twice in the loss, so the contribution to
          // center_diff is multiplied by 2.
          center_diff[n*K_ + k] += 2*lambda_/(N_-1)*(alpha*center_data[i*K_ + k] - belta*center_data[n*K_ + k]);
        }
      }
    }
  }
  // Gradient with respect to bottom data
  if (propagate_down[0]) {
    caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
    caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_,
        bottom[0]->mutable_cpu_diff());
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
}
// Computes the momentum-SGD update for every parameter blob and stores it in
// the blob's diff.
//
// For each parameter:
//   history = local_rate * diff + momentum * history
//   history += local_decay * local_rate * data     (only if local_decay != 0)
//   diff = history
// where local_rate and local_decay are the global values scaled by the
// per-parameter multipliers. The learning rate is logged every display()
// iterations. The same sequence is run with the CPU or the GPU math routines
// depending on Caffe::mode(); any other mode is fatal.
void SGDFeedbackSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  const Dtype rate = GetLearningRate();
  const bool log_now = this->param_.display() &&
      this->iter_ % this->param_.display() == 0;
  if (log_now) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  const Dtype momentum = this->param_.momentum();
  const Dtype weight_decay = this->param_.weight_decay();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *history_[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      // Fold the scaled gradient into the momentum history.
      caffe_cpu_axpby(param.count(), local_rate, param.cpu_diff(), momentum,
          hist.mutable_cpu_data());
      if (local_decay) {
        // add weight decay
        caffe_axpy(param.count(), local_decay * local_rate, param.cpu_data(),
            hist.mutable_cpu_data());
      }
      // The history is the update: copy it into the blob's diff.
      caffe_copy(param.count(), hist.cpu_data(), param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *history_[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      // Fold the scaled gradient into the momentum history.
      caffe_gpu_axpby(param.count(), local_rate, param.gpu_diff(), momentum,
          hist.mutable_gpu_data());
      if (local_decay) {
        // add weight decay
        caffe_gpu_axpy(param.count(), local_decay * local_rate,
            param.gpu_data(), hist.mutable_gpu_data());
      }
      // The history is the update: copy it into the blob's diff.
      caffe_gpu_copy(param.count(), hist.gpu_data(), param.mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// CPU forward pass of the element-wise layer.
//
//   PROD: top = bottom[0] .* bottom[1] .* ... .* bottom[k-1]
//   SUM : top = sum_i coeffs_[i] * bottom[i]
//   MAX : top = element-wise maximum over all bottoms; max_idx_ records, per
//         element, the index of the bottom that supplied the maximum (on a
//         tie between bottom 0 and bottom 1 the index is 1; later bottoms
//         only win when strictly greater).
// Any other operation is fatal.
void EltwiseLayer<Dtype, MItype, MOtype>::Forward_cpu(
    const vector<Blob<MItype>*>& bottom,
    const vector<Blob<MOtype>*>& top) {
  const int_tp count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();
  switch (op_) {
  case EltwiseParameter_EltwiseOp_PROD: {
    caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data);
    for (int_tp i = 2; i < bottom.size(); ++i) {
      caffe_mul(count, top_data, bottom[i]->cpu_data(), top_data);
    }
    break;
  }
  case EltwiseParameter_EltwiseOp_SUM: {
    caffe_set(count, Dtype(0), top_data);
    // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
    for (int_tp i = 0; i < bottom.size(); ++i) {
      caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
    }
    break;
  }
  case EltwiseParameter_EltwiseOp_MAX: {
    // Largest finite value used to seed the output; half precision has its
    // own limit.
    Dtype maxVal = FLT_MAX;
    if (std::is_same<Dtype, half_fp>::value) {
      maxVal = HALF_MAX;
    }
    // Initialize
    int_tp* mask = max_idx_.mutable_cpu_data();
    caffe_set(count, (int_tp)-1, mask);
    caffe_set(count, Dtype(-maxVal), top_data);
    // bottom 0 & 1
    const Dtype* first = bottom[0]->cpu_data();
    const Dtype* second = bottom[1]->cpu_data();
    for (int_tp idx = 0; idx < count; ++idx) {
      const bool first_wins = first[idx] > second[idx];
      top_data[idx] = first_wins ? first[idx] : second[idx];  // maxval
      mask[idx] = first_wins ? 0 : 1;  // maxid
    }
    // bottom 2++
    for (int_tp blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
      const Dtype* candidate = bottom[blob_idx]->cpu_data();
      for (int_tp idx = 0; idx < count; ++idx) {
        if (candidate[idx] > top_data[idx]) {
          top_data[idx] = candidate[idx];  // maxval
          mask[idx] = blob_idx;  // maxid
        }
      }
    }
    break;
  }
  default:
    LOG(FATAL) << "Unknown elementwise operation.";
  }
}
// Adds the array form of `coeff` element-wise onto `out_params`.
//
// The coefficient message is flattened with coeff_to_array into a temporary
// blob holding one value per message field, and that array is then added to
// out_params. out_params is read and written for as many entries as the
// message has fields.
void AugmentationLayerBase<Dtype>::add_coeff_to_array(
    const AugmentationCoeff& coeff, Dtype* out_params) {
  const int field_total = coeff.GetDescriptor()->field_count();
  // Scratch storage for the flattened coefficients.
  Blob<Dtype> scratch_blob(1, field_total, 1, 1);
  Dtype* flattened = scratch_blob.mutable_cpu_data();
  AugmentationLayerBase<Dtype>::coeff_to_array(coeff, flattened);
  // out_params += 1 * flattened
  caffe_axpy(field_total, Dtype(1), flattened, out_params);
}
// CPU backward pass of the split layer (pointer-to-bottom-vector interface).
//
// When propagate_down is set, the bottom blob first shares the diff of
// top[0]; the diffs of all remaining top blobs are then added into it.
void SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (!propagate_down) {
    return;
  }
  Blob<Dtype>* const input_blob = (*bottom)[0];
  input_blob->ShareDiff(*top[0]);
  // Add remaining top blob diffs.
  Dtype* accumulated = input_blob->mutable_cpu_diff();
  for (int i = 1; i < top.size(); ++i) {
    caffe_axpy(count_, Dtype(1.), top[i]->cpu_diff(), accumulated);
  }
}
// Computes the momentum-SGD update for every parameter blob and stores it in
// the blob's diff, using one global rate and weight decay for all parameters.
//
// For each parameter:
//   history = rate * diff + momentum * history
//   history += weight_decay * rate * data      (only if weight_decay != 0)
//   diff = history
// The CPU or GPU math routines are chosen by Caffe::mode(); any other mode is
// fatal.
void SGDSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  // get the learning rate
  const Dtype rate = GetLearningRate();
  const Dtype momentum = this->param_.momentum();
  const Dtype weight_decay = this->param_.weight_decay();
  // LOG(ERROR) << "rate:" << rate << " momentum:" << momentum
  //     << " weight_decay:" << weight_decay;
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *history_[param_id];
      // Fold the scaled gradient into the momentum history.
      caffe_axpby(param.count(), rate, param.cpu_diff(), momentum,
          hist.mutable_cpu_data());
      if (weight_decay) {
        // add weight decay
        caffe_axpy(param.count(), weight_decay * rate, param.cpu_data(),
            hist.mutable_cpu_data());
      }
      // The history is the update: copy it into the blob's diff.
      caffe_copy(param.count(), hist.cpu_data(), param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *history_[param_id];
      // Fold the scaled gradient into the momentum history.
      caffe_gpu_axpby(param.count(), rate, param.gpu_diff(), momentum,
          hist.mutable_gpu_data());
      if (weight_decay) {
        // add weight decay
        caffe_gpu_axpy(param.count(), weight_decay * rate, param.gpu_data(),
            hist.mutable_gpu_data());
      }
      // The history is the update: copy it into the blob's diff.
      caffe_gpu_copy(param.count(), hist.gpu_data(), param.mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// CPU backward pass of the tile layer.
//
// The top diff is consumed as outer_dim_ groups of tiles_ consecutive chunks
// of inner_dim_ values. For each group the first chunk is copied into the
// bottom diff and the remaining chunks are added to it, so each bottom chunk
// ends up holding the sum of its tiles_ copies. Does nothing when
// propagate_down[0] is false.
void TileLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* src = top[0]->cpu_diff();
  Dtype* dst = bottom[0]->mutable_cpu_diff();
  for (int outer = 0; outer < outer_dim_; ++outer, dst += inner_dim_) {
    // The first tile initializes the destination chunk ...
    caffe_copy(inner_dim_, src, dst);
    src += inner_dim_;
    // ... and the other tiles_ - 1 tiles are accumulated on top of it.
    for (int remaining = tiles_ - 1; remaining > 0;
         --remaining, src += inner_dim_) {
      caffe_axpy(inner_dim_, Dtype(1), src, dst);
    }
  }
}
// CPU forward pass.
//
// Produces sweepern_ output items. Output item e1 is the sum of a group of
// sweepern_ consecutive input items, leaving out the one whose position in
// the group equals e1. The input pointer keeps advancing across groups, so
// sweepern_ * sweepern_ input items are visited in total. One item is
// channels * height * width values of bottom[0].
void SubStackFixLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* in_cursor = bottom[0]->cpu_data();
  Dtype* out_cursor = top[0]->mutable_cpu_data();
  const int inner_num =
      bottom[0]->height() * bottom[0]->width() * bottom[0]->channels();
  for (int out_idx = 0; out_idx < sweepern_; ++out_idx) {
    caffe_set(inner_num, Dtype(0), out_cursor);
    for (int in_idx = 0; in_idx < sweepern_; ++in_idx) {
      const bool is_left_out = (in_idx == out_idx);
      if (!is_left_out) {
        caffe_axpy(inner_num, Dtype(1.0), in_cursor, out_cursor);
      }
      // Step to the next input item whether or not it was added.
      in_cursor += bottom[0]->offset(1, 0);
    }
    out_cursor += top[0]->offset(1, 0);
  }
}
// CPU backward pass of the split layer.
//
// The bottom diff becomes the element-wise sum of all top diffs: a plain copy
// when there is a single top, otherwise top[0] + top[1] followed by the
// accumulation of every further top. Does nothing when propagate_down[0] is
// false.
void SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  Dtype* bottom_grad = bottom[0]->mutable_cpu_diff();
  if (top.size() == 1) {
    caffe_copy(count_, top[0]->cpu_diff(), bottom_grad);
  } else {
    caffe_add(count_, top[0]->cpu_diff(), top[1]->cpu_diff(), bottom_grad);
    // Add remaining top blob diffs.
    for (int i = 2; i < top.size(); ++i) {
      caffe_axpy(count_, Dtype(1.), top[i]->cpu_diff(), bottom_grad);
    }
  }
}
// CPU forward pass of the element-wise layer (PROD and SUM only).
//
//   PROD: top = bottom[0] .* bottom[1] .* ... .* bottom[k-1]
//   SUM : top = sum_i coeffs_[i] * bottom[i]
// Any other operation is fatal. Always returns Dtype(0.).
Dtype EltwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  Blob<Dtype>* const output = (*top)[0];
  const int count = output->count();
  Dtype* result = output->mutable_cpu_data();
  switch (op_) {
  case EltwiseParameter_EltwiseOp_PROD: {
    // Seed with the product of the first two inputs, then fold in the rest.
    caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), result);
    for (int i = 2; i < bottom.size(); ++i) {
      caffe_mul(count, result, bottom[i]->cpu_data(), result);
    }
    break;
  }
  case EltwiseParameter_EltwiseOp_SUM: {
    caffe_set(count, Dtype(0), result);
    // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
    for (int i = 0; i < bottom.size(); ++i) {
      caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), result);
    }
    break;
  }
  default:
    LOG(FATAL) << "Unknown elementwise operation.";
  }
  return Dtype(0.);
}
// In-place scaled add: this += alpha * source.
//
// Asserts when the element count or the shape of `source` differs from this
// tensor's. The CPU or GPU axpy routine is selected by mode(); a GPU request
// in a CPU_ONLY build expands NO_GPU, and an unknown mode asserts.
//
// source: tensor to add; must match this tensor's count and shape.
// alpha:  scale applied to source before adding.
void Tensor<Dtype>::AddMulFrom(const Tensor& source, Dtype alpha) {
  const bool size_mismatch =
      source.count() != count_ || source.shape() != shape_;
  if (size_mismatch) {
    ASSERT(false, "Trying to add blobs of different sizes: "
      << source.count() << " != " << count_);
  }
  switch (mode()) {
  case Caffe::CPU: {
    caffe_axpy(count_, alpha, source.cpu_mem(), this->mutable_cpu_mem());
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    caffe_gpu_axpy(count_, alpha, source.gpu_mem(), this->mutable_gpu_mem());
#else
    NO_GPU;
#endif
    break;
  }
  default:
    ASSERT(false, "Unknown caffe mode.");
  }
}
// Draws random spatial augmentation coefficients until the transformed crop
// fits inside the original image, or until max_num_tries draws have been made.
//
// On every try the coefficients are regenerated with generate_spatial_coeffs,
// the incoming coefficients are added to them, and the four corners of the
// cropped_width x cropped_height crop are mapped through mirror, rotation,
// translation and zoom into the width x height image. A try is accepted when
// all four mapped corners land in [0, width - 2] x [0, height - 2].
// If no try is accepted, `coeff` is restored to its incoming value and a
// warning is logged.
//
// aug:            augmentation settings forwarded to generate_spatial_coeffs.
// coeff:          in/out; incoming coefficients, replaced by the accepted draw.
// discount_coeff: forwarded to generate_spatial_coeffs.
// width, height:  size of the original image.
// cropped_width, cropped_height: size of the crop whose corners are tested.
// max_num_tries:  upper bound on the number of draws.
//
// Fixes over the previous version:
//  * The corner loops stepped by (cropped_dim - 1), which never terminated
//    for a cropped dimension of 1. The corners are now enumerated explicitly.
//  * Success on the very last permitted try was treated as failure because
//    the check looked at the try counter; it now looks at the result.
void AugmentationLayerBase<Dtype>::generate_valid_spatial_coeffs(
    const AugmentationParameter& aug, AugmentationCoeff& coeff,
    Dtype discount_coeff, int width, int height, int cropped_width,
    int cropped_height, int max_num_tries) {
  Dtype x1, y1, x2, y2;
  int counter = 0;
  int good_params = 0;
  const int num_params = coeff.GetDescriptor()->field_count();
  Blob<Dtype> in_params_blob(1, num_params, 1, 1);
  Dtype* in_params = in_params_blob.mutable_cpu_data();
  Blob<Dtype> curr_params_blob(1, num_params, 1, 1);
  Dtype* curr_params = curr_params_blob.mutable_cpu_data();

  // Corner coordinates of the crop. For a dimension of 1 both entries are 0,
  // so the single pixel is simply tested for each of the four corners.
  const int corner_x[2] = {0, cropped_width - 1};
  const int corner_y[2] = {0, cropped_height - 1};
  // An empty crop has no corners to test (no draw can be accepted), which
  // matches the previous behavior for non-positive crop sizes.
  const bool has_corners = cropped_width > 0 && cropped_height > 0;

  // convert incoming params to an array
  AugmentationLayerBase<Dtype>::coeff_to_array(coeff, in_params);

  while (good_params < 4 && counter < max_num_tries) {
    // generate params
    AugmentationLayerBase<Dtype>::clear_all_coeffs(coeff);
    AugmentationLayerBase<Dtype>::generate_spatial_coeffs(aug, coeff,
        discount_coeff);
    // LOG(INFO) << "DEBUG: try dx = " << coeff.dx() << ", dy = " << coeff.dy();

    // add incoming params
    AugmentationLayerBase<Dtype>::coeff_to_array(coeff, curr_params);
    caffe_axpy(num_params, Dtype(1), in_params, curr_params);
    AugmentationLayerBase<Dtype>::array_to_coeff(curr_params, coeff);

    // check if all 4 corners of the transformed image fit into the original
    // image
    good_params = 0;
    for (int xi = 0; has_corners && xi < 2; ++xi) {
      const int x = corner_x[xi];
      for (int yi = 0; yi < 2; ++yi) {
        const int y = corner_y[yi];
        // move the origin and mirror
        if (coeff.mirror()) {
          x1 = - static_cast<Dtype>(x) + .5 * static_cast<Dtype>(cropped_width);
          y1 = static_cast<Dtype>(y) - .5 * static_cast<Dtype>(cropped_height);
        } else {
          x1 = static_cast<Dtype>(x) - .5 * static_cast<Dtype>(cropped_width);
          y1 = static_cast<Dtype>(y) - .5 * static_cast<Dtype>(cropped_height);
        }
        // rotate
        x2 = cos(coeff.angle()) * x1 - sin(coeff.angle()) * y1;
        y2 = sin(coeff.angle()) * x1 + cos(coeff.angle()) * y1;
        // translate
        x2 = x2 + coeff.dx() * static_cast<Dtype>(cropped_width);
        y2 = y2 + coeff.dy() * static_cast<Dtype>(cropped_height);
        // zoom
        x2 = x2 / coeff.zoom_x();
        y2 = y2 / coeff.zoom_y();
        // move the origin back
        x2 = x2 + .5 * static_cast<Dtype>(width);
        y2 = y2 + .5 * static_cast<Dtype>(height);

        if (!(floor(x2) < 0 || floor(x2) > static_cast<Dtype>(width - 2) ||
              floor(y2) < 0 || floor(y2) > static_cast<Dtype>(height - 2))) {
          good_params++;
        }
      }
    }
    counter++;
  }
  // Fall back to the incoming coefficients only when no draw was accepted.
  if (good_params < 4) {
    AugmentationLayerBase<Dtype>::array_to_coeff(in_params, coeff);
    LOG(WARNING) << "Augmentation: Exceeded maximum tries in finding spatial coeffs.";
  }
  // LOG(INFO) << "DEBUG: final dx = " << coeff.dx() << ", dy = " << coeff.dy();
}
// Computes the AdaGrad update for every parameter blob and stores it in the
// blob's diff.
//
// For each parameter:
//   1. regularization (only if local_decay != 0):
//        "L2": diff += local_decay * data
//        "L1": diff += local_decay * sign(data)   (temp_ is the scratch blob)
//        anything else is fatal;
//   2. history += diff^2
//   3. diff = local_rate * diff / (sqrt(history) + delta)
// update_ holds the intermediate results. The learning rate is logged every
// display() iterations. The CPU or GPU math routines are chosen by
// Caffe::mode(); GPU in a CPU_ONLY build expands NO_GPU and any other mode is
// fatal.
void AdaGradSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  const Dtype rate = this->GetLearningRate();
  const Dtype delta = this->param_.delta();
  const bool log_now = this->param_.display() &&
      this->iter_ % this->param_.display() == 0;
  if (log_now) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  const Dtype weight_decay = this->param_.weight_decay();
  const string regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *this->history_[param_id];
      Blob<Dtype>& upd = *this->update_[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(param.count(), local_decay, param.cpu_data(),
              param.mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(param.count(), param.cpu_data(),
              this->temp_[param_id]->mutable_cpu_data());
          caffe_axpy(param.count(), local_decay,
              this->temp_[param_id]->cpu_data(), param.mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_powx(param.count(), param.cpu_diff(), Dtype(2),
          upd.mutable_cpu_data());

      // update history
      caffe_add(param.count(), upd.cpu_data(), hist.cpu_data(),
          hist.mutable_cpu_data());

      // prepare update: diff / (sqrt(history) + delta)
      caffe_powx(param.count(), hist.cpu_data(), Dtype(0.5),
          upd.mutable_cpu_data());
      caffe_add_scalar(param.count(), delta, upd.mutable_cpu_data());
      caffe_div(param.count(), param.cpu_diff(), upd.cpu_data(),
          upd.mutable_cpu_data());

      // scale and copy (beta = 0 overwrites the diff)
      caffe_cpu_axpby(param.count(), local_rate, upd.cpu_data(), Dtype(0),
          param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *this->history_[param_id];
      Blob<Dtype>& upd = *this->update_[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(param.count(), local_decay, param.gpu_data(),
              param.mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(param.count(), param.gpu_data(),
              this->temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(param.count(), local_decay,
              this->temp_[param_id]->gpu_data(), param.mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_gpu_powx(param.count(), param.gpu_diff(), Dtype(2),
          upd.mutable_gpu_data());

      // update history
      caffe_gpu_add(param.count(), upd.gpu_data(), hist.gpu_data(),
          hist.mutable_gpu_data());

      // prepare update: diff / (sqrt(history) + delta)
      caffe_gpu_powx(param.count(), hist.gpu_data(), Dtype(0.5),
          upd.mutable_gpu_data());
      caffe_gpu_add_scalar(param.count(), delta, upd.mutable_gpu_data());
      caffe_gpu_div(param.count(), param.gpu_diff(), upd.gpu_data(),
          upd.mutable_gpu_data());

      // scale and copy (beta = 0 overwrites the diff)
      caffe_gpu_axpby(param.count(), local_rate, upd.gpu_data(), Dtype(0),
          param.mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Computes the Nesterov-momentum update for every parameter blob and stores it
// in the blob's diff.
//
// For each parameter:
//   1. update = history                       (previous momentum, saved)
//   2. regularization (only if local_decay != 0):
//        "L2": diff += local_decay * data
//        "L1": diff += local_decay * sign(data)   (temp_ is the scratch blob)
//        anything else is fatal;
//   3. history = local_rate * diff + momentum * history
//   4. update = (1 + momentum) * history - momentum * update
//   5. diff = update
// The learning rate is logged every display() iterations. The CPU or GPU math
// routines are chosen by Caffe::mode(); GPU in a CPU_ONLY build expands NO_GPU
// and any other mode is fatal.
void NesterovSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  const Dtype rate = this->GetLearningRate();
  const bool log_now = this->param_.display() &&
      this->iter_ % this->param_.display() == 0;
  if (log_now) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  const Dtype momentum = this->param_.momentum();
  const Dtype weight_decay = this->param_.weight_decay();
  const string regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *this->history_[param_id];
      Blob<Dtype>& upd = *this->update_[param_id];

      // save history momentum for stepping back
      caffe_copy(param.count(), hist.cpu_data(), upd.mutable_cpu_data());

      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(param.count(), local_decay, param.cpu_data(),
              param.mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(param.count(), param.cpu_data(),
              this->temp_[param_id]->mutable_cpu_data());
          caffe_axpy(param.count(), local_decay,
              this->temp_[param_id]->cpu_data(), param.mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // update history
      caffe_cpu_axpby(param.count(), local_rate, param.cpu_diff(), momentum,
          hist.mutable_cpu_data());

      // compute update: step back then over step
      caffe_cpu_axpby(param.count(), Dtype(1) + momentum, hist.cpu_data(),
          -momentum, upd.mutable_cpu_data());

      // copy
      caffe_copy(param.count(), upd.cpu_data(), param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& hist = *this->history_[param_id];
      Blob<Dtype>& upd = *this->update_[param_id];

      // save history momentum for stepping back
      caffe_copy(param.count(), hist.gpu_data(), upd.mutable_gpu_data());

      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(param.count(), local_decay, param.gpu_data(),
              param.mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(param.count(), param.gpu_data(),
              this->temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(param.count(), local_decay,
              this->temp_[param_id]->gpu_data(), param.mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // update history
      caffe_gpu_axpby(param.count(), local_rate, param.gpu_diff(), momentum,
          hist.mutable_gpu_data());

      // compute update: step back then over step
      caffe_gpu_axpby(param.count(), Dtype(1) + momentum, hist.gpu_data(),
          -momentum, upd.mutable_gpu_data());

      // copy
      caffe_copy(param.count(), upd.gpu_data(), param.mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Computes the momentum-SGD update for every parameter blob and stores it in
// the blob's diff.
//
// While iter_ is below momentum_burnin(), the momentum is scaled by
// iter_ / momentum_burnin(). For each parameter:
//   1. regularization (only if local_decay != 0):
//        "L2": diff += local_decay * data
//        "L1": diff += local_decay * sign(data)   (temp_ is the scratch blob)
//        anything else is fatal;
//   2. history = local_rate * diff + momentum * history
//   3. diff = history
// Learning rate and momentum are logged every display() iterations. The CPU
// or GPU math routines are chosen by Caffe::mode(); GPU in a CPU_ONLY build
// expands NO_GPU and any other mode is fatal.
void SGDSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  const Dtype rate = GetLearningRate();
  Dtype momentum = this->param_.momentum();
  const bool in_burnin = this->param_.momentum_burnin() > this->iter_;
  if (in_burnin) {
    // Ramp the momentum up during the burn-in phase.
    momentum = momentum * this->iter_ / this->param_.momentum_burnin();
  }
  const bool log_now = this->param_.display() &&
      this->iter_ % this->param_.display() == 0;
  if (log_now) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate
        << ", mom = " << momentum;
  }
  const Dtype weight_decay = this->param_.weight_decay();
  const string regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(param.count(), local_decay, param.cpu_data(),
              param.mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(param.count(), param.cpu_data(),
              temp_[param_id]->mutable_cpu_data());
          caffe_axpy(param.count(), local_decay, temp_[param_id]->cpu_data(),
              param.mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // Fold the scaled gradient into the momentum history.
      caffe_cpu_axpby(param.count(), local_rate, param.cpu_diff(), momentum,
          history_[param_id]->mutable_cpu_data());
      // copy
      caffe_copy(param.count(), history_[param_id]->cpu_data(),
          param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(param.count(), local_decay, param.gpu_data(),
              param.mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(param.count(), param.gpu_data(),
              temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(param.count(), local_decay,
              temp_[param_id]->gpu_data(), param.mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // Fold the scaled gradient into the momentum history.
      caffe_gpu_axpby(param.count(), local_rate, param.gpu_diff(), momentum,
          history_[param_id]->mutable_gpu_data());
      // copy
      caffe_copy(param.count(), history_[param_id]->gpu_data(),
          param.mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
/**
 * Adds the weight-decay gradient of one learnable parameter to its diff.
 *
 * The effective decay is the solver-wide weight_decay() multiplied by the
 * parameter's own decay multiplier. When it is zero nothing is added.
 * Otherwise:
 *   - "L2": diff += decay * data
 *   - "L1": diff += decay * sign(data), with temp_[param_id] as scratch
 *   - anything else is a fatal error.
 *
 * In GPU mode the work is routed to the CUDA helpers when the device
 * backend is BACKEND_CUDA (and USE_CUDA is compiled in), otherwise to the
 * GreenTea helpers (when USE_GREENTEA is compiled in).
 *
 * @param param_id index into the net's learnable parameters.
 */
void SGDSolver<Dtype>::Regularize(int param_id) {
  const vector<Blob<Dtype>*>& learnable = this->net_->learnable_params();
  const vector<float>& decay_mults = this->net_->params_weight_decay();
  Dtype global_decay = this->param_.weight_decay();
  string reg_type = this->param_.regularization_type();
  Dtype decay = global_decay * decay_mults[param_id];

  const bool use_l2 = (reg_type == "L2");
  const bool use_l1 = (reg_type == "L1");

  switch (Caffe::mode()) {
    case Caffe::CPU: {
      if (!decay) {
        break;  // nothing to add for this parameter
      }
      Blob<Dtype>* param = learnable[param_id];
      const int n = param->count();
      if (use_l2) {
        caffe_axpy(n, decay, param->cpu_data(), param->mutable_cpu_diff());
      } else if (use_l1) {
        caffe_cpu_sign(n, param->cpu_data(),
                       temp_[param_id]->mutable_cpu_data());
        caffe_axpy(n, decay, temp_[param_id]->cpu_data(),
                   param->mutable_cpu_diff());
      } else {
        LOG(FATAL) << "Unknown regularization type: " << reg_type;
      }
      break;
    }
    case Caffe::GPU: {
#ifndef CPU_ONLY
      if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
        if (decay) {
          Blob<Dtype>* param = learnable[param_id];
          const int n = param->count();
          if (use_l2) {
            caffe_gpu_axpy(n, decay, param->gpu_data(),
                           param->mutable_gpu_diff());
          } else if (use_l1) {
            caffe_gpu_sign(n, param->gpu_data(),
                           temp_[param_id]->mutable_gpu_data());
            caffe_gpu_axpy(n, decay, temp_[param_id]->gpu_data(),
                           param->mutable_gpu_diff());
          } else {
            LOG(FATAL) << "Unknown regularization type: " << reg_type;
          }
        }
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
        if (decay) {
          Blob<Dtype>* param = learnable[param_id];
          const int n = param->count();
          // The GreenTea helpers take (buffer, offset) pairs; every offset
          // passed here is 0.
          if (use_l2) {
            greentea_gpu_axpy<Dtype>(this->device_->id(), n, decay,
                                     (cl_mem)(param->gpu_data()), 0,
                                     (cl_mem)(param->mutable_gpu_diff()), 0);
          } else if (use_l1) {
            greentea_gpu_sign<Dtype>(
                this->device_->id(), n,
                (cl_mem)(param->gpu_data()), 0,
                (cl_mem)(temp_[param_id]->mutable_gpu_data()), 0);
            greentea_gpu_axpy<Dtype>(
                this->device_->id(), n, decay,
                (cl_mem)(temp_[param_id]->gpu_data()), 0,
                (cl_mem)(param->mutable_gpu_diff()), 0);
          } else {
            LOG(FATAL) << "Unknown regularization type: " << reg_type;
          }
        }
#endif  // USE_GREENTEA
      }
#else
      NO_GPU;
#endif
      break;
    }
    default: {
      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
    }
  }
}
/**
 * CPU forward pass of the element-wise layer (PROD, SUM or MAX over all
 * bottom blobs), writing into (*top)[0].
 *
 * Before the actual computation a diagnostic pass sums every bottom blob
 * except the first and logs "bottom <i> = 0" when the sum compares equal
 * to zero.
 *
 * For MAX, max_idx_ records for each element the index of the bottom blob
 * that supplied the maximum. An unknown operation is a fatal error.
 *
 * @param bottom input blobs; PROD and MAX read bottom[0] and bottom[1]
 *               unconditionally.
 * @param top    output blobs; only (*top)[0] is written.
 * @return always Dtype(0).
 */
Dtype EltwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  // Diagnostic pass: report inputs (index >= 1) whose elements sum to zero.
  for (int b = 1; b < bottom.size(); ++b) {
    const Dtype* values = bottom[b]->cpu_data();
    const int n = bottom[b]->count();
    Dtype total = 0;
    for (int k = 0; k < n; ++k) {
      total += values[k];
    }
    if (total == 0) {
      LOG(INFO) << "bottom " << b << " = 0";
    }
  }

  Blob<Dtype>* out = (*top)[0];
  const int count = out->count();
  Dtype* top_data = out->mutable_cpu_data();

  switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD: {
      // top = bottom[0] * bottom[1] * bottom[2] * ...
      caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
                top_data);
      for (int b = 2; b < bottom.size(); ++b) {
        caffe_mul(count, top_data, bottom[b]->cpu_data(), top_data);
      }
      break;
    }
    case EltwiseParameter_EltwiseOp_SUM: {
      // top = sum_b coeffs_[b] * bottom[b], accumulated from zero.
      caffe_set(count, Dtype(0), top_data);
      // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
      for (int b = 0; b < bottom.size(); ++b) {
        caffe_axpy(count, coeffs_[b], bottom[b]->cpu_data(), top_data);
      }
      break;
    }
    case EltwiseParameter_EltwiseOp_MAX: {
      int* winner = max_idx_.mutable_cpu_data();
      caffe_set(count, -1, winner);
      caffe_set(count, Dtype(-FLT_MAX), top_data);

      // Seed with the larger of the first two inputs; ties go to input 1.
      const Dtype* first = bottom[0]->cpu_data();
      const Dtype* second = bottom[1]->cpu_data();
      for (int k = 0; k < count; ++k) {
        const bool first_wins = first[k] > second[k];
        top_data[k] = first_wins ? first[k] : second[k];
        winner[k] = first_wins ? 0 : 1;
      }

      // Remaining inputs replace the running maximum only when strictly
      // larger.
      for (int b = 2; b < bottom.size(); ++b) {
        const Dtype* candidate = bottom[b]->cpu_data();
        for (int k = 0; k < count; ++k) {
          if (candidate[k] > top_data[k]) {
            top_data[k] = candidate[k];
            winner[k] = b;
          }
        }
      }
      break;
    }
    default: {
      LOG(FATAL) << "Unknown elementwise operation.";
    }
  }
  return Dtype(0.);
}
/**
 * CPU forward pass of the element-wise layer, writing into top[0].
 *
 * Broadcast mode (broadcast_ set): combines exactly bottom[0] and
 * bottom[1] through the broadcast helpers; only PROD and SUM are accepted.
 * NOTE(review): the first four entries of each input's shape() are read
 * without a rank check here -- confirm layer setup guarantees 4 axes.
 *
 * Regular mode: PROD, SUM (weighted by coeffs_) or MAX across all bottoms.
 * SUM uses a plain copy/add when a coefficient compares equal to 1.0 and
 * axpy otherwise. MAX also records, per element, the index of the winning
 * bottom blob in max_idx_.
 *
 * An unknown operation in either mode is a fatal error.
 *
 * @param bottom input blobs; bottom[0] and bottom[1] are read
 *               unconditionally in broadcast mode and for PROD and MAX.
 * @param top    output blobs; only top[0] is written.
 */
void EltwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();

  if (broadcast_) {
    // Hand the leading four dimensions of both operands to the helpers.
    int shape_a[4];
    int shape_b[4];
    for (int axis = 0; axis < 4; ++axis) {
      shape_a[axis] = bottom[0]->shape()[axis];
      shape_b[axis] = bottom[1]->shape()[axis];
    }
    const Dtype* lhs = bottom[0]->cpu_data();
    const Dtype* rhs = bottom[1]->cpu_data();

    switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD: {
        caffe_mul_broadcast<Dtype>(shape_a, shape_b, lhs, rhs, top_data);
        break;
      }
      case EltwiseParameter_EltwiseOp_SUM: {
        caffe_add_broadcast<Dtype>(shape_a, shape_b, lhs, rhs, top_data);
        break;
      }
      default: {
        LOG(FATAL) << "Unknown elementwise broadcast operation.";
      }
    }
    return;
  }

  switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD: {
      // top = bottom[0] * bottom[1] * bottom[2] * ...
      caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
                top_data);
      for (int b = 2; b < bottom.size(); ++b) {
        caffe_mul(count, top_data, bottom[b]->cpu_data(), top_data);
      }
      break;
    }
    case EltwiseParameter_EltwiseOp_SUM: {
      // First term initializes the output.
      if (coeffs_[0] != 1.0) {
        caffe_set(count, Dtype(0.), top_data);
        caffe_axpy(count, coeffs_[0], bottom[0]->cpu_data(), top_data);
      } else {
        caffe_copy(count, bottom[0]->cpu_data(), top_data);
      }
      // Remaining terms accumulate; unit coefficients use a plain add.
      for (int b = 1; b < bottom.size(); ++b) {
        if (coeffs_[b] != 1.0) {
          caffe_axpy(count, coeffs_[b], bottom[b]->cpu_data(), top_data);
        } else {
          caffe_add(count, top_data, bottom[b]->cpu_data(), top_data);
        }
      }
      break;
    }
    case EltwiseParameter_EltwiseOp_MAX: {
      int* winner = max_idx_.mutable_cpu_data();
      caffe_set(count, -1, winner);
      caffe_set(count, Dtype(-FLT_MAX), top_data);

      // Seed with the larger of the first two inputs; ties go to input 1.
      const Dtype* first = bottom[0]->cpu_data();
      const Dtype* second = bottom[1]->cpu_data();
      for (int k = 0; k < count; ++k) {
        const bool first_wins = first[k] > second[k];
        top_data[k] = first_wins ? first[k] : second[k];
        winner[k] = first_wins ? 0 : 1;
      }

      // Remaining inputs replace the running maximum only when strictly
      // larger.
      for (int b = 2; b < bottom.size(); ++b) {
        const Dtype* candidate = bottom[b]->cpu_data();
        for (int k = 0; k < count; ++k) {
          if (candidate[k] > top_data[k]) {
            top_data[k] = candidate[k];
            winner[k] = b;
          }
        }
      }
      break;
    }
    default: {
      LOG(FATAL) << "Unknown elementwise operation.";
    }
  }
}
/**
 * Builds the momentum-SGD update for every network parameter and stores it
 * in that parameter's diff.
 *
 * Each parameter gets its own learning rate: the global rate times the
 * parameter's lr multiplier, further shaped by the parameter's local
 * lr policy:
 *   - "naive_inv": divided by (iter_ / 500 + 1), using integer division,
 *                  so the divisor grows by one every 500 iterations;
 *   - "power_inv": multiplied by (1 + localgamma * iter_) ^ (-localpower);
 *   - "step":      multiplied by localgamma ^ (iter_ / localstepsize);
 *   - "nothing":   left unchanged;
 *   - anything else is a fatal error.
 *
 * The local policy is resolved once, up front, and applied in both CPU and
 * GPU mode. (Previously only the GPU branch honored it, so the two modes
 * trained with different effective rates.)
 *
 * Per parameter: history = local_rate * diff + momentum * history (axpby),
 * then history += local_decay * local_rate * data when the local decay is
 * non-zero, and finally the history is copied into the diff.
 */
void SGDSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<string>& net_params_lr_policy = this->net_->params_lr_policy();
  vector<float>& net_params_weight_decay =
      this->net_->params_weight_decay();

  // Global learning rate for the current iteration.
  Dtype rate = GetLearningRate();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype momentum = this->param_.momentum();
  Dtype weight_decay = this->param_.weight_decay();

  // Resolve the per-parameter learning rates once so that both execution
  // modes apply exactly the same schedule.
  vector<Dtype> local_rates(net_params.size());
  for (int param_id = 0; param_id < net_params.size(); ++param_id) {
    const string& policy = net_params_lr_policy[param_id];
    const Dtype base_rate = rate * net_params_lr[param_id];
    if (policy == "naive_inv") {
      // Integer division is intentional: the rate drops in steps of 500
      // iterations.
      local_rates[param_id] =
          base_rate * Dtype(1.0) / (this->iter_ / 500 + 1);
    } else if (policy == "power_inv") {
      local_rates[param_id] = base_rate *
          pow(Dtype(1.0) + this->param_.localgamma() * this->iter_,
              -this->param_.localpower());
    } else if (policy == "step") {
      // NOTE(review): a localstepsize() of 0 would divide by zero here --
      // confirm it is validated upstream.
      int current_step = this->iter_ / this->param_.localstepsize();
      local_rates[param_id] =
          base_rate * pow(this->param_.localgamma(), current_step);
    } else if (policy == "nothing") {
      local_rates[param_id] = base_rate;
    } else {
      LOG(FATAL) << "Unknown caffe local policy: " << policy;
    }
  }

  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      // Compute the value to history, and then copy them to the blob's diff.
      Dtype local_rate = local_rates[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
      caffe_axpby(net_params[param_id]->count(), local_rate,
          net_params[param_id]->cpu_diff(), momentum,
          history_[param_id]->mutable_cpu_data());
      if (local_decay) {
        // add weight decay
        caffe_axpy(net_params[param_id]->count(),
            local_decay * local_rate,
            net_params[param_id]->cpu_data(),
            history_[param_id]->mutable_cpu_data());
      }
      // copy
      caffe_copy(net_params[param_id]->count(),
          history_[param_id]->cpu_data(),
          net_params[param_id]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      // Compute the value to history, and then copy them to the blob's diff.
      Dtype local_rate = local_rates[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
      caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
          net_params[param_id]->gpu_diff(), momentum,
          history_[param_id]->mutable_gpu_data());
      if (local_decay) {
        // add weight decay
        caffe_gpu_axpy(net_params[param_id]->count(),
            local_decay * local_rate,
            net_params[param_id]->gpu_data(),
            history_[param_id]->mutable_gpu_data());
      }
      // copy
      caffe_gpu_copy(net_params[param_id]->count(),
          history_[param_id]->gpu_data(),
          net_params[param_id]->mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
void caffe_cpu_csr_gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const Dtype alpha, const int nzz, const Dtype* A, const int* indices, const int* ptr, const Dtype* B, const Dtype beta, Dtype* C, const CBLAS_ORDER orderC) { if (TransA == CblasNoTrans) { // CSR caffe_scal(M * N, beta, C); if (orderC == CblasRowMajor) { if (TransB == CblasNoTrans) { for (int rowA = 0; rowA < M; rowA++) { const int begin = ptr[rowA]; const int end = ptr[rowA + 1]; Dtype* CrowA = C + (N * rowA); for (int pos = begin; pos < end; pos++) { const Dtype* BcolAN = B + (indices[pos] * N); const Dtype AatPos = alpha * A[pos]; caffe_axpy(N, AatPos, BcolAN, CrowA, 1, 1); } } } else { for (int rowA = 0; rowA < M; rowA++) { const int begin = ptr[rowA]; const int end = ptr[rowA + 1]; Dtype* CrowA = C + (N * rowA); for (int pos = begin; pos < end; pos++) { const Dtype AatPos = alpha * A[pos]; const Dtype* BcolA = B + indices[pos]; caffe_axpy(N, AatPos, BcolA, CrowA, K, 1); } } } } else { if (TransB == CblasNoTrans) { for (int rowA = 0; rowA < M; rowA++) { const int begin = ptr[rowA]; const int end = ptr[rowA + 1]; Dtype* CrowA = C + rowA; for (int pos = begin; pos < end; pos++) { const Dtype* BcolAN = B + (indices[pos] * N); const Dtype AatPos = alpha * A[pos]; caffe_axpy(N, AatPos, BcolAN, CrowA, 1, M); } } } else { for (int rowA = 0; rowA < M; rowA++) { const int begin = ptr[rowA]; const int end = ptr[rowA + 1]; Dtype* CrowA = C + rowA; for (int pos = begin; pos < end; pos++) { const Dtype* BcolA = B + indices[pos]; const Dtype AatPos = alpha * A[pos]; caffe_axpy(N, AatPos, BcolA, CrowA, K, M); } } } } } else { // A is CSC caffe_scal(M * N, beta, C); if (orderC == CblasRowMajor) { if (TransB == CblasNoTrans) { for (int colA = 0; colA < K; colA++) { const int begin = ptr[colA]; const int end = ptr[colA + 1]; const Dtype* BColAN = B + (colA * N); for (int pos = begin; pos < end; pos++) { caffe_axpy(N, A[pos] * alpha, BColAN, C + (indices[pos] 
* N), 1, 1); } } } else { for (int colA = 0; colA < K; colA++) { const int begin = ptr[colA]; const int end = ptr[colA + 1]; const Dtype* BColA = B + colA; for (int pos = begin; pos < end; pos++) { caffe_axpy(N, A[pos] * alpha, BColA, C + (indices[pos] * N), K, 1); } } } } else { if (TransB == CblasNoTrans) { for (int colA = 0; colA < K; colA++) { const int begin = ptr[colA]; const int end = ptr[colA + 1]; const Dtype* BColAN = B + (colA * N); for (int pos = begin; pos < end; pos++) { caffe_axpy(N, A[pos] * alpha, BColAN, C + indices[pos], 1, M); } } } else { for (int colA = 0; colA < K; colA++) { const int begin = ptr[colA]; const int end = ptr[colA + 1]; const Dtype* BColA = B + colA; for (int pos = begin; pos < end; pos++) { caffe_axpy(N, A[pos] * alpha, BColA, C + indices[pos], K, M); } } } } } }