void ScalarLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const ScalarParameter& param = this->layer_param_.scalar_param(); Blob<Dtype>* scalar = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get(); // Always set axis_ == 0 in special case where scalar is an actual scalar // (num_axes == 0). Mathematically equivalent for any choice of axis_, so the // actual setting can be safely ignored; and computation is most efficient // with axis_ == 0 and (therefore) outer_dim_ == 1. (Setting axis_ to // bottom[0]->num_axes() - 1, giving inner_dim_ == 1, would be equally // performant.) axis_ = (scalar->num_axes() == 0) ? 0 : bottom[0]->CanonicalAxisIndex(param.axis()); CHECK_GE(bottom[0]->num_axes(), axis_ + scalar->num_axes()) << "scalar blob's shape extends past bottom[0]'s shape when applied " << "starting with bottom[0] axis = " << axis_; for (int i = 0; i < scalar->num_axes(); ++i) { CHECK_EQ(bottom[0]->shape(axis_ + i), scalar->shape(i)) << "dimension mismatch between bottom[0]->shape(" << axis_ + i << ") and scalar->shape(" << i << ")"; } outer_dim_ = bottom[0]->count(0, axis_); scalar_dim_ = scalar->count(); inner_dim_ = bottom[0]->count(axis_ + scalar->num_axes()); if (bottom[0] == top[0]) { // in-place computation temp_.ReshapeLike(*bottom[0]); } else { top[0]->ReshapeLike(*bottom[0]); } sum_result_.Reshape(vector<int>(1, outer_dim_ * scalar_dim_)); const int sum_mult_size = std::max(outer_dim_, inner_dim_); sum_multiplier_.Reshape(vector<int>(1, sum_mult_size)); if (sum_multiplier_.cpu_data()[sum_mult_size - 1] != Dtype(1)) { caffe_set(sum_mult_size, Dtype(1), sum_multiplier_.mutable_cpu_data()); } }
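// A minimal standalone sketch (not part of the layer; the shapes and axis below
// are hypothetical) of the outer/scalar/inner factorization computed above:
// with bottom shape (2, 3, 4, 5), scalar shape (3) and axis = 1, the blob
// decomposes into outer_dim_ * scalar_dim_ * inner_dim_ elements, which is what
// the forward/backward passes rely on when they walk the data.
#include <cassert>
#include <vector>

int main() {
  const std::vector<int> bottom_shape = {2, 3, 4, 5};
  const std::vector<int> scalar_shape = {3};
  const int axis = 1;
  // Equivalent of Blob::count(begin, end): product of dimensions in [begin, end).
  auto count = [](const std::vector<int>& s, int begin, int end) {
    int c = 1;
    for (int i = begin; i < end; ++i) c *= s[i];
    return c;
  };
  const int outer_dim = count(bottom_shape, 0, axis);                       // 2
  const int scalar_dim = count(scalar_shape, 0, (int)scalar_shape.size());  // 3
  const int inner_dim = count(bottom_shape, axis + (int)scalar_shape.size(),
                              (int)bottom_shape.size());                    // 4 * 5 = 20
  assert(outer_dim * scalar_dim * inner_dim ==
         count(bottom_shape, 0, (int)bottom_shape.size()));
  return 0;
}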
TYPED_TEST(DataTransformTest, TestCropSize) { TransformationParameter transform_param; const bool unique_pixels = false; // all pixels the same equal to label const int label = 0; const int channels = 3; const int height = 4; const int width = 5; const int crop_size = 2; transform_param.set_crop_size(crop_size); Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); DataTransformer<TypeParam>* transformer = new DataTransformer<TypeParam>(transform_param, TEST, Caffe::GetDefaultDeviceContext()); transformer->InitRand(); Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, crop_size, crop_size); for (int iter = 0; iter < this->num_iter_; ++iter) { transformer->Transform(datum, blob); EXPECT_EQ(blob->num(), 1); EXPECT_EQ(blob->channels(), datum.channels()); EXPECT_EQ(blob->height(), crop_size); EXPECT_EQ(blob->width(), crop_size); for (int j = 0; j < blob->count(); ++j) { EXPECT_EQ(blob->cpu_data()[j], label); } } }
Blob::Blob(const Blob &source) { /* shallow copy: shares the source blob's underlying data storage */ shape_ = source.shape(); count_ = source.count(); data_ = source.data(); }
TYPED_TEST(Col2ImgMaskLayerTest, TestForward_2) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter layer_param;
  ConvolutionParameter* convolution_param =
      layer_param.mutable_convolution_param();
  // convolution_param->set_kernel_size(0, 3);
  // convolution_param->set_stride(0, 2);
  convolution_param->add_kernel_size(3);
  convolution_param->add_stride(2);
  caffe_set(blob_bottom_->count(), (Dtype)2, blob_bottom_->mutable_cpu_data());
  Blob<Dtype> mask;
  mask.ReshapeLike(*blob_bottom_);
  caffe_set(mask.count(), (Dtype)1, mask.mutable_cpu_data());
  vector<Blob<Dtype>*> blob_bottom_vec_2_;
  blob_bottom_vec_2_.push_back(blob_bottom_);
  blob_bottom_vec_2_.push_back(&mask);
  Col2imgMaskLayer<Dtype> layer(layer_param);
  layer.SetUp(blob_bottom_vec_2_, blob_top_vec_);
  EXPECT_EQ(this->blob_top_->num(), 2);
  EXPECT_EQ(this->blob_top_->channels(), 2);
  EXPECT_EQ(this->blob_top_->height(), 5);
  EXPECT_EQ(this->blob_top_->width(), 5);
  layer.Forward(blob_bottom_vec_, blob_top_vec_);
  const Dtype min_precision = 1e-5;
  for (int i = 0; i < blob_top_->count(); i++)
    EXPECT_NEAR(blob_top_->mutable_cpu_data()[i], 2, min_precision);
}
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  if (concat_dim_ == 0) {
    int offset_num = 0;
    for (int i = 0; i < bottom.size(); ++i) {
      Blob<Dtype>* blob = bottom[i];
      if (propagate_down[i]) {
        Dtype* bottom_diff = blob->mutable_cpu_diff();
        caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num),
                   bottom_diff);
      }
      offset_num += blob->num();
    }
  } else if (concat_dim_ == 1) {
    int offset_channel = 0;
    for (int i = 0; i < bottom.size(); ++i) {
      Blob<Dtype>* blob = bottom[i];
      if (propagate_down[i]) {
        Dtype* bottom_diff = blob->mutable_cpu_diff();
        int num_elem = blob->channels() * blob->height() * blob->width();
        for (int n = 0; n < num_; ++n) {
          caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel),
                     bottom_diff + blob->offset(n));
        }
      }
      offset_channel += blob->channels();
    }
  }  // concat_dim_ is guaranteed to be 0 or 1 by LayerSetUp.
}
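// A minimal standalone sketch (hypothetical sizes, plain std::vector instead of
// Blob) of the channel-wise split performed by Backward_cpu above: a top diff
// of shape (num, c0 + c1, h, w) is copied back into two bottom diffs of shape
// (num, c0, h, w) and (num, c1, h, w) using the same offset arithmetic.
#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  const int num = 2, c0 = 3, c1 = 2, h = 4, w = 5;
  const int spatial = h * w;
  std::vector<float> top_diff(num * (c0 + c1) * spatial, 1.0f);
  std::vector<float> bottom0(num * c0 * spatial), bottom1(num * c1 * spatial);

  int offset_channel = 0;
  std::vector<std::pair<std::vector<float>*, int> > bottoms = {
      {&bottom0, c0}, {&bottom1, c1}};
  for (size_t i = 0; i < bottoms.size(); ++i) {
    std::vector<float>& b = *bottoms[i].first;
    const int channels = bottoms[i].second;
    const int num_elem = channels * spatial;  // channels * height * width
    for (int n = 0; n < num; ++n) {
      // top offset(n, offset_channel) == (n * (c0 + c1) + offset_channel) * spatial
      const float* src =
          top_diff.data() + (n * (c0 + c1) + offset_channel) * spatial;
      float* dst = b.data() + n * num_elem;  // bottom offset(n)
      std::copy(src, src + num_elem, dst);
    }
    offset_channel += channels;
  }
  assert(bottom0.front() == 1.0f && bottom1.back() == 1.0f);
  return 0;
}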
Dtype SliceLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  // Read-only access is enough here, so use cpu_data() rather than
  // mutable_cpu_data().
  const Dtype* bottom_data = bottom[0]->cpu_data();
  if (slice_dim_ == 0) {
    int offset_num = 0;
    for (int i = 0; i < top->size(); ++i) {
      Blob<Dtype>* blob = (*top)[i];
      Dtype* top_data = blob->mutable_cpu_data();
      caffe_copy(blob->count(), bottom_data + bottom[0]->offset(offset_num),
                 top_data);
      offset_num += blob->num();
    }
  } else if (slice_dim_ == 1) {
    int offset_channel = 0;
    for (int i = 0; i < top->size(); ++i) {
      Blob<Dtype>* blob = (*top)[i];
      Dtype* top_data = blob->mutable_cpu_data();
      const int num_elem =
          blob->channels() * blob->height() * blob->width() * blob->depth();
      for (int n = 0; n < num_; ++n) {
        caffe_copy(num_elem, bottom_data + bottom[0]->offset(n, offset_channel),
                   top_data + blob->offset(n));
      }
      offset_channel += blob->channels();
    }
  }  // slice_dim_ is guaranteed to be 0 or 1 by SetUp.
  return Dtype(0.);
}
void InfogainLossLayer<Dtype, MItype, MOtype>::Reshape( const vector<Blob<MItype>*>& bottom, const vector<Blob<MOtype>*>& top) { LossLayer<Dtype, MItype, MOtype>::Reshape(bottom, top); softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); infogain_axis_ = bottom[0]->CanonicalAxisIndex( this->layer_param_.infogain_loss_param().axis()); outer_num_ = bottom[0]->count(0, infogain_axis_); inner_num_ = bottom[0]->count(infogain_axis_ + 1); CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) << "Number of labels must match number of predictions; " << "e.g., if infogain axis == 1 and prediction shape is (n, c, H, W), " << "label count (number of labels) must be n*H*W, " << "with integer values in {0, 1, ..., c-1}."; num_labels_ = bottom[0]->shape(infogain_axis_); Blob<Dtype>* infogain = NULL; if (bottom.size() < 3) { infogain = &infogain_; } else { infogain = bottom[2]; } CHECK_EQ(infogain->count(), num_labels_*num_labels_); sum_rows_H_.Reshape(vector<int_tp>(1, num_labels_)); if (bottom.size() == 2) { // H is provided as a parameter and will not change. sum rows once sum_rows_of_H(infogain); } if (top.size() >= 2) { // softmax output top[1]->ReshapeLike(*bottom[0]); } }
void SliceLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) { if (!propagate_down[0]) { return; } Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); if (slice_dim_ == 0) { int offset_num = 0; for (int i = 0; i < top.size(); ++i) { Blob<Dtype>* blob = top[i]; const Dtype* top_diff = blob->cpu_diff(); caffe_copy(blob->count(), top_diff, bottom_diff + (*bottom)[0]->offset(offset_num)); offset_num += blob->num(); } } else if (slice_dim_ == 1) { int offset_channel = 0; for (int i = 0; i < top.size(); ++i) { Blob<Dtype>* blob = top[i]; const Dtype* top_diff = blob->cpu_diff(); const int num_elem = blob->channels() * blob->height() * blob->width() * blob->depth(); for (int n = 0; n < num_; ++n) { caffe_copy(num_elem, top_diff + blob->offset(n), bottom_diff + (*bottom)[0]->offset(n, offset_channel)); } offset_channel += blob->channels(); } } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. }
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { if (source.count() != count_ || source.shape() != shape_) { if (reshape) { ReshapeLike(source); } else { LOG(FATAL) << "Trying to copy blobs of different sizes."; } } switch (Caffe::mode()) { case Caffe::GPU: if (copy_diff) { caffe_copy(count_, source.gpu_diff(), static_cast<Dtype*>(diff_->mutable_gpu_data())); } else { caffe_copy(count_, source.gpu_data(), static_cast<Dtype*>(data_->mutable_gpu_data())); } break; case Caffe::CPU: if (copy_diff) { caffe_copy(count_, source.cpu_diff(), static_cast<Dtype*>(diff_->mutable_cpu_data())); } else { caffe_copy(count_, source.cpu_data(), static_cast<Dtype*>(data_->mutable_cpu_data())); } break; default: LOG(FATAL) << "Unknown caffe mode."; } }
void BiasLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const BiasParameter& param = this->layer_param_.bias_param(); Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get(); // Always set axis == 0 in special case where bias is a scalar // (num_axes == 0). Mathematically equivalent for any choice of axis, so the // actual setting can be safely ignored; and computation is most efficient // with axis == 0 and (therefore) outer_dim_ == 1. const int axis = (bias->num_axes() == 0) ? 0 : bottom[0]->CanonicalAxisIndex(param.axis()); CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes()) << "bias blob's shape extends past bottom[0]'s shape when applied " << "starting with bottom[0] axis = " << axis; for (int i = 0; i < bias->num_axes(); ++i) { CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i)) << "dimension mismatch between bottom[0]->shape(" << axis + i << ") and bias->shape(" << i << ")"; } outer_dim_ = bottom[0]->count(0, axis); bias_dim_ = bias->count(); inner_dim_ = bottom[0]->count(axis + bias->num_axes()); dim_ = bias_dim_ * inner_dim_; if (bottom[0] != top[0]) { top[0]->ReshapeLike(*bottom[0]); } bias_multiplier_.Reshape(vector<int>(1, inner_dim_)); if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) { caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data()); } }
void Blob<Dtype>::ShareDiff(const Blob& other) {
  // SID MEMORY COMPACT LIGHT WEIGHT CAFFE <BEGIN>
  if (_is_diff_initialized == 0) {
    diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
    _is_diff_initialized = 1;
  }
  // SID MEMORY COMPACT LIGHT WEIGHT CAFFE <END>
  CHECK_EQ(count_, other.count());
  diff_ = other.diff();
}
int NumSequenceMatches(const TransformationParameter transform_param, const Datum& datum, Phase phase) { // Get crop sequence with Caffe seed 1701. DataTransformer<Dtype>* transformer = new DataTransformer<Dtype>(transform_param, phase); const int crop_size = transform_param.crop_size(); int crop_h = transform_param.crop_h(); int crop_w = transform_param.crop_w(); if (crop_size > 0) { crop_h = crop_w = crop_size; } Caffe::set_random_seed(seed_); transformer->InitRand(); Blob<Dtype>* blob = new Blob<Dtype>(1, datum.channels(), datum.height(), datum.width()); if (crop_h > 0 || crop_w > 0) { blob->Reshape(1, datum.channels(), crop_h, crop_w); } vector<vector<Dtype> > crop_sequence; for (int iter = 0; iter < this->num_iter_; ++iter) { vector<Dtype> iter_crop_sequence; transformer->Transform(datum, blob); for (int j = 0; j < blob->count(); ++j) { iter_crop_sequence.push_back(blob->cpu_data()[j]); } crop_sequence.push_back(iter_crop_sequence); } // Check if the sequence differs from the previous int num_sequence_matches = 0; for (int iter = 0; iter < this->num_iter_; ++iter) { vector<Dtype> iter_crop_sequence = crop_sequence[iter]; transformer->Transform(datum, blob); for (int j = 0; j < blob->count(); ++j) { num_sequence_matches += (crop_sequence[iter][j] == blob->cpu_data()[j]); } } return num_sequence_matches; }
static void write_blob_to_file(const std::string& file_name, const Blob<Dtype>& blob) { std::ofstream file(file_name.c_str(), std::ios::out | std::ios::binary); if (file.fail()) { ASSERT_FALSE(true); return; } file.write(reinterpret_cast<const char*>(&blob.shape()[0]), 4 * sizeof(int)); ASSERT_FALSE(file.fail()); file.write(reinterpret_cast<const char*>(blob.cpu_data()), blob.count() * sizeof(Dtype)); ASSERT_FALSE(file.fail()); file.close(); }
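// The helper above writes a fixed four-int shape header followed by the raw
// blob data, i.e. [num, channels, height, width] as int, then count() values of
// Dtype, so it assumes a 4-axis blob. A minimal sketch (hypothetical 2x3x4x5
// float blob) of the resulting file size:
#include <cstddef>
#include <cstdio>

int main() {
  const int shape[4] = {2, 3, 4, 5};
  std::size_t count = 1;
  for (int i = 0; i < 4; ++i) count *= shape[i];               // 120 elements
  const std::size_t bytes = 4 * sizeof(int) + count * sizeof(float);
  std::printf("expected file size: %zu bytes\n", bytes);       // 16 + 480 = 496
  return 0;
}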
void AsyncParamServer<Dtype>::ProcessUpdateTask() { const vector<Blob<Dtype> *> &net_params = solver_->net()->learnable_params(); std::deque<TaskRequest> to_update; update_queue_mutex_.lock(); to_update.swap(update_tasks_); update_queue_mutex_.unlock(); while (!to_update.empty() ) { TaskRequest task = to_update.front(); to_update.pop_front(); // copy to diff in solver int root_rank = world_rank_to_root_rank(task.part_root_rank_); Blob<Dtype>* blob = net_params[task.param_id_]; Dtype* solver_diff = blob->mutable_cpu_diff(); Dtype* mpi_buf = recv_buf_[make_pair(root_rank, task.param_id_)].first; int64_t count = recv_buf_[make_pair(root_rank, task.param_id_)].second; CHECK(count == blob->count() ); //copy MPI buffer to solver_diff int64_t part_offset = task.part_id_ * count / task.num_parts_; caffe_copy(count / task.num_parts_, mpi_buf + part_offset, solver_diff + part_offset); // apply update int blob_wise_iter = async_iter_[make_pair(task.param_id_, task.part_id_) ]; solver_->set_iter(blob_wise_iter); // TODO: supports partial param update per model parts solver_->ApplyUpdate(task.param_id_); DLOG(INFO) << "PS (iter " << blob_wise_iter << "): param id=" << task.param_id_ << " weight=" << net_params[task.param_id_]->sumsq_diff(); DLOG(INFO) << "PS (iter " << blob_wise_iter << "): param id=" << task.param_id_ << " data=" << net_params[task.param_id_]->sumsq_data(); //clean up solver_->net()->ClearParamDiffs(task.param_id_); async_iter_[ make_pair(task.param_id_, task.part_id_) ] += 1; update_cnt_ += 1; // copy model(data) in solver to mpi buffer mpi_buf = send_buf_[make_pair(root_rank, task.param_id_)].first; caffe_copy(count / task.num_parts_, blob->cpu_data() + part_offset, mpi_buf + part_offset); //ship off send_queue_mutex_.lock(); send_tasks_.push_back(task); send_queue_mutex_.unlock(); } }
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (source.count() != count_ || source.shape() != shape_) {
    if (reshape) {
      ReshapeLike(source);
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  switch (Caffe::mode()) {
    case Caffe::GPU: {
      if (device_->backend() == BACKEND_CUDA) {
        if (copy_diff) {
          caffe_copy(count_, source.gpu_diff(),
                     static_cast<Dtype*>(diff_->mutable_gpu_data()));
        } else {
          caffe_copy(count_, source.gpu_data(),
                     static_cast<Dtype*>(data_->mutable_gpu_data()));
        }
      } else {
#ifdef USE_GREENTEA
        if (copy_diff) {
          greentea_copy<Dtype>(
              count_, (cl_mem) (source.gpu_diff()), 0,
              (cl_mem) (diff_->mutable_gpu_data()), 0,
              &viennacl::ocl::get_context(device_->id()));
        } else {
          greentea_copy<Dtype>(
              count_, (cl_mem) (source.gpu_data()), 0,
              (cl_mem) (data_->mutable_gpu_data()), 0,
              &viennacl::ocl::get_context(device_->id()));
        }
#endif
      }
      break;
    }
    case Caffe::CPU: {
      if (copy_diff) {
        caffe_cpu_copy(count_, source.cpu_diff(),
                       static_cast<Dtype*>(diff_->mutable_cpu_data()));
      } else {
        caffe_cpu_copy(count_, source.cpu_data(),
                       static_cast<Dtype*>(data_->mutable_cpu_data()));
      }
      break;
    }
    default:
      LOG(FATAL) << "Unknown caffe mode.";
  }
}
void BatchTripletLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (propagate_down[1]) { LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs."; } if (propagate_down[0]) { Blob<Dtype>* feat = bottom[0]; const Dtype* feat_data = feat->cpu_data(); Dtype* feat_diff = feat->mutable_cpu_diff(); int count = feat->count(); int num = feat->num(); int dim = count / num; int agg_step = num * sizeof(Dtype); Dtype * agg_data = (Dtype *)aggregator_->mutable_cpu_data(); caffe_memset(num * agg_step, 0, agg_data); Dtype scale1 = Dtype(2) / triplets_.size() * mu_; for (int i=0; i<triplets_.size(); ++i) { int qry_id = triplets_[i].first_; int pos_id = triplets_[i].second_; int neg_id = triplets_[i].third_; agg_data[qry_id * num + neg_id] += scale1; agg_data[qry_id * num + pos_id] -= scale1; agg_data[pos_id * num + pos_id] += scale1; agg_data[pos_id * num + qry_id] -= scale1; agg_data[neg_id * num + qry_id] += scale1; agg_data[neg_id * num + neg_id] -= scale1; } Dtype scale2 = Dtype(2) / pos_pairs_.size() * (Dtype(1) - mu_); for (int i=0; i<pos_pairs_.size(); ++i) { int qry_id = pos_pairs_[i].first; int pos_id = pos_pairs_[i].second; agg_data[qry_id * num + qry_id] += scale2; agg_data[qry_id * num + pos_id] -= scale2; agg_data[pos_id * num + pos_id] += scale2; agg_data[pos_id * num + qry_id] -= scale2; } caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, num, Dtype(1), agg_data, feat_data, Dtype(0), feat_diff); } }
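// The accumulation above has a compact matrix form. Reading it off the +/-
// scale1 and +/- scale2 updates (the margin handling and triplet selection live
// elsewhere, so this covers only the differentiable part), the loss being
// differentiated is of the form
//   L = \frac{1-\mu}{|P|} \sum_{(q,p) \in P} \|f_q - f_p\|^2
//     + \frac{\mu}{|T|}  \sum_{(q,p,n) \in T} ( \|f_q - f_p\|^2 - \|f_q - f_n\|^2 ),
// and its gradient with respect to the feature matrix F (one row per sample) is
//   \partial L / \partial F = A F,
// which is exactly what the final caffe_cpu_gemm computes with the num x num
// aggregator matrix A assembled in agg_data.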
static void read_blob_from_file(const std::string& file_name, Blob<Dtype>& blob) { std::ifstream file(file_name.c_str(), std::ifstream::binary); if (file.fail()) { ASSERT_FALSE(true); return; } vector<int> shape(4, 0); file.read(reinterpret_cast<char*>(&shape[0]), 4 * sizeof(int)); ASSERT_FALSE(file.fail()); blob.Reshape(shape); file.read(reinterpret_cast<char*>(blob.mutable_cpu_data()), blob.count() * sizeof(Dtype)); ASSERT_FALSE(file.fail()); }
void CaffeMobile::putImage(AndroidBitmapInfo* info, void* pixels, const vector<Blob<float>*>& resImage) { Blob<float> * srcBlob = *resImage.data(); LOG(DEBUG) << "srcBlob received"; vector<int> shape = {1, 3, (int) info->width, (int) info->height }; LOG(DEBUG) << "shape configured"; Blob<float>* imgBlob = new Blob<float>(); LOG(DEBUG) << "Blob created"; imgBlob->Reshape(shape); LOG(DEBUG) << "imgBlob reshaped"; imgBlob->CopyFrom(*srcBlob, false, true); LOG(DEBUG) << "imgBlob copied"; int size = imgBlob->count(); LOG(DEBUG) << "imgBlob size is: " << size; /*Partially from https://github.com/ruckus/android-image-filter-ndk*/ uint32_t* pixelRow; int ix, iy, red, green, blue; for(iy = 0; iy < (int) info->height; iy++){ pixelRow = (uint32_t*) pixels; for(ix =0; ix < (int) info->width; ix++){ red = (int) clip(imgBlob->data_at(0,0,iy,ix), 0, 255); green = (int) clip(imgBlob->data_at(0,1,iy,ix), 0, 255); blue = (int) clip(imgBlob->data_at(0,2,iy,ix), 0, 255); pixelRow[ix] = ((red << 16) & 0x00FF0000) | ((green << 8) & 0x0000FF00) | (blue & 0x000000FF); } pixels = (char*)pixels + info->stride; } LOG(DEBUG) << "before return putImage " << size; return; }
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { if (source.count() != count_ || source.shape() != shape_) { if (reshape) { ReshapeLike(source); } else { LOG(FATAL) << "Trying to copy blobs of different sizes."; } } if (copy_diff) { caffe_copy(count_, source.cpu_diff(), static_cast<Dtype*>(diff_->mutable_cpu_data())); } else { caffe_copy(count_, source.cpu_data(), static_cast<Dtype*>(data_->mutable_cpu_data())); } }
AsyncParamServer<Dtype>::AsyncParamServer(boost::shared_ptr<Solver<Dtype> > solver) : recv_tasks_iter_(0), solver_(solver), send_cnt_(0), update_cnt_(0) { // setup the mpi buffers and recv task vector int mpi_rank = get_node_rank(); shared_ptr<Net<Dtype>> net = solver_->net(); const vector<Blob<Dtype> *> &net_params = net->learnable_params(); const std::vector<bool>& layer_need_backward{ net->layer_need_backward() }; for (int i = 0; i < get_num_groups(); i++) { int root_rank = get_group_root_rank(i); //iterate over layers and skip the ones without params for (int j = 0; j < net->layers().size(); j++) { shared_ptr<Layer<Dtype>> layer = net->layers()[j]; if (!layer_need_backward[j]) continue; const MultinodeLayerParameter & mn_layer_param = layer->layer_param().multinode(); int model_parts = mn_layer_param.model_parts(); int mn_num_nodes = mn_layer_param.num_nodes(); GetCanonicalMnParam(mn_num_nodes, model_parts); vector<int> layer_param_ids = net->get_layer_learnable_param_ids(j); for (int k = 0; k < layer_param_ids.size(); k++) { int param_id = layer_param_ids[k]; if (!layer->ParamNeedReduce(k)) continue; if (param_to_server_rank(j, param_id) != mpi_rank) continue; Blob<Dtype> *blob = net_params[param_id]; // Setup buf for recv Dtype* buf = (Dtype*)std::malloc(sizeof(Dtype) * blob->count()); recv_buf_[make_pair(root_rank, param_id)] = make_pair(buf, blob->count()); for (int part_id = 0; part_id < model_parts; part_id++) { int part_root_rank = get_group_root_rank(i, part_id, model_parts); int64_t part_offset = part_id * blob->count() / model_parts; TaskRequest recv_task(part_root_rank, j, param_id, part_id, model_parts); recv_tasks_.push_back(recv_task); rank_layer_blob_to_vec_pos[make_pair(part_root_rank, param_id)] = recv_tasks_.size() - 1; MPI_Irecv(buf + part_offset, blob->count() / model_parts, DtypeToMPIDtype<Dtype>(), part_root_rank, recv_task.GetTag(), MPI_COMM_WORLD, &(recv_tasks_[recv_tasks_.size() - 1].mpi_request_)); async_iter_[make_pair(param_id, part_id)] = solver_->iter(); } // Setup buf for send buf = (Dtype*)std::malloc(sizeof(Dtype) * blob->count()); send_buf_[make_pair(root_rank, param_id)] = make_pair(buf, blob->count()); } } } // assumed iter is started from 0 total_update_ = total_send_ = recv_tasks_.size() * solver_->param().max_iter(); }
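// A minimal standalone sketch (hypothetical count and model_parts) of the
// partitioning used for the MPI_Irecv calls above: each of the model_parts
// parts covers a contiguous slice of count / model_parts elements starting at
// part_id * count / model_parts, which assumes count is divisible by
// model_parts, as the code above does.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t count = 12;   // hypothetical blob->count()
  const int model_parts = 4;  // hypothetical number of model parts
  for (int part_id = 0; part_id < model_parts; ++part_id) {
    const int64_t part_offset = part_id * count / model_parts;
    std::printf("part %d: offset %lld, length %lld\n", part_id,
                (long long)part_offset, (long long)(count / model_parts));
  }
  return 0;
}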
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const bool propagate_down, vector<Blob<Dtype>*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); if (concat_dim_ == 0) { int offset_num = 0; for (int i = 0; i < bottom->size(); ++i) { Blob<Dtype>* blob = (*bottom)[i]; Dtype* bottom_diff = blob->mutable_cpu_diff(); caffe_copy(blob->count(), top_diff+top[0]->offset(offset_num), bottom_diff); offset_num += blob->num(); } } else if (concat_dim_ == 1) { int offset_channel = 0; for (int i = 0; i < bottom->size(); ++i) { Blob<Dtype>* blob = (*bottom)[i]; Dtype* bottom_diff = blob->mutable_cpu_diff(); int num_elem = blob->channels()*blob->height()*blob->width(); for (int n = 0; n < num_; ++n) { caffe_copy(num_elem, top_diff+top[0]->offset(n, offset_channel), bottom_diff+blob->offset(n)); } offset_channel += blob->channels(); } }else if (concat_dim_ == 4){// lipengyu add int top_bias = 0; for(int n = 0 ; n < num_ ; n++) { for(int i = 0 ; i < bottom->size() ; i++) { Blob<Dtype>* blob = (*bottom)[i]; Dtype* bottom_diff = blob->mutable_cpu_diff(); int num_elem = blob->channels()*blob->height()*blob->width(); caffe_copy(num_elem, top_diff+ top_bias,//top[0]->offset(n, offset_channel), bottom_diff+blob->offset(n)); top_bias += num_elem; } } }else { LOG(FATAL) << "concat_dim along dim" << concat_dim_ << " not implemented yet"; } }
TYPED_TEST(DataTransformTest, TestMeanValue) { TransformationParameter transform_param; const bool unique_pixels = false; // pixels are equal to label const int label = 0; const int channels = 3; const int height = 4; const int width = 5; const int mean_value = 2; transform_param.add_mean_value(mean_value); Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width); DataTransformer<TypeParam>* transformer = new DataTransformer<TypeParam>(transform_param, TEST); transformer->InitRand(); transformer->Transform(datum, blob); for (int j = 0; j < blob->count(); ++j) { EXPECT_EQ(blob->cpu_data()[j], label - mean_value); } }
TYPED_TEST(DataTransformTest, TestMeanFile) { TransformationParameter transform_param; const bool unique_pixels = true; // pixels are consecutive ints [0,size] const int_tp label = 0; const int_tp channels = 3; const int_tp height = 4; const int_tp width = 5; const int_tp size = channels * height * width; // Create a mean file string* mean_file = new string(); MakeTempFilename(mean_file); BlobProto blob_mean; blob_mean.set_num(1); blob_mean.set_channels(channels); blob_mean.set_height(height); blob_mean.set_width(width); for (int_tp j = 0; j < size; ++j) { blob_mean.add_data(j); } LOG(INFO) << "Using temporary mean_file " << *mean_file; WriteProtoToBinaryFile(blob_mean, *mean_file); transform_param.set_mean_file(*mean_file); Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width); DataTransformer<TypeParam>* transformer = new DataTransformer<TypeParam>(transform_param, TEST, Caffe::GetDefaultDevice()); transformer->InitRand(); transformer->Transform(datum, blob); for (int_tp j = 0; j < blob->count(); ++j) { EXPECT_EQ(blob->cpu_data()[j], 0); } }
TYPED_TEST(DataTransformTest, TestEmptyTransform) { TransformationParameter transform_param; const bool unique_pixels = false; // all pixels the same equal to label const int label = 0; const int channels = 3; const int height = 4; const int width = 5; Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width); DataTransformer<TypeParam>* transformer = new DataTransformer<TypeParam>(transform_param, TEST); transformer->InitRand(); transformer->Transform(datum, blob); EXPECT_EQ(blob->num(), 1); EXPECT_EQ(blob->channels(), datum.channels()); EXPECT_EQ(blob->height(), datum.height()); EXPECT_EQ(blob->width(), datum.width()); for (int j = 0; j < blob->count(); ++j) { EXPECT_EQ(blob->cpu_data()[j], label); } }
TYPED_TEST(DataTransformTest, TestEmptyTransformUniquePixels) { TransformationParameter transform_param; const bool unique_pixels = true; // pixels are consecutive ints [0,size] const int label = 0; const int channels = 3; const int height = 4; const int width = 5; Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); Blob<TypeParam>* blob = new Blob<TypeParam>(1, 3, 4, 5); DataTransformer<TypeParam>* transformer = new DataTransformer<TypeParam>(transform_param, TEST); transformer->InitRand(); transformer->Transform(datum, blob); EXPECT_EQ(blob->num(), 1); EXPECT_EQ(blob->channels(), datum.channels()); EXPECT_EQ(blob->height(), datum.height()); EXPECT_EQ(blob->width(), datum.width()); for (int j = 0; j < blob->count(); ++j) { EXPECT_EQ(blob->cpu_data()[j], j); } }
void project(const Template &src, Template &dst) const { CaffeNet *net = caffeResource.acquire(); if (net->layers()[0]->layer_param().type() != "MemoryData") qFatal("OpenBR requires the first layer in the network to be a MemoryDataLayer"); MemoryDataLayer<float> *dataLayer = static_cast<MemoryDataLayer<float> *>(net->layers()[0].get()); if (src.size() != dataLayer->batch_size()) qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); net->ForwardPrefilled(); Blob<float> *output = net->blobs().back().get(); int dimFeatures = output->count() / dataLayer->batch_size(); for (int n = 0; n < dataLayer->batch_size(); n++) dst += Mat(1, dimFeatures, CV_32FC1, output->mutable_cpu_data() + output->offset(n)).clone(); caffeResource.release(net); }
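// For reference, output->offset(n) used above is the standard Caffe
// linearization offset(n, c, h, w) = ((n * C + c) * H + h) * W + w evaluated at
// c = h = w = 0, i.e. n * C * H * W. Assuming the output blob's first axis is
// the batch, that equals n * dimFeatures, so each cloned cv::Mat row is exactly
// one sample's feature vector.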
void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (bias_layer_ && this->param_propagate_down_[this->param_propagate_down_.size() - 1]) { bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_); } const bool scale_param = (bottom.size() == 1); Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1]; if ((!scale_param && propagate_down[1]) || (scale_param && this->param_propagate_down_[0])) { const Dtype* top_diff = top[0]->cpu_diff(); const bool in_place = (bottom[0] == top[0]); const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data(); // Hack: store big eltwise product in bottom[0] diff, except in the special // case where this layer itself does the eltwise product, in which case we // can store it directly in the scale diff, and we're done. // If we're computing in-place (and not doing eltwise computation), this // hack doesn't work and we store the product in temp_. const bool is_eltwise = (bottom[0]->count() == scale->count()); Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() : (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff())); caffe_mul(top[0]->count(), top_diff, bottom_data, product); if (!is_eltwise) { Dtype* sum_result = NULL; if (inner_dim_ == 1) { sum_result = product; } else if (sum_result_.count() == 1) { const Dtype* sum_mult = sum_multiplier_.cpu_data(); Dtype* scale_diff = scale->mutable_cpu_diff(); if (scale_param) { Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult); *scale_diff += result; } else { *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult); } } else { const Dtype* sum_mult = sum_multiplier_.cpu_data(); sum_result = (outer_dim_ == 1) ? scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data(); caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_, Dtype(1), product, sum_mult, Dtype(0), sum_result); } if (outer_dim_ != 1) { const Dtype* sum_mult = sum_multiplier_.cpu_data(); Dtype* scale_diff = scale->mutable_cpu_diff(); if (scale_dim_ == 1) { if (scale_param) { Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result); *scale_diff += result; } else { *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result); } } else { caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_, Dtype(1), sum_result, sum_mult, Dtype(scale_param), scale_diff); } } } } if (propagate_down[0]) { const Dtype* top_diff = top[0]->cpu_diff(); const Dtype* scale_data = scale->cpu_data(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); for (int n = 0; n < outer_dim_; ++n) { for (int d = 0; d < scale_dim_; ++d) { const Dtype factor = scale_data[d]; caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff); bottom_diff += inner_dim_; top_diff += inner_dim_; } } } }
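// For reference, in the non-eltwise case the reductions above implement the
// scale gradient
//   \partial E / \partial \gamma_d
//     = \sum_{n=1}^{outer\_dim} \sum_{i=1}^{inner\_dim}
//       (\partial E / \partial y_{n,d,i}) \, x_{n,d,i},
// i.e. the elementwise product top_diff * bottom_data summed over the outer and
// inner dimensions; the dot/gemv calls perform those sums against the all-ones
// sum_multiplier_, with beta = scale_param so that a learned scale accumulates
// into its existing diff. The bottom gradient is simply
//   \partial E / \partial x_{n,d,i} = \gamma_d \, (\partial E / \partial y_{n,d,i}),
// which is the caffe_cpu_scale loop at the end.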
void Blob<Dtype>::ShareData(const Blob& other) { CHECK_EQ(count_, other.count()); data_ = other.data(); }
void Blob<Dtype>::ShareDiff(const Blob& other) { CHECK_EQ(count_, other.count()); diff_ = other.diff(); }
void EvalDetectionLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // const Dtype* input_data = bottom[0]->cpu_data();  // network output, N*13*13*125
  const Dtype* label_data = bottom[1]->cpu_data();  // ground-truth label data, N*30*5
  // LOG(INFO) << bottom[0]->data_at(0,0,0,0) << " " << bottom[0]->data_at(0,0,0,1);
  Blob<Dtype> swap;  // network output N*13*13*125, reshaped to N * (13*13) * 5 * 25
  // N*(5*(5+num_class_))*13*13 -> N * (13*13) * 5 * (5+num_class_)
  swap.Reshape(bottom[0]->num(), bottom[0]->height() * bottom[0]->width(),
               num_object_, bottom[0]->channels() / num_object_);
  Dtype* swap_data = swap.mutable_cpu_data();  // CPU data
  caffe_set(swap.count(), Dtype(0.0), swap_data);  // initialize to 0
  int index = 0;
  for (int b = 0; b < bottom[0]->num(); ++b)  // images
    for (int h = 0; h < bottom[0]->height(); ++h)  // grid cells, 13
      for (int w = 0; w < bottom[0]->width(); ++w)  // grid cells, 13
        for (int c = 0; c < bottom[0]->channels(); ++c)  // 5*25 = 125
        {
          swap_data[index++] = bottom[0]->data_at(b, c, h, w);
        }
  //*******************************************************************************//
  // caffe_set(swap.count(), Dtype(0.0), swap_data);  // initialize to 0
  // int p_index = (7*13+4)*125;
  // swap_data[p_index]=-0.1020;
  // swap_data[p_index+1]=2.0867;
  // swap_data[p_index+2]=1.612;
  // swap_data[p_index+3]=1.0515;
  // swap_data[p_index+4]=1.0;
  // swap_data[p_index+5+11]=100;
  //*******************************************************************************//
  Dtype* top_data = top[0]->mutable_cpu_data();  // layer output, CPU data
  caffe_set(top[0]->count(), Dtype(0), top_data);  // initialize to 0
  Dtype all_mAP = 0.0;  // mAP over the whole batch
  for (int i = 0; i < bottom[0]->num(); ++i) {  // N images
    int input_index = i * bottom[0]->count(1);  // network output for image i: i * 13*13*125
    int true_index = i * bottom[1]->count(1);   // ground-truth labels: i * 30*5
    int top_index = i * top[0]->count(1);       // output data:
    // i * (20 + 13*13*5*4 + 1) -> i * (13*13*5*4 + 1)
    // (the leading 20 entries would hold the per-class ground-truth object counts)
    // Collect the ground-truth boxes =========================================
    map<int, vector<BoxData> > gt_boxes;
    // Extract the ground-truth boxes (label_ + score_ + box_) from this
    // image's label data: returns, per class label, the boxes of that class.
    GetGTBox(side_, label_data + true_index, &gt_boxes);
    // Record how often each ground-truth class occurs (disabled below) =======
    for (std::map<int, vector<BoxData > >::iterator it = gt_boxes.begin();
         it != gt_boxes.end(); ++it) {
      // iterate over every ground-truth label
      // int label = it->first;  // class label
      vector<BoxData>& g_boxes = it->second;  // BoxData: label_ + score_ + box_
      for (int j = 0; j < g_boxes.size(); ++j) {  // number of boxes
        // first 20 output entries ============================================
        // top_data[top_index + label] += 1;  // ground-truth count per class
      }
    }
    // Collect the predicted boxes ============================================
    map<int, vector<BoxData> > pred_boxes;
    // 13*13*5 raw predictions -> NMS + confidence thresholding -> pred_boxes
    // (far fewer boxes remain)
    // GetPredBox(side_, num_object_, num_class_, input_data + input_index,
    //            &pred_boxes, sqrt_, constriant_, score_type_, nms_);
    GetPredBox(side_, num_object_, num_class_, swap_data + input_index,
               &pred_boxes, score_type_, nms_, biases_);
    // Remaining 13*13*5*4 output entries =====================================
    // int index = top_index + num_class_ + 1;  // after 20 + 1 entries come the
    //                                          // (label + score + tp + fp) tuples
    int index = top_index + 1;  // after the single mAP entry come the
                                // (label + score + tp + fp) tuples
    int pred_count(0);
    Dtype mAP = 0.0;
    int pre_clas_num = 0;
    // float AP = 0.0;  // int tp
    // For each predicted class, match its boxes against the best-fitting
    // ground-truth boxes to get the class AP, then average into the image mAP.
    for (std::map<int, vector<BoxData> >::iterator it = pred_boxes.begin();
         it != pred_boxes.end(); ++it) {
      Dtype AP = 0.0;  // AP of this class
      int tp = 0;      // correct predictions
      int fp = 0;      // wrong predictions
      ++pre_clas_num;  // number of classes predicted for this image
      int label = it->first;                  // predicted class label
      vector<BoxData>& p_boxes = it->second;  // its predicted boxes
      // The class does not appear in the ground truth ========================
      if (gt_boxes.find(label) == gt_boxes.end()) {
        for (int b = 0; b < p_boxes.size(); ++b) {  // every predicted box of this class
          top_data[index + pred_count * 4 + 0] = p_boxes[b].label_;  // label
          top_data[index + pred_count * 4 + 1] = p_boxes[b].score_;  // score
          top_data[index + pred_count * 4 + 2] = 0;  // tp
          top_data[index + pred_count * 4 + 3] = 1;  // fp: incorrectly predicted as positive
          ++pred_count;
          ++fp;  // wrong prediction
        }
        // precision of this class (cast to avoid integer division); zero here
        if (tp + fp) AP = static_cast<Dtype>(tp) / (tp + fp);
        mAP += AP;  // accumulate; could be skipped since AP is 0 here
        continue;   // nothing to match, only fp recorded
      }
      // The predicted class exists in the ground truth =======================
      vector<BoxData>& g_boxes = gt_boxes[label];   // ground-truth boxes of this class
      vector<bool> records(g_boxes.size(), false);  // whether each ground-truth box
                                                    // has already been matched
      for (int k = 0; k < p_boxes.size(); ++k) {  // every predicted box =======
        top_data[index + pred_count * 4 + 0] = p_boxes[k].label_;  // label
        top_data[index + pred_count * 4 + 1] = p_boxes[k].score_;  // score
        Dtype max_iou(-1);  // best IoU against the ground-truth boxes
        int idx(-1);        // index of that ground-truth box
        // find the ground-truth box closest to this prediction ===============
        for (int g = 0; g < g_boxes.size(); ++g) {
          Dtype iou = Calc_iou(p_boxes[k].box_, g_boxes[g].box_);  // intersection over union
          if (iou > max_iou) {
            max_iou = iou;  // best IoU so far
            idx = g;        // matching ground-truth box
          }
        }
        // decide correct/incorrect from the IoU
        if (max_iou >= threshold_) {
          if (!records[idx]) {
            records[idx] = true;  // mark this ground-truth box as matched
            top_data[index + pred_count * 4 + 2] = 1;  // tp: correct detection
            top_data[index + pred_count * 4 + 3] = 0;  // fp
            ++tp;  // correct prediction
          } else {
            // this ground-truth box was already matched; a duplicate
            // detection counts as a false positive
            top_data[index + pred_count * 4 + 2] = 0;
            top_data[index + pred_count * 4 + 3] = 1;
            ++fp;  // wrong prediction
          }
        }
        ++pred_count;
      }
      // precision of this class (cast to avoid integer division)
      if (tp + fp) AP = static_cast<Dtype>(tp) / (tp + fp);
      mAP += AP;  // accumulate over classes
    }
    if (pre_clas_num) {
      mAP /= pre_clas_num;
      // mAP /= num_class_;  // divide by the total number of classes, or by the
      //                     // number of predicted classes?
    } else {
      mAP = 0.0;
    }
    // write this image's mAP
    top_data[index - 1] = mAP;
    all_mAP += mAP;
  }
  if (bottom[0]->num())
    all_mAP /= (bottom[0]->num());
  top_data[0] = all_mAP;
}
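// As computed above, the per-class quantity is a precision rather than an
// interpolated average precision:
//   AP_c = tp_c / (tp_c + fp_c),
//   mAP_image = (1 / |C_pred|) * \sum_{c \in C_pred} AP_c,
// where C_pred is the set of classes predicted for that image, and the value
// written to top_data[0] is the mean of mAP_image over the images in the batch.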