Example #1
	void conv_col2im_gpu(const Dtype* col_buff, Dtype* data){
		if (!force_nd_im2col && num_spatial_axes == 2) {
			col2im_gpu(col_buff, conv_in_channels, conv_input_shape.cpu_data()[1], conv_input_shape.cpu_data()[2],
				kernel_shape.cpu_data()[0], kernel_shape.cpu_data()[1], pad.cpu_data()[0], pad.cpu_data()[1],
				stride.cpu_data()[0], stride.cpu_data()[1], data);
		}
	}
Example #2
  inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      col2im_gpu(col_buff, conv_in_channels_,
          conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
          kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
          pad_.cpu_data()[0], pad_.cpu_data()[1],
          stride_.cpu_data()[0], stride_.cpu_data()[1],
          dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
    } else {
      col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
          conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
          kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
          dilation_.gpu_data(), data);
    }
  }
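The 2-D fast path above relies on the usual im2col/col2im geometry. As a reference, a minimal standalone sketch (not part of the class; the helper name is illustrative) of how the output extent per spatial axis follows from kernel, pad, stride, and dilation:

// Output size per spatial axis for the 2-D im2col/col2im fast path.
// `extent` is the effective kernel size once dilation is applied.
// Illustrative helper only; not part of the Caffe class above.
inline int conv_out_size(int input, int kernel, int pad, int stride,
                         int dilation) {
  const int extent = dilation * (kernel - 1) + 1;
  return (input + 2 * pad - extent) / stride + 1;
}
// e.g. input 7, kernel 3, pad 1, stride 1, dilation 2:
// (7 + 2 - 5) / 1 + 1 = 5 output positions.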
Example #3
template <typename Dtype>
void LRNLayerTest<Dtype>::ReferenceLRNForward(
    const Blob<Dtype>& blob_bottom, const LayerParameter& layer_param,
    Blob<Dtype>* blob_top) {
  blob_top->Reshape(blob_bottom.num(), blob_bottom.channels(),
      blob_bottom.height(), blob_bottom.width());
  const Dtype* bottom_data = blob_bottom.cpu_data();
  Dtype* top_data = blob_top->mutable_cpu_data();
  Dtype alpha = layer_param.alpha();
  Dtype beta = layer_param.beta();
  int size = layer_param.local_size();
  for (int n = 0; n < blob_bottom.num(); ++n) {
    for (int c = 0; c < blob_bottom.channels(); ++c) {
      for (int h = 0; h < blob_bottom.height(); ++h) {
        for (int w = 0; w < blob_bottom.width(); ++w) {
          int c_start = c - (size - 1) / 2;
          int c_end = min(c_start + size, blob_bottom.channels());
          c_start = max(c_start, 0);
          Dtype scale = 1.;
          for (int i = c_start; i < c_end; ++i) {
            Dtype value = blob_bottom.data_at(n, i, h, w);
            scale += value * value * alpha / size;
          }
          *(top_data + blob_top->offset(n, c, h, w)) =
            blob_bottom.data_at(n, c, h, w) / pow(scale, beta);
        }
      }
    }
  }
}
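In formula form, the reference loop computes across-channel LRN (a restatement of the code above, with k = local_size, C = channels, and the channel window clipped to valid indices):

\[
\mathrm{scale}(n,c,h,w) \;=\; 1 + \frac{\alpha}{k} \sum_{c'=\max\left(0,\;c-\lfloor (k-1)/2 \rfloor\right)}^{\min\left(C,\;c-\lfloor (k-1)/2 \rfloor + k\right)-1} x(n,c',h,w)^2,
\qquad
y(n,c,h,w) \;=\; \frac{x(n,c,h,w)}{\mathrm{scale}(n,c,h,w)^{\beta}}
\]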
Example #4
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (num_ != source.num() || channels_ != source.channels() ||
      height_ != source.height() || width_ != source.width()) {
    if (reshape) {
      Reshape(source.num(), source.channels(), source.height(), source.width());
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  switch (Caffe::mode()) {
  case Caffe::GPU:
    if (copy_diff) {
      caffe_copy(count_, source.gpu_diff(),
          static_cast<Dtype*>(diff_->mutable_gpu_data()));
    } else {
      caffe_copy(count_, source.gpu_data(),
          static_cast<Dtype*>(data_->mutable_gpu_data()));
    }
    break;
  case Caffe::CPU:
    if (copy_diff) {
      caffe_copy(count_, source.cpu_diff(),
          static_cast<Dtype*>(diff_->mutable_cpu_data()));
    } else {
      caffe_copy(count_, source.cpu_data(),
          static_cast<Dtype*>(data_->mutable_cpu_data()));
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
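A minimal usage sketch for this legacy 4-axis CopyFrom (assumes CPU mode; the shapes are made up for illustration):

Blob<float> src(2, 3, 4, 5);
Blob<float> dst;  // starts empty
// reshape == true lets dst adopt src's shape before copying;
// with reshape == false a size mismatch is fatal.
dst.CopyFrom(src, /*copy_diff=*/false, /*reshape=*/true);
CHECK_EQ(dst.count(), src.count());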
Example #5
TYPED_TEST(DataTransformTest, TestCropSize) {
  TransformationParameter transform_param;
  const bool unique_pixels = false;  // all pixels the same, equal to the label
  const int_tp label = 0;
  const int_tp channels = 3;
  const int_tp height = 4;
  const int_tp width = 5;
  const int_tp crop_size = 2;

  transform_param.set_crop_size(crop_size);
  Datum datum;
  FillDatum(label, channels, height, width, unique_pixels, &datum);
  DataTransformer<TypeParam>* transformer =
      new DataTransformer<TypeParam>(transform_param, TEST,
                                     Caffe::GetDefaultDevice());
  transformer->InitRand();
  Blob<TypeParam>* blob =
      new Blob<TypeParam>(1, channels, crop_size, crop_size);
  for (int_tp iter = 0; iter < this->num_iter_; ++iter) {
    transformer->Transform(datum, blob);
    EXPECT_EQ(blob->num(), 1);
    EXPECT_EQ(blob->channels(), datum.channels());
    EXPECT_EQ(blob->height(), crop_size);
    EXPECT_EQ(blob->width(), crop_size);
    for (int_tp j = 0; j < blob->count(); ++j) {
      EXPECT_EQ(blob->cpu_data()[j], label);
    }
  }
}
Example #6
TYPED_TEST(DataTransformTest, TestMeanValues) {
  TransformationParameter transform_param;
  const bool unique_pixels = false;  // pixels are equal to label
  const int_tp label = 0;
  const int_tp channels = 3;
  const int_tp height = 4;
  const int_tp width = 5;

  transform_param.add_mean_value(0);
  transform_param.add_mean_value(1);
  transform_param.add_mean_value(2);
  Datum datum;
  FillDatum(label, channels, height, width, unique_pixels, &datum);
  Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
  DataTransformer<TypeParam>* transformer =
      new DataTransformer<TypeParam>(transform_param, TEST,
                                     Caffe::GetDefaultDevice());
  transformer->InitRand();
  transformer->Transform(datum, blob);
  for (int_tp c = 0; c < channels; ++c) {
    for (int_tp j = 0; j < height * width; ++j) {
      EXPECT_EQ(blob->cpu_data()[blob->offset(0, c) + j], label - c);
    }
  }
}
Example #7
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (source.count() != count_ || source.shape() != shape_) {
    if (reshape) {
      ReshapeLike(source);
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  switch (Caffe::mode()) {
  case Caffe::GPU:
    if (copy_diff) {
      caffe_copy(count_, source.gpu_diff(),
          static_cast<Dtype*>(diff_->mutable_gpu_data()));
    } else {
      caffe_copy(count_, source.gpu_data(),
          static_cast<Dtype*>(data_->mutable_gpu_data()));
    }
    break;
  case Caffe::CPU:
    if (copy_diff) {
      caffe_copy(count_, source.cpu_diff(),
          static_cast<Dtype*>(diff_->mutable_cpu_data()));
    } else {
      caffe_copy(count_, source.cpu_data(),
          static_cast<Dtype*>(data_->mutable_cpu_data()));
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
Example #8
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (num_ != source.num() || channels_ != source.channels() ||
      height_ != source.height() || width_ != source.width()) {
    if (reshape) {
      Reshape(source.num(), source.channels(), source.height(), source.width());
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  switch (Caffe::mode()) {
#if 0
  case Caffe::GPU:
    if (copy_diff) {
      CUDA_CHECK(cudaMemcpy(diff_->mutable_gpu_data(), source.gpu_diff(),
          sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
    } else {
      CUDA_CHECK(cudaMemcpy(data_->mutable_gpu_data(), source.gpu_data(),
          sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
    }
    break;
#endif
  case Caffe::CPU:
    if (copy_diff) {
      memcpy(diff_->mutable_cpu_data(), source.cpu_diff(),
          sizeof(Dtype) * count_);
    } else {
      memcpy(data_->mutable_cpu_data(), source.cpu_data(),
        sizeof(Dtype) * count_);
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
Example #9
  void TestForward() {
    // Get the loss without a specified objective weight -- should be
    // equivalent to explicitly specifying a weight of 1.
    LayerParameter layer_param;
    layer_param.mutable_multi_t_loss_param()->set_num_center(N_);
    EntropyTLossLayer<Dtype> layer_weight_1(layer_param);
    layer_weight_1.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);
    layer_weight_1.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);

    FillerParameter filler_param;
    GaussianFiller<Dtype> filler2(filler_param);
    filler2.Fill(layer_weight_1.blobs()[0].get());
    caffe_rng_uniform(layer_weight_1.blobs()[1]->count(), Dtype(0.9),
        Dtype(1.1), layer_weight_1.blobs()[1]->mutable_cpu_data());
    layer_weight_1.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);

    const Dtype loss_weight_1 =
        layer_weight_1.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);

    // Get the loss again with a different objective weight; check that it is
    // scaled appropriately.
    const Dtype kLossWeight = 3.7;
    LayerParameter layer_param2;
    layer_param2.mutable_multi_t_loss_param()->set_num_center(N_);
    layer_param2.add_loss_weight(kLossWeight);
    EntropyTLossLayer<Dtype> layer_weight_2(layer_param2);
    layer_weight_2.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);
    layer_weight_2.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);

    caffe_copy(layer_weight_2.blobs()[0]->count(), layer_weight_1.blobs()[0]->cpu_data(), 
      layer_weight_2.blobs()[0]->mutable_cpu_data());
    caffe_copy(layer_weight_2.blobs()[1]->count(), layer_weight_1.blobs()[1]->cpu_data(), 
      layer_weight_2.blobs()[1]->mutable_cpu_data());
    layer_weight_2.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);

    const Dtype loss_weight_2 =
        layer_weight_2.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);
    const Dtype kErrorMargin = 1e-3;
    EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin);
    // Make sure the loss is non-trivial.
    const Dtype kNonTrivialAbsThresh = 1e-1;
    EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh);

    int m = M_, n = layer_param.multi_t_loss_param().num_center(), p = K_;
    Blob<Dtype> *distance = layer_weight_1.distance();
    const Dtype *cpu_data = blob_bottom_data_->cpu_data();
    const Dtype *cpu_dist = distance->cpu_data();
    const Dtype *cpu_center = layer_weight_1.blobs()[0]->cpu_data();
    const Dtype *cpu_sigma = layer_weight_1.blobs()[1]->cpu_data();
    for (int i = 0; i < m; ++i) {
      for (int j = 0; j < n; ++j) {
        Dtype acc = Dtype(0);
        for (int k = 0; k < p; ++k) {
          acc += (cpu_data[i*p + k] - cpu_center[k*n + j])*(cpu_data[i*p + k] - cpu_center[k*n + j])*cpu_sigma[k*n+j];
        }
        EXPECT_NEAR(acc, cpu_dist[i*n + j], kErrorMargin) << i << " " << j;
      }
    }
  }
Example #10
	//	wrap im2col using the parameters in this class
	//	data is 3D (channels, height, width); col_buff is 3D
	//	(channels * kernel_h * kernel_w, output_h, output_w)
	void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) {
		// only implements 2-D convolution
		if (!force_nd_im2col && num_spatial_axes == 2) {
			//	im2col transforms the input into a form that is convenient for convolution
			//	use the conv_xxx members because the dimensions can swap in Reshape(); we need the dynamic input shape
			im2col_cpu(data, conv_in_channels, conv_input_shape.cpu_data()[1], conv_input_shape.cpu_data()[2],
				kernel_shape.cpu_data()[0], kernel_shape.cpu_data()[1], pad.cpu_data()[0], pad.cpu_data()[1],
				stride.cpu_data()[0], stride.cpu_data()[1], col_buff);
		}
	}
		}
	}
Example #11
  inline void greentea_conv_im2col_gpu(const Dtype* data, const int_tp data_off,
                                       Dtype* col_buff,
                                       const int_tp col_buff_off) {
    viennacl::ocl::context &ctx = viennacl::ocl::get_context(
        this->device_->id());
    viennacl::ocl::program &program = this->device_->program();

    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      greentea_im2col_gpu<Dtype>(&program, &ctx, (cl_mem) data, data_off,
                                 conv_in_channels_,
                                 conv_input_shape_.cpu_data()[1],
                                 conv_input_shape_.cpu_data()[2],
                                 kernel_shape_.cpu_data()[0],
                                 kernel_shape_.cpu_data()[1],
                                 pad_.cpu_data()[0], pad_.cpu_data()[1],
                                 stride_.cpu_data()[0], stride_.cpu_data()[1],
                                 dilation_.cpu_data()[0],
                                 dilation_.cpu_data()[1], (cl_mem) col_buff,
                                 col_buff_off);
    } else {
      greentea_im2col_nd_gpu<Dtype>(&program, &ctx, (cl_mem) data, data_off,
                                    num_spatial_axes_,
                                    (int_tp)0,
                                    num_kernels_im2col_,
                                    (cl_mem) (conv_input_shape_.gpu_data()),
                                    (cl_mem) (col_buffer_.gpu_shape()),
                                    (cl_mem) (kernel_shape_.gpu_data()),
                                    (cl_mem) (pad_.gpu_data()),
                                    (cl_mem) (stride_.gpu_data()),
                                    (cl_mem) (dilation_.gpu_data()),
                                    (cl_mem) col_buff, col_buff_off);
    }
  }
Example #12
template <>
void hdf5_save_nd_dataset<double>(
    const hid_t file_id, const string& dataset_name, const Blob<double>& blob) {
  hsize_t dims[HDF5_NUM_DIMS];
  dims[0] = blob.num();
  dims[1] = blob.channels();
  dims[2] = blob.height();
  dims[3] = blob.width();
  herr_t status = H5LTmake_dataset_double(
      file_id, dataset_name.c_str(), HDF5_NUM_DIMS, dims, blob.cpu_data());
  CHECK_GE(status, 0) << "Failed to make double dataset " << dataset_name;
}
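A hedged usage sketch for the double specialization above, using the plain HDF5 C API for file handling (error checks elided; file and dataset names are arbitrary):

hid_t file_id = H5Fcreate("blob.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
Blob<double> blob(2, 3, 4, 5);  // exactly 4 axes, matching HDF5_NUM_DIMS
caffe_set(blob.count(), 1.0, blob.mutable_cpu_data());  // non-trivial contents
hdf5_save_nd_dataset<double>(file_id, "data", blob);
H5Fclose(file_id);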
Example #13
  int NumSequenceMatches(const TransformationParameter transform_param,
      const Datum& datum, Phase phase) {
    // Get crop sequence with Caffe seed 1701.
    DataTransformer<Dtype>* transformer =
        new DataTransformer<Dtype>(transform_param, phase);
    const int crop_size = transform_param.crop_size();
    int crop_h = transform_param.crop_h();
    int crop_w = transform_param.crop_w();
    if (crop_size > 0) {
      crop_h = crop_w = crop_size;
    }
    Caffe::set_random_seed(seed_);
    transformer->InitRand();
    Blob<Dtype>* blob =
        new Blob<Dtype>(1, datum.channels(), datum.height(), datum.width());
    if (crop_h > 0 || crop_w > 0) {
      blob->Reshape(1, datum.channels(), crop_h, crop_w);
    }

    vector<vector<Dtype> > crop_sequence;
    for (int iter = 0; iter < this->num_iter_; ++iter) {
      vector<Dtype> iter_crop_sequence;
      transformer->Transform(datum, blob);
      for (int j = 0; j < blob->count(); ++j) {
        iter_crop_sequence.push_back(blob->cpu_data()[j]);
      }
      crop_sequence.push_back(iter_crop_sequence);
    }
    // Check if the sequence differs from the previous
    int num_sequence_matches = 0;
    for (int iter = 0; iter < this->num_iter_; ++iter) {
      vector<Dtype> iter_crop_sequence = crop_sequence[iter];
      transformer->Transform(datum, blob);
      for (int j = 0; j < blob->count(); ++j) {
        num_sequence_matches +=
            (crop_sequence[iter][j] == blob->cpu_data()[j]);
      }
    }
    return num_sequence_matches;
  }
Example #14
  inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      im2col_gpu(data, conv_in_channels_,
          conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
          kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
          pad_.cpu_data()[0], pad_.cpu_data()[1],
          stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
    } else {
      im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_,
          conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
          kernel_shape_.gpu_data(), pad_.gpu_data(),
          stride_.gpu_data(), col_buff);
    }
  }
Example #15
template <typename Dtype>
void LocalLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {

  Dtype* x_data = col_buffer_.mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  Blob<Dtype> E;
  E.Reshape(1, 1, 1, K_);
  FillerParameter filler_param;
  filler_param.set_value(1);
  ConstantFiller<Dtype> filler(filler_param);
  filler.Fill(&E);

  Blob<Dtype> intermediate;
  intermediate.Reshape(1, 1, K_, N_);
  for (int n=0; n<num_; n++) {
    im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
               width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data);

    for (int m=0; m<num_output_; m++) { 
      caffe_mul(K_*N_, x_data, weight+this->blobs_[0]->offset(m),
                intermediate.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, 1, N_, K_,
                            (Dtype)1., E.cpu_data(),
                            intermediate.cpu_data(),
                            (Dtype)0., top_data + top[0]->offset(n, m));
    }

    if (bias_term_) {
      caffe_add(M_ * N_, this->blobs_[1]->cpu_data(),
                top_data + top[0]->offset(n),
                top_data + top[0]->offset(n));
    }
  }
}
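The gemm against E (a 1 x K_ vector of ones) is just a column sum: it collapses the K_ elementwise products for each of the N_ output locations. An equivalent, purely illustrative loop:

// Column sums of a row-major k_rows x n_cols matrix -- what the
// caffe_cpu_gemm(NoTrans, NoTrans, 1, N_, K_, 1., E, intermediate, 0., out)
// call above computes.
template <typename Dtype>
void col_sums(const Dtype* m, int k_rows, int n_cols, Dtype* out) {
  for (int j = 0; j < n_cols; ++j) {
    Dtype sum = Dtype(0);
    for (int k = 0; k < k_rows; ++k) {
      sum += m[k * n_cols + j];
    }
    out[j] = sum;
  }
}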
Example #16
template <typename Dtype>
static void write_blob_to_file(const std::string& file_name,
        const Blob<Dtype>& blob) {
  std::ofstream file(file_name.c_str(), std::ios::out | std::ios::binary);
  if (file.fail())  {
      ASSERT_FALSE(true);
      return;
  }
  file.write(reinterpret_cast<const char*>(&blob.shape()[0]), 4 * sizeof(int));
  ASSERT_FALSE(file.fail());
  file.write(reinterpret_cast<const char*>(blob.cpu_data()),
          blob.count() * sizeof(Dtype));
  ASSERT_FALSE(file.fail());
  file.close();
}
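For round-tripping in tests, a hypothetical read counterpart (the name and layout are assumptions, mirroring the 4-int shape header written above):

template <typename Dtype>
static void read_blob_from_file(const std::string& file_name,
        Blob<Dtype>* blob) {
  std::ifstream file(file_name.c_str(), std::ios::in | std::ios::binary);
  ASSERT_FALSE(file.fail());
  int shape[4];  // matches the 4 ints written by write_blob_to_file
  file.read(reinterpret_cast<char*>(shape), 4 * sizeof(int));
  ASSERT_FALSE(file.fail());
  blob->Reshape(shape[0], shape[1], shape[2], shape[3]);
  file.read(reinterpret_cast<char*>(blob->mutable_cpu_data()),
          blob->count() * sizeof(Dtype));
  ASSERT_FALSE(file.fail());
}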
Example #17
template <typename Dtype>
void BatchTripletLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Blob<Dtype>* feat = bottom[0];
    const Dtype* feat_data = feat->cpu_data();
    Dtype* feat_diff = feat->mutable_cpu_diff();
    int count = feat->count();
    int num = feat->num();
    int dim = count / num;
    int agg_step = num * sizeof(Dtype);
    Dtype * agg_data = (Dtype *)aggregator_->mutable_cpu_data();
    caffe_memset(num * agg_step, 0, agg_data);

    Dtype scale1 = Dtype(2) / triplets_.size() * mu_;
    for (int i=0; i<triplets_.size(); ++i) {
      int qry_id = triplets_[i].first_;
      int pos_id = triplets_[i].second_;
      int neg_id = triplets_[i].third_;

      agg_data[qry_id * num + neg_id] += scale1;
      agg_data[qry_id * num + pos_id] -= scale1;

      agg_data[pos_id * num + pos_id] += scale1;
      agg_data[pos_id * num + qry_id] -= scale1;

      agg_data[neg_id * num + qry_id] += scale1;
      agg_data[neg_id * num + neg_id] -= scale1;
    }

    Dtype scale2 = Dtype(2) / pos_pairs_.size() * (Dtype(1) - mu_);
    for (int i=0; i<pos_pairs_.size(); ++i) {
      int qry_id = pos_pairs_[i].first;
      int pos_id = pos_pairs_[i].second;

      agg_data[qry_id * num + qry_id] += scale2;
      agg_data[qry_id * num + pos_id] -= scale2;

      agg_data[pos_id * num + pos_id] += scale2;
      agg_data[pos_id * num + qry_id] -= scale2;
    }

    caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, num,
        Dtype(1), agg_data, feat_data, Dtype(0), feat_diff);
  }
}
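The aggregator turns the whole backward pass into one gemm: with X the num x dim feature matrix and A the num x num coefficient matrix assembled above, the final call computes diff = A X. The per-triplet coefficients restate the derivative (hinge handling happens earlier, when triplets_ is selected):

\[
\frac{\partial}{\partial x_q}\Bigl(\lVert x_q - x_p\rVert_2^2 - \lVert x_q - x_n\rVert_2^2\Bigr) \;=\; 2\,(x_n - x_p),
\]

which is exactly the += scale1 on column neg_id and -= scale1 on column pos_id of row qry_id, with scale1 = 2 mu_ / |triplets_|.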
Example #18
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (source.count() != count_ || source.shape() != shape_) {
    if (reshape) {
      ReshapeLike(source);
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  switch (Caffe::mode()) {
    case Caffe::GPU: {
      if (device_->backend() == BACKEND_CUDA) {
        if (copy_diff) {
          caffe_copy(count_, source.gpu_diff(),
              static_cast<Dtype*>(diff_->mutable_gpu_data()));
        } else {
          caffe_copy(count_, source.gpu_data(),
              static_cast<Dtype*>(data_->mutable_gpu_data()));
        }
      } else {
#ifdef USE_GREENTEA
        if (copy_diff) {
          greentea_copy<Dtype>(
              count_, (cl_mem) (source.gpu_diff()), 0,
              (cl_mem) (diff_->mutable_gpu_data()), 0,
              &viennacl::ocl::get_context(device_->id()));
        } else {
          greentea_copy<Dtype>(
              count_, (cl_mem) (source.gpu_data()), 0,
              (cl_mem) (data_->mutable_gpu_data()), 0,
              &viennacl::ocl::get_context(device_->id()));
        }
#endif
      }
      break;
    }
    case Caffe::CPU: {
      if (copy_diff) {
        caffe_cpu_copy(count_, source.cpu_diff(),
            static_cast<Dtype*>(diff_->mutable_cpu_data()));
      } else {
        caffe_cpu_copy(count_, source.cpu_data(),
            static_cast<Dtype*>(data_->mutable_cpu_data()));
      }
      break;
    }
    default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
Example #19
    template <typename Dtype>
    void AsyncParamServer<Dtype>::ProcessUpdateTask() {
      const vector<Blob<Dtype> *> &net_params = solver_->net()->learnable_params();
      std::deque<TaskRequest> to_update;
      update_queue_mutex_.lock();
      to_update.swap(update_tasks_);
      update_queue_mutex_.unlock();
      while (!to_update.empty() ) {
        TaskRequest task = to_update.front();
        to_update.pop_front();

        // copy to diff in solver
        int root_rank = world_rank_to_root_rank(task.part_root_rank_);
        Blob<Dtype>* blob = net_params[task.param_id_];
        Dtype* solver_diff = blob->mutable_cpu_diff();
        Dtype* mpi_buf = 
          recv_buf_[make_pair(root_rank, task.param_id_)].first;
        int64_t count = 
          recv_buf_[make_pair(root_rank, task.param_id_)].second;
        CHECK_EQ(count, blob->count());
        // copy MPI buffer to solver_diff
        int64_t part_offset = task.part_id_ * count / task.num_parts_;
        caffe_copy(count / task.num_parts_,
                   mpi_buf + part_offset, solver_diff + part_offset);
        // apply update
        int blob_wise_iter = async_iter_[make_pair(task.param_id_, task.part_id_) ];
        solver_->set_iter(blob_wise_iter);

        // TODO: support partial param updates per model part
        solver_->ApplyUpdate(task.param_id_);

        DLOG(INFO) << "PS (iter " << blob_wise_iter << "): param id="
                   << task.param_id_ << " weight="
                   << net_params[task.param_id_]->sumsq_diff();
        DLOG(INFO) << "PS (iter " << blob_wise_iter << "): param id="
                   << task.param_id_ << " data="
                   << net_params[task.param_id_]->sumsq_data();

        // clean up
        solver_->net()->ClearParamDiffs(task.param_id_);
        async_iter_[ make_pair(task.param_id_, task.part_id_) ] += 1;
        update_cnt_ += 1;

        // copy model(data) in solver to mpi buffer
        mpi_buf = send_buf_[make_pair(root_rank, task.param_id_)].first;
        caffe_copy(count / task.num_parts_,
                   blob->cpu_data() + part_offset, mpi_buf + part_offset);

        // ship off
        send_queue_mutex_.lock();
        send_tasks_.push_back(task);
        send_queue_mutex_.unlock();
      }
    }
Example #20
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (source.count() != count_ || source.shape() != shape_) {
    if (reshape) {
      ReshapeLike(source);
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  if (copy_diff) {
    caffe_copy(count_, source.cpu_diff(),
        static_cast<Dtype*>(diff_->mutable_cpu_data()));
  } else {
    caffe_copy(count_, source.cpu_data(),
        static_cast<Dtype*>(data_->mutable_cpu_data()));
  }
}
Example #21
template <>
bool ConvolutionLayerSpatial<float>::verify_result(
    const vector<Blob<float>*>& bottom, const vector<Blob<float>*>& top,
    int_tp index,
    int_tp numImages, const Blob<float> &verify_blob, kernelConfig* config) {

  uint_tp verificationFail = 0;

  if (config->verified)
    return true;
  else if (config->tested)
    return false;

  config->executionTime = timed_convolve(bottom, top, index, numImages,
                                         config);
  const float *verify_data = verify_blob.cpu_data();
  const float *data = top[index]->cpu_data();

  for (int_tp n = 0; n < numImages; ++n) {
    for (int_tp g = 0; g < group_; ++g) {
      int_tp output_image_offset = n * this->top_dim_
          + output_w_ * output_h_ * M_ * g;
      for (int out_ch = 0; out_ch < M_ && !verificationFail; out_ch++)
        for (int h = 0; h < output_h_ && !verificationFail; h++)
          for (int w = 0; w < output_w_; w++) {
            size_t offset = output_image_offset + out_ch * output_w_ * output_h_
                            + h * output_w_ + w;
            if (fabs(data[offset] - verify_data[offset]) >
                       0.1 * fabs(verify_data[offset]) &&
                !(fabs(verify_data[offset]) < 1.e-3
                  && fabs(data[offset] - verify_data[offset]) < 1.e-4)) {
              dbgPrint(printf("test verification failed @ image %d group %d "
                              "out_ch %d h %d w %d got %G expected %G\n",
                      n, g, out_ch, h, w, data[offset], verify_data[offset]));
              verificationFail = 1;
              break;
            }
          }
      if (verificationFail)
        return false;
    }
  }
  return true;
}
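The acceptance test passes when the result is within 10% relative error, with an absolute-error escape hatch for near-zero reference values. As a standalone predicate (an illustrative restatement of the condition above):

// Mirrors verify_result's check: pass on <= 10 % relative error, or on a
// tiny reference (< 1e-3) with absolute error below 1e-4.
inline bool close_enough(float got, float expected) {
  const float abs_err = fabsf(got - expected);
  return abs_err <= 0.1f * fabsf(expected) ||
         (fabsf(expected) < 1.e-3f && abs_err < 1.e-4f);
}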
Example #22
template <>
void hdf5_save_nd_dataset<float>(
    const hid_t file_id, const string& dataset_name, const Blob<float>& blob,
    bool write_diff) {
  int num_axes = blob.num_axes();
  hsize_t *dims = new hsize_t[num_axes];
  for (int i = 0; i < num_axes; ++i) {
    dims[i] = blob.shape(i);
  }
  const float* data;
  if (write_diff) {
    data = blob.cpu_diff();
  } else {
    data = blob.cpu_data();
  }
  herr_t status = H5LTmake_dataset_float(
      file_id, dataset_name.c_str(), num_axes, dims, data);
  CHECK_GE(status, 0) << "Failed to make float dataset " << dataset_name;
  delete[] dims;
}
Example #23
TYPED_TEST(PerspectiveLayerTest, TestForward) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter layer_param;
  shared_ptr<PerspectiveLayer<Dtype> > layer(
      new PerspectiveLayer<Dtype>(layer_param));
  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
  Blob<Dtype>* top = this->blob_top_vec_[0];
  const Dtype* top_data = top->cpu_data();
  for (int num = 0; num < 2; num++) {
    Dtype slope = this->blob_bottom_a_->cpu_data()[num];
    Dtype intercept = this->blob_bottom_b_->cpu_data()[num];
    for (int row = 0; row < 4; row++) {
      for (int col = 0; col < 5; col++) {
        EXPECT_NEAR(*(top_data + top->offset(num, 0, row, col)),
                    row * slope + intercept,
                    Dtype(1e-7));
      }
    }
  }
}
Example #24
template <typename Dtype>
cv::Mat Blob2Mat(Blob<Dtype>& data_mean) {
	const Dtype* mean = data_mean.cpu_data();

	const int height = data_mean.height();
	const int width = data_mean.width();
	const int channels = data_mean.channels();

	CHECK_EQ(channels, 3);
	cv::Mat M(height, width, CV_32FC3);
	for (int c = 0; c < channels; ++c) {
		for (int h = 0; h < height; ++h) {
			for (int w = 0; w < width; ++w) {
				M.at<cv::Vec3f>(h, w)[c] = static_cast<float>(mean[(c * height + h) * width + w]);
			}
		}
	}
	LOG(ERROR) << "ret[0][0]:" << M.at<cv::Vec3f>(0,0)[0] << " mean[0]:" << mean[0];

	return M;
}
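A hedged usage sketch (assumes the mean blob was filled elsewhere, e.g. from a mean.binaryproto; the CV_32FC3 result usually needs conversion before writing to disk):

Blob<float> mean_blob;                 // assumed filled with 3 x H x W mean data
cv::Mat mean_f = Blob2Mat(mean_blob);
cv::Mat mean_u8;
mean_f.convertTo(mean_u8, CV_8UC3);    // saturating cast for visualization
cv::imwrite("mean.png", mean_u8);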
Example #25
TYPED_TEST(DataTransformTest, TestMeanValue) {
  TransformationParameter transform_param;
  const bool unique_pixels = false;  // pixels are equal to label
  const int label = 0;
  const int channels = 3;
  const int height = 4;
  const int width = 5;
  const int mean_value = 2;

  transform_param.add_mean_value(mean_value);
  Datum datum;
  FillDatum(label, channels, height, width, unique_pixels, &datum);
  Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
  DataTransformer<TypeParam>* transformer =
      new DataTransformer<TypeParam>(transform_param, TEST);
  transformer->InitRand();
  transformer->Transform(datum, blob);
  for (int j = 0; j < blob->count(); ++j) {
    EXPECT_EQ(blob->cpu_data()[j], label - mean_value);
  }
}
Example #26
TYPED_TEST(DataTransformTest, TestMeanFile) {
  TransformationParameter transform_param;
  const bool unique_pixels = true;  // pixels are consecutive ints [0,size]
  const int_tp label = 0;
  const int_tp channels = 3;
  const int_tp height = 4;
  const int_tp width = 5;
  const int_tp size = channels * height * width;

  // Create a mean file
  string* mean_file = new string();
  MakeTempFilename(mean_file);
  BlobProto blob_mean;
  blob_mean.set_num(1);
  blob_mean.set_channels(channels);
  blob_mean.set_height(height);
  blob_mean.set_width(width);

  for (int_tp j = 0; j < size; ++j) {
      blob_mean.add_data(j);
  }

  LOG(INFO) << "Using temporary mean_file " << *mean_file;
  WriteProtoToBinaryFile(blob_mean, *mean_file);

  transform_param.set_mean_file(*mean_file);
  Datum datum;
  FillDatum(label, channels, height, width, unique_pixels, &datum);
  Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
  DataTransformer<TypeParam>* transformer =
      new DataTransformer<TypeParam>(transform_param, TEST,
                                     Caffe::GetDefaultDevice());
  transformer->InitRand();
  transformer->Transform(datum, blob);
  for (int_tp j = 0; j < blob->count(); ++j) {
      EXPECT_EQ(blob->cpu_data()[j], 0);
  }
}
Example #27
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (blob_mode_ == BlobProto_BlobMode_GLOBAL) {
    if (!copy_diff) {
      LOG(FATAL) << "Currently Petuum Caffe does not support "
                 << "copying data to blobs with GLOBAL mode";
    } // TODO: support CopyFrom( copy_diff == false )
  }
  if (num_ != source.num() || channels_ != source.channels() ||
      height_ != source.height() || width_ != source.width()) {
    if (reshape) {
      Reshape(source.num(), source.channels(), source.height(), source.width());
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  switch (Caffe::mode()) {
  case Caffe::GPU:
    if (copy_diff) {
      caffe_copy(count_, source.gpu_diff(),
          static_cast<Dtype*>(diff_->mutable_gpu_data()));
    } else {
      caffe_copy(count_, source.gpu_data(),
          static_cast<Dtype*>(data_->mutable_gpu_data()));
    }
    break;
  case Caffe::CPU:
    if (copy_diff) {
      caffe_copy(count_, source.cpu_diff(),
          static_cast<Dtype*>(diff_->mutable_cpu_data()));
    } else {
      caffe_copy(count_, source.cpu_data(),
          static_cast<Dtype*>(data_->mutable_cpu_data()));
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
Example #28
TYPED_TEST(DataTransformTest, TestEmptyTransformUniquePixels) {
  TransformationParameter transform_param;
  const bool unique_pixels = true;  // pixels are consecutive ints [0,size]
  const int label = 0;
  const int channels = 3;
  const int height = 4;
  const int width = 5;

  Datum datum;
  FillDatum(label, channels, height, width, unique_pixels, &datum);
  Blob<TypeParam>* blob = new Blob<TypeParam>(1, 3, 4, 5);
  DataTransformer<TypeParam>* transformer =
      new DataTransformer<TypeParam>(transform_param, TEST);
  transformer->InitRand();
  transformer->Transform(datum, blob);
  EXPECT_EQ(blob->num(), 1);
  EXPECT_EQ(blob->channels(), datum.channels());
  EXPECT_EQ(blob->height(), datum.height());
  EXPECT_EQ(blob->width(), datum.width());
  for (int j = 0; j < blob->count(); ++j) {
    EXPECT_EQ(blob->cpu_data()[j], j);
  }
}
Example #29
TYPED_TEST(DataTransformTest, TestEmptyTransform) {
  TransformationParameter transform_param;
  const bool unique_pixels = false;  // all pixels the same, equal to the label
  const int label = 0;
  const int channels = 3;
  const int height = 4;
  const int width = 5;

  Datum datum;
  FillDatum(label, channels, height, width, unique_pixels, &datum);
  Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
  DataTransformer<TypeParam>* transformer =
      new DataTransformer<TypeParam>(transform_param, TEST);
  transformer->InitRand();
  transformer->Transform(datum, blob);
  EXPECT_EQ(blob->num(), 1);
  EXPECT_EQ(blob->channels(), datum.channels());
  EXPECT_EQ(blob->height(), datum.height());
  EXPECT_EQ(blob->width(), datum.width());
  for (int j = 0; j < blob->count(); ++j) {
    EXPECT_EQ(blob->cpu_data()[j], label);
  }
}
Example #30
template <typename Dtype>
void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (bias_layer_ &&
      this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
    bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
  }
  const bool scale_param = (bottom.size() == 1);
  Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
  if ((!scale_param && propagate_down[1]) ||
      (scale_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scale diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scale->count());
    Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() :
        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_param) {
          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
          *scale_diff += result;
        } else {
          *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
        }
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_dim_ == 1) {
          if (scale_param) {
            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
            *scale_diff += result;
          } else {
            *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
          }
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scale_param),
                         scale_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scale_data = scale->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scale_dim_; ++d) {
        const Dtype factor = scale_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
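The sum_multiplier_ pattern above is the standard BLAS idiom for reducing a matrix along one axis: a gemv against a vector of ones sums over inner_dim_ (and, with CblasTrans, over outer_dim_). A minimal illustration of the idiom (helper name is ours, not Caffe's):

// Row sums of a row-major rows x cols matrix via gemv against ones --
// the same trick ScaleLayer plays with sum_multiplier_.
void row_sums(const float* matrix, int rows, int cols, float* out) {
  std::vector<float> ones(cols, 1.f);
  caffe_cpu_gemv<float>(CblasNoTrans, rows, cols,
                        1.f, matrix, ones.data(), 0.f, out);
}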