// Fills one prefetch batch (batch->data_ and batch->label_) for the MIL data
// layer.
//
// For each of `images_per_batch` images:
//   1. pick an image index -- sequentially via counter_, or from
//      PrefetchRand() when mil_data_param().randomize() is set;
//   2. read the image from disk with cv::imread;
//   3. load its label vector from the HDF5 dataset "/labels-<im_name>" into
//      label_blob_ (checked to be 1 x 1 x n_classes x 1);
//   4. for each of `num_scales` scales, run Transform_IDL on the image and
//      write the mean-subtracted, scaled pixels into the item's
//      img_size x img_size slot. The requested size starts at img_size and is
//      multiplied by scale_factor after every scale (never below 1); whatever
//      part of the slot is not written stays zero from the initial caffe_set;
//   5. copy the n_classes label values into the batch label blob.
//
// If an image cannot be read, an error is logged and the function returns
// early: the remaining data stays zeroed and the labels of the failed and all
// later images are left untouched.
void MILDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer timer;
  timer.Start();
  CHECK(batch->data_.count());

  Dtype* top_data = batch->data_.mutable_cpu_data();
  Dtype* top_label = batch->label_.mutable_cpu_data();

  const int img_size = this->transform_param_.crop_size();
  const int channels = this->layer_param_.mil_data_param().channels();
  // Keep the scale in floating point: storing it in an int truncated
  // fractional scales (e.g. 1/255) to 0 and zeroed every pixel.
  const Dtype scale = static_cast<Dtype>(this->transform_param_.scale());
  const bool mirror = this->transform_param_.mirror();
  const int images_per_batch =
      this->layer_param_.mil_data_param().images_per_batch();
  const int n_classes = this->layer_param_.mil_data_param().n_classes();
  const int num_scales = this->layer_param_.mil_data_param().num_scales();
  const float scale_factor = this->layer_param_.mil_data_param().scale_factor();
  const bool randomize = this->layer_param_.mil_data_param().randomize();

  // Pixels are read as cv::Vec3b below, which only has three components.
  CHECK_LE(channels, 3) << "MIL data layer reads 3-channel (BGR) pixels.";

  // Zero out the batch so that regions not covered by a (smaller) scaled
  // image are well defined.
  caffe_set(batch->data_.count(), Dtype(0), top_data);

  for (int i_image = 0; i_image < images_per_batch; ++i_image) {
    // Sample which image to read. PrefetchRand() is called unconditionally so
    // the random stream advances the same way whether or not it is used.
    unsigned int index = counter_;
    counter_ = counter_ + 1;
    const unsigned int rand_index = this->PrefetchRand();
    if (randomize) {
      index = rand_index;
    }

    const pair<string, string>& entry =
        this->image_database_[index % this->num_images_];
    const string& im_name = entry.first;
    const string& full_im_name = entry.second;

    cv::Mat cv_img = cv::imread(full_im_name, CV_LOAD_IMAGE_COLOR);
    if (!cv_img.data) {
      // NOTE(review): returning here hands a partially filled batch to the
      // consumer; confirm whether this should be fatal instead.
      LOG(ERROR) << "Could not open or find file " << full_im_name;
      return;
    }

    // REVIEW ktran: do not hardcode dataset name (or its prefix "/labels-")
    // REVIEW ktran: also do not use deep dataset name so that we don't have to
    // modify the core caffe code
    // (ref: https://github.com/BVLC/caffe/commit/a0787631a27ca6478f70341462aafdcf35dabb19)
    hdf5_load_nd_dataset(this->label_file_id_,
                         string("/labels-" + im_name).c_str(), 4, 4,
                         &this->label_blob_);
    // The label is only read from here on, so use the read-only accessor.
    const Dtype* label = label_blob_.cpu_data();

    CHECK_EQ(label_blob_.width(), 1) << "Expected width of label to be 1." ;
    CHECK_EQ(label_blob_.height(), n_classes)
        << "Expected height of label to be " << n_classes;
    CHECK_EQ(label_blob_.channels(), 1)
        << "Expected channels of label to be 1." ;
    CHECK_EQ(label_blob_.num(), 1) << "Expected num of label to be 1." ;

    float img_size_i = img_size;
    for (int i_scales = 0; i_scales < num_scales; ++i_scales) {
      // Each (image, scale) pair occupies its own item in the batch.
      const int item_id = i_image * num_scales + i_scales;

      cv::Mat cv_cropped_img = Transform_IDL(
          cv_img, static_cast<int>(round(img_size_i)), mirror);

      // The destination slot is img_size x img_size; a larger image would be
      // written outside of it.
      CHECK_LE(cv_cropped_img.rows, img_size)
          << "Transformed image is taller than crop_size.";
      CHECK_LE(cv_cropped_img.cols, img_size)
          << "Transformed image is wider than crop_size.";

      for (int c = 0; c < channels; ++c) {
        const Dtype mean = static_cast<Dtype>(mean_value_[c]);
        for (int h = 0; h < cv_cropped_img.rows; ++h) {
          const int row_offset =
              ((item_id * channels + c) * img_size + h) * img_size;
          for (int w = 0; w < cv_cropped_img.cols; ++w) {
            const Dtype pixel =
                static_cast<Dtype>(cv_cropped_img.at<cv::Vec3b>(h, w)[c]);
            top_data[row_offset + w] = (pixel - mean) * scale;
          }
        }
      }

      // Size requested for the next scale, never below one pixel.
      img_size_i = std::max(1.0f, img_size_i * scale_factor);
    }

    std::copy(label, label + n_classes, top_label + i_image * n_classes);
  }

  timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << timer.MilliSeconds() << " ms.";
}