void MILDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  // Fills one prefetch batch.
  //
  // For each of `images_per_batch` images this:
  //   1. picks an entry of image_database_ (sequentially via counter_, or via
  //      PrefetchRand() when mil_data_param().randomize() is set),
  //   2. reads the image with OpenCV and its label vector from the HDF5
  //      dataset "/labels-<im_name>",
  //   3. writes `num_scales` transformed copies of the image into
  //      batch->data_ (mean-subtracted, then multiplied by `scale`),
  //   4. copies the `n_classes` label values into batch->label_.
  //
  // batch: destination blobs; batch->data_ must already be shaped (its count
  //        is CHECKed to be non-zero). The indexing below treats it as
  //        (images_per_batch * num_scales) x channels x crop_size x crop_size.
  CPUTimer timer;
  timer.Start();
  CHECK(batch->data_.count());

  Dtype* top_data = batch->data_.mutable_cpu_data();
  Dtype* top_label = batch->label_.mutable_cpu_data();
  const int img_size = this->transform_param_.crop_size();
  const int channels = this->layer_param_.mil_data_param().channels();
  // Kept as Dtype rather than int: storing the scale in an int truncates
  // fractional values (e.g. 1/255 becomes 0 and zeroes every pixel).
  // NOTE(review): upstream Caffe declares TransformationParameter.scale as a
  // float -- confirm this fork does the same.
  const Dtype scale = static_cast<Dtype>(this->transform_param_.scale());
  const bool mirror = this->transform_param_.mirror();

  const int images_per_batch =
      this->layer_param_.mil_data_param().images_per_batch();
  const int n_classes = this->layer_param_.mil_data_param().n_classes();
  const int num_scales = this->layer_param_.mil_data_param().num_scales();
  const float scale_factor =
      this->layer_param_.mil_data_param().scale_factor();
  const bool randomize = this->layer_param_.mil_data_param().randomize();

  // Zero the whole data blob first: images smaller than
  // img_size x img_size only overwrite the top-left part of their plane.
  caffe_set(batch->data_.count(), Dtype(0), top_data);

  for (int i_image = 0; i_image < images_per_batch; ++i_image) {
    // Sample which image to read. PrefetchRand() is called on every
    // iteration, even when its result is unused, so the random stream
    // advances exactly as it did before.
    unsigned int index = counter_;
    counter_ = counter_ + 1;
    const unsigned int rand_index = this->PrefetchRand();
    if (randomize) {
      index = rand_index;
    }

    const pair<string, string>& entry =
        this->image_database_[index % this->num_images_];
    const string& im_name = entry.first;
    const string& full_im_name = entry.second;

    cv::Mat cv_img = cv::imread(full_im_name, CV_LOAD_IMAGE_COLOR);
    if (!cv_img.data) {
      // Best-effort bail-out: the data written so far is kept, the remaining
      // data stays at the zeros set above, and the label entries for this
      // and all later images are not written by this call.
      LOG(ERROR) << "Could not open or find file " << full_im_name;
      return;
    }

    //REVIEW ktran: do not hardcode dataset name (or its prefix "/labels-")
    //REVIEW ktran: also do not use deep dataset name so that we don't have to modify the core caffe code
    //(ref: https://github.com/BVLC/caffe/commit/a0787631a27ca6478f70341462aafdcf35dabb19)
    hdf5_load_nd_dataset(this->label_file_id_,
                         string("/labels-"+im_name).c_str(), 4, 4,
                         &this->label_blob_);

    CHECK_EQ(label_blob_.width(), 1)          << "Expected width of label to be 1." ;
    CHECK_EQ(label_blob_.height(), n_classes) << "Expected height of label to be " << n_classes;
    CHECK_EQ(label_blob_.channels(), 1)       << "Expected channels of label to be 1." ;
    CHECK_EQ(label_blob_.num(), 1)            << "Expected num of label to be 1." ;

    // Read-only access is enough here, and the pointer is only taken once
    // the shape checks above have passed.
    const Dtype* label = label_blob_.cpu_data();

    // Target size for the current scale; shrinks (or grows) by scale_factor
    // after every scale and is clamped to at least one pixel.
    float img_size_i = img_size;
    for (int i_scales = 0; i_scales < num_scales; ++i_scales) {
      const int item_id = i_image * num_scales + i_scales;
      cv::Mat cv_cropped_img = Transform_IDL(
          cv_img, static_cast<int>(round(img_size_i)), mirror);

      // The destination plane is img_size x img_size. A larger image would
      // spill into neighbouring rows/planes or past the end of the buffer.
      CHECK_LE(cv_cropped_img.rows, img_size)
          << "Transformed image height exceeds crop_size.";
      CHECK_LE(cv_cropped_img.cols, img_size)
          << "Transformed image width exceeds crop_size.";

      for (int c = 0; c < channels; ++c) {
        const Dtype mean = static_cast<Dtype>(mean_value_[c]);
        for (int h = 0; h < cv_cropped_img.rows; ++h) {
          for (int w = 0; w < cv_cropped_img.cols; ++w) {
            const Dtype pixel =
                static_cast<Dtype>(cv_cropped_img.at<cv::Vec3b>(h, w)[c]);
            top_data[((item_id * channels + c) * img_size + h)
                     * img_size + w]
                = (pixel - mean) * scale;
          }
        }
      }
      img_size_i = std::max(static_cast<float>(1.), img_size_i * scale_factor);
    }

    // One label vector per image (shared by all of its scales).
    for (int i_label = 0; i_label < n_classes; ++i_label) {
      top_label[i_image * n_classes + i_label] = label[i_label];
    }
  }

  timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << timer.MilliSeconds() << " ms.";
}