Ejemplo n.º 1
0
void* VideoDataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  VideoDataLayer<Dtype>* layer = static_cast<VideoDataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);

  VolumeDatum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label;
  if (layer->output_labels_) {
    top_label = layer->prefetch_label_->mutable_cpu_data();
  }
  const Dtype scale = layer->layer_param_.image_data_param().scale();
  const int batch_size = layer->layer_param_.image_data_param().batch_size();
  const int crop_size = layer->layer_param_.image_data_param().crop_size();
  const bool mirror = layer->layer_param_.image_data_param().mirror();
  const int new_length  = layer->layer_param_.image_data_param().new_length();
  const int new_height  = layer->layer_param_.image_data_param().new_height();
  const int new_width  = layer->layer_param_.image_data_param().new_width();
  const bool use_image = layer->layer_param_.image_data_param().use_image();
  const int sampling_rate = layer->layer_param_.image_data_param().sampling_rate();
  const bool use_temporal_jitter = layer->layer_param_.image_data_param().use_temporal_jitter();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
        << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int length = layer->datum_length_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const int chunks_size = layer->shuffle_index_.size();
  const Dtype* mean = layer->data_mean_.cpu_data();
  const int show_data = layer->layer_param_.image_data_param().show_data();
  char *data_buffer;
  if (show_data)
	  data_buffer = new char[size];
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    CHECK_GT(chunks_size, layer->lines_id_);
    bool read_status;
    int id = layer->shuffle_index_[layer->lines_id_];
    if (!use_image){
    	if (!use_temporal_jitter){
    		read_status = ReadVideoToVolumeDatum(layer->file_list_[id].c_str(), layer->start_frm_list_[id],
    	    		layer->label_list_[id], new_length, new_height, new_width, sampling_rate, &datum);
    	}else{
    		read_status = ReadVideoToVolumeDatum(layer->file_list_[id].c_str(), -1,
    	    		layer->label_list_[id], new_length, new_height, new_width, sampling_rate, &datum);
    	}
    }
    else {
    	if (!use_temporal_jitter) {
    		read_status = ReadImageSequenceToVolumeDatum(layer->file_list_[id].c_str(), layer->start_frm_list_[id],
    	    	    		layer->label_list_[id], new_length, new_height, new_width, layer->individual_sampling_rate_list_[id], &datum); 	// 090515
    	} else {
    		int num_of_frames = layer->start_frm_list_[id];
    		int use_start_frame;
    		if (num_of_frames<new_length*sampling_rate){
    			LOG(INFO) << "not enough frames; having " << num_of_frames;
    			read_status = false;
    		} else {
    			if (layer->phase_ == Caffe::TRAIN)
    				use_start_frame = layer->PrefetchRand()%(num_of_frames-new_length*sampling_rate+1)+1;
    			else
    				use_start_frame = 0;

    			read_status = ReadImageSequenceToVolumeDatum(layer->file_list_[id].c_str(), use_start_frame,
    			    	    	    		layer->label_list_[id], new_length, new_height, new_width, layer->individual_sampling_rate_list_[id], &datum); 	// 090515
    		}
    	}
    }

    if (layer->phase_ == Caffe::TEST){
    	CHECK(read_status) << "Testing must not miss any example";
    }

    if (!read_status) {
    	//LOG(ERROR) << "cannot read " << layer->file_list_[id];
        layer->lines_id_++;
        if (layer->lines_id_ >= chunks_size) {
          // We have reached the end. Restart from the first.
          DLOG(INFO) << "Restarting data prefetching from start.";
          layer->lines_id_ = 0;
          if (layer->layer_param_.image_data_param().shuffle()){
        	  std::random_shuffle(layer->shuffle_index_.begin(), layer->shuffle_index_.end());
          }
        }
        item_id--;
        continue;
    }
    //LOG(INFO) << "--> " << item_id;
    //LOG(INFO) << "label " << datum.label();
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index = (((item_id * channels + c) * length + l) * crop_size + h)
                              * crop_size + (crop_size - 1 - w);
                int data_index = ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
                top_data[top_index] = (datum_element - mean[data_index]) * scale;
                if (show_data)
                	data_buffer[((c * length + l) * crop_size + h)
                                * crop_size + (crop_size - 1 - w)] = static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index = (((item_id * channels + c) * length + l) * crop_size + h)
                              * crop_size + w;
                int data_index = ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
                top_data[top_index] = (datum_element - mean[data_index]) * scale;
                if (show_data)
                	data_buffer[((c * length + l) * crop_size + h)
                                * crop_size + w] = static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      }
    } else {
      // we will prefer to use data() first, and then try float_data()
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
          if (show_data)
        	  data_buffer[j] = static_cast<uint8_t>(data[j]);
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    if (show_data>0){
    	int image_size, channel_size;
    	if (crop_size){
    		image_size = crop_size * crop_size;
    	}else{
    		image_size = height * width;
    	}
    	channel_size = length * image_size;
    	for (int l = 0; l < length; ++l) {
    		for (int c = 0; c < channels; ++c) {
    			cv::Mat img;
    			char ch_name[64];
    			if (crop_size)
    				BufferToGrayImage(data_buffer + c * channel_size + l * image_size, crop_size, crop_size, &img);
    			else
    				BufferToGrayImage(data_buffer + c * channel_size + l * image_size, height, width, &img);
    			sprintf(ch_name, "Channel %d", c);
    			cv::namedWindow(ch_name, CV_WINDOW_AUTOSIZE);
    			cv::imshow( ch_name, img);
    		}
    		cv::waitKey(100);
    	}
    }
    if (layer->output_labels_) {
      top_label[item_id] = datum.label();
      // LOG(INFO) << "fetching label" << datum.label() << std::endl;
    }

    layer->lines_id_++;
    if (layer->lines_id_ >= chunks_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->lines_id_ = 0;
      if (layer->layer_param_.image_data_param().shuffle()){
    	  std::random_shuffle(layer->shuffle_index_.begin(), layer->shuffle_index_.end());
      }
    }
  }
  if (show_data & data_buffer!=NULL)
	  delete []data_buffer;
  return static_cast<void*>(NULL);
}
Ejemplo n.º 2
0
void* VolumeDataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  VolumeDataLayer<Dtype>* layer = static_cast<VolumeDataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  VolumeDatum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label;
  if (layer->output_labels_) {
    top_label = layer->prefetch_label_->mutable_cpu_data();
  }
  const Dtype scale = layer->layer_param_.data_param().scale();
  const int batch_size = layer->layer_param_.data_param().batch_size();
  const int crop_size = layer->layer_param_.data_param().crop_size();
  const bool mirror = layer->layer_param_.data_param().mirror();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
        << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int length = layer->datum_length_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const Dtype* mean = layer->data_mean_.cpu_data();
  const int show_data = layer->layer_param_.data_param().show_data();
  char *data_buffer;
  if (show_data)
	  data_buffer = new char[size];
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    CHECK(layer->iter_);
    CHECK(layer->iter_->Valid());
    datum.ParseFromString(layer->iter_->value().ToString());
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index = (((item_id * channels + c) * length + l) * crop_size + h)
                              * crop_size + (crop_size - 1 - w);
                int data_index = ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
                top_data[top_index] = (datum_element - mean[data_index]) * scale;
                if (show_data)
                	data_buffer[((c * length + l) * crop_size + h)
                                * crop_size + (crop_size - 1 - w)] = static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index = (((item_id * channels + c) * length + l) * crop_size + h)
                              * crop_size + w;
                int data_index = ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
                top_data[top_index] = (datum_element - mean[data_index]) * scale;
                if (show_data)
                	data_buffer[((c * length + l) * crop_size + h)
                                * crop_size + w] = static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      }
    } else {
      // we will prefer to use data() first, and then try float_data()
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
          if (show_data)
        	  data_buffer[j] = static_cast<uint8_t>(data[j]);
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    if (show_data>0){
    	int image_size, channel_size;
    	if (crop_size){
    		image_size = crop_size * crop_size;
    	}else{
    		image_size = height * width;
    	}
    	channel_size = length * image_size;
    	for (int l = 0; l < length; ++l) {
    		for (int c = 0; c < channels; ++c) {
    			cv::Mat img;
    			char ch_name[64];
    			if (crop_size)
    				BufferToGrayImage(data_buffer + c * channel_size + l * image_size, crop_size, crop_size, &img);
    			else
    				BufferToGrayImage(data_buffer + c * channel_size + l * image_size, height, width, &img);
    			sprintf(ch_name, "Channel %d", c);
    			cv::namedWindow(ch_name, CV_WINDOW_AUTOSIZE);
    			cv::imshow( ch_name, img);
    		}
    		cv::waitKey(100);
    	}
    }
    if (layer->output_labels_) {
      top_label[item_id] = datum.label();
    }
    // go to the next iteration
    layer->iter_->Next();
    if (!layer->iter_->Valid()) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->iter_->SeekToFirst();
    }
  }
  if (show_data & data_buffer!=NULL)
	  delete []data_buffer;
  return static_cast<void*>(NULL);
}