template <typename Dtype>
void* VideoDataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  VideoDataLayer<Dtype>* layer =
      static_cast<VideoDataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  VolumeDatum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label = NULL;
  if (layer->output_labels_) {
    top_label = layer->prefetch_label_->mutable_cpu_data();
  }
  const Dtype scale = layer->layer_param_.image_data_param().scale();
  const int batch_size = layer->layer_param_.image_data_param().batch_size();
  const int crop_size = layer->layer_param_.image_data_param().crop_size();
  const bool mirror = layer->layer_param_.image_data_param().mirror();
  const int new_length = layer->layer_param_.image_data_param().new_length();
  const int new_height = layer->layer_param_.image_data_param().new_height();
  const int new_width = layer->layer_param_.image_data_param().new_width();
  const bool use_image = layer->layer_param_.image_data_param().use_image();
  const int sampling_rate =
      layer->layer_param_.image_data_param().sampling_rate();
  const bool use_temporal_jitter =
      layer->layer_param_.image_data_param().use_temporal_jitter();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }
  // Datum dimensions.
  const int channels = layer->datum_channels_;
  const int length = layer->datum_length_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const int chunks_size = layer->shuffle_index_.size();
  const Dtype* mean = layer->data_mean_.cpu_data();
  const int show_data = layer->layer_param_.image_data_param().show_data();
  char* data_buffer = NULL;
  if (show_data)
    data_buffer = new char[size];

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // Get a blob.
    CHECK_GT(chunks_size, layer->lines_id_);
    bool read_status;
    int id = layer->shuffle_index_[layer->lines_id_];
    if (!use_image) {
      // Decode frames directly from a video file.
      if (!use_temporal_jitter) {
        read_status = ReadVideoToVolumeDatum(
            layer->file_list_[id].c_str(), layer->start_frm_list_[id],
            layer->label_list_[id], new_length, new_height, new_width,
            sampling_rate, &datum);
      } else {
        // A start frame of -1 asks the reader to pick one at random.
        read_status = ReadVideoToVolumeDatum(
            layer->file_list_[id].c_str(), -1, layer->label_list_[id],
            new_length, new_height, new_width, sampling_rate, &datum);
      }
    } else {
      // Read the clip from a directory of extracted image frames.
      if (!use_temporal_jitter) {
        read_status = ReadImageSequenceToVolumeDatum(
            layer->file_list_[id].c_str(), layer->start_frm_list_[id],
            layer->label_list_[id], new_length, new_height, new_width,
            layer->individual_sampling_rate_list_[id], &datum);  // 090515
      } else {
        // With temporal jitter, start_frm_list_ holds the clip's frame count.
        int num_of_frames = layer->start_frm_list_[id];
        int use_start_frame;
        if (num_of_frames < new_length * sampling_rate) {
          LOG(INFO) << "not enough frames; having " << num_of_frames;
          read_status = false;
        } else {
          if (layer->phase_ == Caffe::TRAIN)
            use_start_frame = layer->PrefetchRand() %
                (num_of_frames - new_length * sampling_rate + 1) + 1;
          else
            use_start_frame = 0;
          read_status = ReadImageSequenceToVolumeDatum(
              layer->file_list_[id].c_str(), use_start_frame,
              layer->label_list_[id], new_length, new_height, new_width,
              layer->individual_sampling_rate_list_[id], &datum);  // 090515
        }
      }
    }
    if (layer->phase_ == Caffe::TEST) {
      CHECK(read_status) << "Testing must not miss any example";
    }
    if (!read_status) {
      // Skip the unreadable clip and retry this batch slot with the next line.
      layer->lines_id_++;
      if (layer->lines_id_ >= chunks_size) {
        // We have reached the end. Restart from the first.
        DLOG(INFO) << "Restarting data prefetching from start.";
        layer->lines_id_ = 0;
        if (layer->layer_param_.image_data_param().shuffle()) {
          std::random_shuffle(layer->shuffle_index_.begin(),
                              layer->shuffle_index_.end());
        }
      }
      item_id--;
      continue;
    }
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version.
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index =
                    (((item_id * channels + c) * length + l) * crop_size + h)
                        * crop_size + (crop_size - 1 - w);
                int data_index =
                    ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element = static_cast<Dtype>(
                    static_cast<uint8_t>(data[data_index]));
                top_data[top_index] =
                    (datum_element - mean[data_index]) * scale;
                if (show_data)
                  data_buffer[((c * length + l) * crop_size + h) * crop_size
                      + (crop_size - 1 - w)] =
                          static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      } else {
        // Normal copy.
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index =
                    (((item_id * channels + c) * length + l) * crop_size + h)
                        * crop_size + w;
                int data_index =
                    ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element = static_cast<Dtype>(
                    static_cast<uint8_t>(data[data_index]));
                top_data[top_index] =
                    (datum_element - mean[data_index]) * scale;
                if (show_data)
                  data_buffer[((c * length + l) * crop_size + h) * crop_size
                      + w] = static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      }
    } else {
      // We will prefer to use data() first, and then try float_data().
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
          if (show_data)
            data_buffer[j] = static_cast<uint8_t>(data[j]);
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    if (show_data > 0) {
      // Visualize each channel of the clip as a sequence of gray images.
      int image_size, channel_size;
      if (crop_size) {
        image_size = crop_size * crop_size;
      } else {
        image_size = height * width;
      }
      channel_size = length * image_size;
      for (int l = 0; l < length; ++l) {
        for (int c = 0; c < channels; ++c) {
          cv::Mat img;
          char ch_name[64];
          if (crop_size)
            BufferToGrayImage(data_buffer + c * channel_size + l * image_size,
                              crop_size, crop_size, &img);
          else
            BufferToGrayImage(data_buffer + c * channel_size + l * image_size,
                              height, width, &img);
          sprintf(ch_name, "Channel %d", c);
          cv::namedWindow(ch_name, CV_WINDOW_AUTOSIZE);
          cv::imshow(ch_name, img);
        }
        cv::waitKey(100);
      }
    }
    if (layer->output_labels_) {
      top_label[item_id] = datum.label();
    }
    layer->lines_id_++;
    if (layer->lines_id_ >= chunks_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->lines_id_ = 0;
      if (layer->layer_param_.image_data_param().shuffle()) {
        std::random_shuffle(layer->shuffle_index_.begin(),
                            layer->shuffle_index_.end());
      }
    }
  }
  if (show_data && data_buffer != NULL)
    delete[] data_buffer;
  return static_cast<void*>(NULL);
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc != 3) {
    LOG(ERROR) << "Usage: compute_image_mean input_leveldb output_file";
    return 1;
  }

  // Open the input leveldb; it must already exist.
  leveldb::DB* db;
  leveldb::Options options;
  options.create_if_missing = false;
  LOG(INFO) << "Opening leveldb " << argv[1];
  leveldb::Status status = leveldb::DB::Open(options, argv[1], &db);
  CHECK(status.ok()) << "Failed to open leveldb " << argv[1];

  leveldb::ReadOptions read_options;
  read_options.fill_cache = false;
  leveldb::Iterator* it = db->NewIterator(read_options);
  it->SeekToFirst();

  // Use the first datum to size the accumulator blob.
  VolumeDatum datum;
  BlobProto sum_blob;
  int count = 0;
  datum.ParseFromString(it->value().ToString());
  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_length(datum.length());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size =
      datum.channels() * datum.length() * datum.height() * datum.width();
  int size_in_datum =
      std::max<int>(datum.data().size(), datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // just a dummy operation
    datum.ParseFromString(it->value().ToString());
    const string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size)
        << "Incorrect data field size " << size_in_datum;
    if (data.size() != 0) {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i,
            sum_blob.data(i) + static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(ERROR) << "Processed " << count << " files.";
    }
  }
  if (count % 10000 != 0) {
    LOG(ERROR) << "Processed " << count << " files.";
  }

  // Divide the accumulated sums by the number of examples.
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }
  // Write to disk.
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);

  // The iterator must be released before the database handle.
  delete it;
  delete db;
  return 0;
}
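// A minimal sketch of reading the mean back, assuming Caffe's
// ReadProtoFromBinaryFile() helper from caffe/util/io.hpp and
// Blob::FromProto(); the data layers above rely on a mean blob loaded this
// way when they populate data_mean_.
#include <string>

#include "caffe/blob.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"

void LoadMean(const std::string& mean_file, caffe::Blob<float>* mean) {
  caffe::BlobProto blob_proto;
  CHECK(caffe::ReadProtoFromBinaryFile(mean_file.c_str(), &blob_proto))
      << "Failed to read mean file " << mean_file;
  // Copies both the shape and the averaged values into the blob.
  mean->FromProto(blob_proto);
}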
template <typename Dtype>
void* VolumeDataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  VolumeDataLayer<Dtype>* layer =
      static_cast<VolumeDataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  VolumeDatum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label = NULL;
  if (layer->output_labels_) {
    top_label = layer->prefetch_label_->mutable_cpu_data();
  }
  const Dtype scale = layer->layer_param_.data_param().scale();
  const int batch_size = layer->layer_param_.data_param().batch_size();
  const int crop_size = layer->layer_param_.data_param().crop_size();
  const bool mirror = layer->layer_param_.data_param().mirror();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }
  // Datum dimensions.
  const int channels = layer->datum_channels_;
  const int length = layer->datum_length_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const Dtype* mean = layer->data_mean_.cpu_data();
  const int show_data = layer->layer_param_.data_param().show_data();
  char* data_buffer = NULL;
  if (show_data)
    data_buffer = new char[size];

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // Get a blob from the current leveldb position.
    CHECK(layer->iter_);
    CHECK(layer->iter_->Valid());
    datum.ParseFromString(layer->iter_->value().ToString());
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version.
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index =
                    (((item_id * channels + c) * length + l) * crop_size + h)
                        * crop_size + (crop_size - 1 - w);
                int data_index =
                    ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element = static_cast<Dtype>(
                    static_cast<uint8_t>(data[data_index]));
                top_data[top_index] =
                    (datum_element - mean[data_index]) * scale;
                if (show_data)
                  data_buffer[((c * length + l) * crop_size + h) * crop_size
                      + (crop_size - 1 - w)] =
                          static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      } else {
        // Normal copy.
        for (int c = 0; c < channels; ++c) {
          for (int l = 0; l < length; ++l) {
            for (int h = 0; h < crop_size; ++h) {
              for (int w = 0; w < crop_size; ++w) {
                int top_index =
                    (((item_id * channels + c) * length + l) * crop_size + h)
                        * crop_size + w;
                int data_index =
                    ((c * length + l) * height + h + h_off) * width + w + w_off;
                Dtype datum_element = static_cast<Dtype>(
                    static_cast<uint8_t>(data[data_index]));
                top_data[top_index] =
                    (datum_element - mean[data_index]) * scale;
                if (show_data)
                  data_buffer[((c * length + l) * crop_size + h) * crop_size
                      + w] = static_cast<uint8_t>(data[data_index]);
              }
            }
          }
        }
      }
    } else {
      // We will prefer to use data() first, and then try float_data().
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
          if (show_data)
            data_buffer[j] = static_cast<uint8_t>(data[j]);
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    if (show_data > 0) {
      // Visualize each channel of the volume as a sequence of gray images.
      int image_size, channel_size;
      if (crop_size) {
        image_size = crop_size * crop_size;
      } else {
        image_size = height * width;
      }
      channel_size = length * image_size;
      for (int l = 0; l < length; ++l) {
        for (int c = 0; c < channels; ++c) {
          cv::Mat img;
          char ch_name[64];
          if (crop_size)
            BufferToGrayImage(data_buffer + c * channel_size + l * image_size,
                              crop_size, crop_size, &img);
          else
            BufferToGrayImage(data_buffer + c * channel_size + l * image_size,
                              height, width, &img);
          sprintf(ch_name, "Channel %d", c);
          cv::namedWindow(ch_name, CV_WINDOW_AUTOSIZE);
          cv::imshow(ch_name, img);
        }
        cv::waitKey(100);
      }
    }
    if (layer->output_labels_) {
      top_label[item_id] = datum.label();
    }
    // Go to the next iteration.
    layer->iter_->Next();
    if (!layer->iter_->Valid()) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->iter_->SeekToFirst();
    }
  }
  if (show_data && data_buffer != NULL)
    delete[] data_buffer;
  return static_cast<void*>(NULL);
}
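// Both prefetch functions match the pthread entry-point signature
// void* (*)(void*), so a layer can fill the next batch on a background
// thread while the current one is consumed. A minimal sketch of the
// launch/join pattern used by old-style Caffe data layers; the free-function
// wrappers and the prefetch_thread parameter are illustrative assumptions,
// not the layer's actual interface:
#include <pthread.h>

template <typename Dtype>
void CreatePrefetchThread(VolumeDataLayer<Dtype>* layer,
                          pthread_t* prefetch_thread) {
  // Start filling the next batch asynchronously.
  CHECK(!pthread_create(prefetch_thread, NULL, VolumeDataLayerPrefetch<Dtype>,
                        static_cast<void*>(layer)))
      << "Pthread execution failed.";
}

template <typename Dtype>
void JoinPrefetchThread(pthread_t prefetch_thread) {
  // Block until the prefetched batch is ready before handing it to the net.
  CHECK(!pthread_join(prefetch_thread, NULL)) << "Pthread joining failed.";
}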