void DataReader::Body::read_one(db::Cursor* cursor, db::Transaction* dblt,
                                QueuePair* qp) {
  Datum* datum = qp->free_.pop();
  // TODO deserialize in-place instead of copy?
  datum->ParseFromString(cursor->value());
  if (dblt != NULL) {
    string labels;
    CHECK_EQ(dblt->Get(cursor->key(), labels), 0);
    Datum labelDatum;
    labelDatum.ParseFromString(labels);
    // datum->MergeFrom(labelDatum);
    datum->set_channels(datum->channels() + labelDatum.channels());
    datum->mutable_float_data()->MergeFrom(labelDatum.float_data());
    datum->mutable_data()->append(labelDatum.data());
  }
  qp->full_.push(datum);
  // go to the next iter
  cursor->Next();
  if (!cursor->valid()) {
    DLOG(INFO) << "Restarting data prefetching from start.";
    cursor->SeekToFirst();
  }
}
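// The channel concatenation above is only sound when the data datum and the
// label datum share the same height and width. A minimal standalone sketch of
// that merge rule, using a hypothetical plain struct instead of the protobuf
// Datum, to make the assumption explicit:
#include <cassert>
#include <string>

struct SimpleDatum {  // hypothetical stand-in for caffe::Datum
  int channels, height, width;
  std::string data;  // uint8 pixels, channels * height * width bytes
};

// Append label channels onto a data datum (sketch; assumes matching H x W).
inline void MergeChannels(SimpleDatum* dst, const SimpleDatum& label) {
  assert(dst->height == label.height && dst->width == label.width);
  dst->channels += label.channels;
  dst->data.append(label.data);  // mirrors datum->mutable_data()->append(...)
}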
bool MostCV::LevelDBReader::GetNextEntry(string &key, vector<double> &retVec,
                                         int &label) {
  if (!database_iter_->Valid())
    return false;

  Datum datum;
  datum.clear_float_data();
  datum.clear_data();
  datum.ParseFromString(database_iter_->value().ToString());

  key = database_iter_->key().ToString();
  label = datum.label();

  const int expected_data_size =
      std::max<int>(datum.data().size(), datum.float_data_size());
  const int datum_volume_size =
      datum.channels() * datum.height() * datum.width();
  if (expected_data_size != datum_volume_size) {
    cout << "Data size mismatch in saved datum." << endl;
    assert(false);
  }

  retVec.resize(datum_volume_size);
  const string& data = datum.data();
  if (data.size() != 0) {
    // Data stored as a uint8 string, e.g. raw pixel values
    // (196608 = 256 * 256 * 3). Cast through uint8_t so pixels above 127
    // do not read back as negative values.
    for (int i = 0; i < datum_volume_size; ++i)
      retVec[i] = static_cast<uint8_t>(data[i]);
  } else {
    // Data stored as a real-valued feature vector, e.g. a 4096-dim
    // feature-extraction output.
    for (int i = 0; i < datum_volume_size; ++i)
      retVec[i] = datum.float_data(i);
  }

  database_iter_->Next();
  ++record_idx_;
  return true;
}
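// The reader prefers the packed uint8 `data` field and falls back to
// `float_data`, the same dispatch every snippet below uses. A self-contained
// sketch of that rule over plain std types:
#include <cstdint>
#include <string>
#include <vector>

std::vector<double> DecodeDatum(const std::string& bytes,
                                const std::vector<float>& floats,
                                int volume) {
  std::vector<double> out(volume);
  if (!bytes.empty()) {
    for (int i = 0; i < volume; ++i)
      out[i] = static_cast<uint8_t>(bytes[i]);  // keep the 0..255 range
  } else {
    for (int i = 0; i < volume; ++i)
      out[i] = floats[i];
  }
  return out;
}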
void MyImageDataLayer<Dtype>::fetchData() {
  Datum datum;
  CHECK(prefetch_data_.count());
  Dtype* top_data = prefetch_data_.mutable_cpu_data();
  Dtype* top_label = prefetch_label_.mutable_cpu_data();

  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const Dtype scale = image_data_param.scale();  // image_data_layer parameters
  const int batch_size = 1;  // image_data_param.batch_size(); only a single image is needed here
  const int crop_size = image_data_param.crop_size();
  const bool mirror = image_data_param.mirror();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }
  // datum scales
  const int channels = datum_channels_;
  const int height = datum_height_;
  const int width = datum_width_;
  const int size = datum_size_;
  const int lines_size = lines_.size();
  const Dtype* mean = data_mean_.cpu_data();

  for (int item_id = 0; item_id < batch_size; ++item_id) {  // read one image
    // get a blob
    CHECK_GT(lines_size, lines_id_);
    if (!ReadImageToDatum(lines_[lines_id_].first, lines_[lines_id_].second,
                          new_height, new_width, &datum)) {
      continue;
    }
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // Always take the center crop (no random crop in this layer).
      h_off = (height - crop_size) / 2;
      w_off = (width - crop_size) / 2;
      // Normal copy: write the cropped image into top_data.
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < crop_size; ++h) {
          for (int w = 0; w < crop_size; ++w) {
            int top_index = ((item_id * channels + c) * crop_size + h)
                * crop_size + w;
            int data_index = (c * height + h + h_off) * width + w + w_off;
            Dtype datum_element =
                static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
            top_data[top_index] = (datum_element - mean[data_index]) * scale;
          }
        }
      }
    } else {
      // Just copy the whole image into top_data.
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] = (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }
    top_label[item_id] = datum.label();  // read this image's label
  }
}
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Blob<Dtype>* transformed_blob,
                                       int &h_off, int &w_off, int &do_mirror,
                                       vector<float>& col_ranges) {
  const int img_channels = datum.channels();
  const int img_height = datum.height();
  const int img_width = datum.width();

  const int channels = transformed_blob->channels();
  const int height = transformed_blob->height();
  const int width = transformed_blob->width();
  const int num = transformed_blob->num();

  //CHECK_EQ(channels, img_channels);
  CHECK_LE(height, img_height);
  CHECK_LE(width, img_width);
  CHECK_GE(num, 1);
  CHECK_EQ(img_channels, col_ranges.size());

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  const bool has_mean_file = param_.has_mean_file();
  const bool has_mean_values = mean_values_.size() > 0;
  if (do_mirror == -1) {
    do_mirror = param_.mirror() && Rand(2);
  }

  CHECK_GT(img_channels, 0);
  CHECK_GE(img_height, crop_size);
  CHECK_GE(img_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(img_channels, data_mean_.channels());
    if ((img_height == data_mean_.height() && img_width == data_mean_.width())
        || (crop_size == data_mean_.height()
            && crop_size == data_mean_.width())) {
      mean = data_mean_.mutable_cpu_data();
    } else {
      CHECK_EQ(img_height, data_mean_.height());
      CHECK_EQ(img_width, data_mean_.width());
    }
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels)
        << "Specify either 1 mean_value or as many as channels: "
        << img_channels;
    if (img_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < img_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  if (crop_size) {
    CHECK_EQ(crop_size, height);
    CHECK_EQ(crop_size, width);
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      if (h_off == -1 && w_off == -1) {
        h_off = Rand(img_height - crop_size + 1);
        w_off = Rand(img_width - crop_size + 1);
      }
    } else {
      if (h_off == -1 && w_off == -1) {
        h_off = (img_height - crop_size) / 2;
        w_off = (img_width - crop_size) / 2;
      }
    }
  } else {
    h_off = 0;
    w_off = 0;
    CHECK_EQ(img_height, height);
    CHECK_EQ(img_width, width);
  }

  Dtype* transformed_data = transformed_blob->mutable_cpu_data();
  int top_index;
  for (int h = 0; h < height; ++h) {
    int img_index = 0;
    for (int w = 0; w < width; ++w) {
      for (int c = 0; c < img_channels; ++c) {
        float now_col = col_ranges[c];
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        img_index = (c * img_height + h + h_off) * img_width + w + w_off;
        Dtype pixel = datum.float_data(img_index);
        // color augmentation: per-channel multiplicative jitter, clamped
        pixel = std::min(pixel * now_col, Dtype(255.0));
        if (has_mean_file) {
          int mean_index = (c * img_height + h_off + h) * img_width + w_off + w;
          if (crop_size == data_mean_.height()
              && crop_size == data_mean_.width()) {
            mean_index = (c * height + h) * width + w;
          }
          transformed_data[top_index] = (pixel - mean[mean_index]) * scale;
        } else if (has_mean_values) {
          transformed_data[top_index] = (pixel - mean_values_[c]) * scale;
        } else {
          transformed_data[top_index] = pixel * scale;
        }
      }
    }
  }
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  std::ifstream infile(argv[1]);
  std::vector<std::pair<string, int> > lines;
  string filename;
  int label;
  while (infile >> filename >> label) {
    lines.push_back(std::make_pair(filename, label));
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  Datum datum;
  BlobProto sum_blob;
  int count = 0;
  // Use the first image to fix the blob shape.
  if (!ReadImageToDatum(lines[0].first, lines[0].second,
                        resize_height, resize_width, is_color, &datum)) {
    return -1;
  }
  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  for (int line_id = 0; line_id < lines.size(); ++line_id) {
    if (!ReadImageToDatum(lines[line_id].first, lines[line_id].second,
                          resize_height, resize_width, is_color, &datum)) {
      continue;
    }
    const string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size)
        << "Incorrect data field size " << size_in_datum;
    if (data.size() != 0) {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i)
            + static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }
  // Write to disk
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);
  return 0;
}
void* PoseImageDataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  PoseImageDataLayer<Dtype>* layer =
      reinterpret_cast<PoseImageDataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();

  PoseImageDataParameter pose_image_data_param =
      layer->layer_param_.pose_image_data_param();
  const Dtype scale = pose_image_data_param.scale();
  const int batch_size = pose_image_data_param.batch_size();
  const int crop_size = pose_image_data_param.crop_size();
  const bool mirror = pose_image_data_param.mirror();
  const int new_height = pose_image_data_param.new_height();
  const int new_width = pose_image_data_param.new_width();
  const int out_height = pose_image_data_param.out_height();
  const int out_width = pose_image_data_param.out_width();
  const int key_point_range = pose_image_data_param.key_point_range();
  const float scale_lower_bound = pose_image_data_param.scale_lower_bound();
  const float scale_upper_bound = pose_image_data_param.scale_upper_bound();
  const int key_point_num = pose_image_data_param.key_point_num();
  const int randmargin = pose_image_data_param.randmargin();
  const int use_mode = pose_image_data_param.use_mode();
  const float torso_ratio = pose_image_data_param.torso_ratio();
  const int mx1 = pose_image_data_param.mx1();
  const int mx2 = pose_image_data_param.mx2();
  const int my1 = pose_image_data_param.my1();
  const int my2 = pose_image_data_param.my2();
  const bool color_aug = pose_image_data_param.color_aug();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const int lines_size = layer->lines_.size();
  const Dtype* mean = layer->data_mean_.cpu_data();
  int* was = new int[out_height * out_width];

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob: draw a random global scale for training
    float nowscale = 1;
    if (layer->phase_ == Caffe::TRAIN)
      nowscale = random(scale_lower_bound, scale_upper_bound);
    CHECK_GT(1.55, nowscale);
    CHECK_GT(nowscale, 0.95);
    CHECK_GT(lines_size, layer->lines_id_);
    if (use_mode == 1) {
      bool temp = PoseReadImageToDatum_mode1(
          layer->lines_[layer->lines_id_].first,
          layer->lines_[layer->lines_id_].second,
          new_height, new_width, &datum, nowscale, torso_ratio,
          mx1, mx2, my1, my2, randmargin);
      if (temp == false) continue;
    } else {
      bool temp = PoseReadImageToDatum_mode2(
          layer->lines_[layer->lines_id_].first,
          layer->lines_[layer->lines_id_].second,
          new_height, new_width, &datum, nowscale, torso_ratio,
          mx1, mx2, my1, my2, randmargin);
      if (temp == false) continue;
    }
    const string& data = datum.data();
    if (new_height > 0 && new_width > 0) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off = 0, w_off = 0;
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          float thisRand = 1;
          if (color_aug) {
            thisRand = random(0.8, 1.2);
          }
          for (int h = 0; h < new_height; ++h) {
            for (int w = 0; w < new_width; ++w) {
              int top_index = ((item_id * channels + c) * new_height + h)
                  * new_width + (new_width - 1 - w);
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
              top_data[top_index] =
                  min(top_data[top_index] * thisRand, (Dtype)(255.0));
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          float thisRand = 1;
          if (color_aug) {
            thisRand = random(0.8, 1.2);
          }
          for (int h = 0; h < new_height; ++h) {
            for (int w = 0; w < new_width; ++w) {
              int top_index = ((item_id * channels + c) * new_height + h)
                  * new_width + w;
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
              top_data[top_index] =
                  min(top_data[top_index] * thisRand, (Dtype)(255.0));
            }
          }
        }
      }
    } else {
      // Just copy the whole data
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] = (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    // Downscale the key-point labels from input to output resolution.
    float lblratio = new_height / out_height;
    vector<int> pts;
    for (int label_i = 0; label_i < datum.label_size(); label_i++) {
      pts.push_back(datum.label(label_i) / lblratio);
    }
    int lblLen = key_point_num * out_height * out_width;
    PoseReadLabel(pts, was, top_label + item_id * lblLen,
                  out_height, out_width);

    // go to the next iter
    layer->lines_id_++;
    if (layer->lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->lines_id_ = 0;
      if (layer->layer_param_.pose_image_data_param().shuffle()) {
        layer->ShuffleImages();
      }
    }
  }
  delete[] was;  // allocated with new[], so delete[] is required
  return reinterpret_cast<void*>(NULL);
}
void DataLstmTrainHistLayer<Dtype>::InternalThreadEntry() {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(this->prefetch_data_.count());

  Datum datum;
  Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
  Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
  Dtype* top_hist = this->prefetch_hist_.mutable_cpu_data();
  Dtype* top_marker = this->prefetch_marker_.mutable_cpu_data();

  // datum scales
  const int size = resize_height * resize_width * 3;
  const Dtype* mean = this->data_mean_.mutable_cpu_data();

  string value;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  int key;

  const int sequence_size =
      this->layer_param_.data_lstm_train_hist_param().sequence_size();
  const int ind_seq_num =
      this->layer_param_.data_lstm_train_hist_param().sequence_num();
  const int interval =
      this->layer_param_.data_lstm_train_hist_param().interval();
  int item_id;

  for (int time_id = 0; time_id < sequence_size; ++time_id) {
    for (int seq_id = 0; seq_id < ind_seq_num; ++seq_id) {
      item_id = time_id * ind_seq_num + seq_id;
      timer.Start();
      // get a blob
      key = buffer_key[seq_id];  // MUST be changed according to the size of the training set
      snprintf(key_cstr, kMaxKeyLength, "%08d", key);
      db_->Get(leveldb::ReadOptions(), string(key_cstr), &value);
      datum.ParseFromString(value);
      const string& data = datum.data();
      read_time += timer.MicroSeconds();

      timer.Start();
      for (int j = 0; j < size; ++j) {
        Dtype datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[j]));
        top_data[item_id * size + j] = (datum_element - mean[j]);
      }
      for (int j = 0; j < para_dim; ++j) {
        top_label[item_id * para_dim + j] = datum.float_data(j);
      }
      top_marker[item_id] = datum.float_data(para_dim);
      if (buffer_marker[seq_id] == 0) {
        top_marker[item_id] = 0;
        buffer_marker[seq_id] = 1;
      }

      //////////////////////////////////// for hist
      if (top_marker[item_id] < 0.5) {
        for (int j = 0; j < para_dim; ++j)
          top_hist[item_id * para_dim + j] = 0;
      } else {
        if (time_id == 0) {
          // Normalize the history carried over from the previous batch.
          top_hist[item_id * para_dim + 0] = hist_blob[seq_id * para_dim + 0] / 1.1 + 0.5;
          top_hist[item_id * para_dim + 1] = hist_blob[seq_id * para_dim + 1] * 0.17778 + 1.34445;
          top_hist[item_id * para_dim + 2] = hist_blob[seq_id * para_dim + 2] * 0.14545 + 0.39091;
          top_hist[item_id * para_dim + 3] = hist_blob[seq_id * para_dim + 3] * 0.17778 - 0.34445;
          top_hist[item_id * para_dim + 4] = hist_blob[seq_id * para_dim + 4] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 5] = hist_blob[seq_id * para_dim + 5] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 6] = hist_blob[seq_id * para_dim + 6] * 0.14545 + 1.48181;
          top_hist[item_id * para_dim + 7] = hist_blob[seq_id * para_dim + 7] * 0.16 + 0.98;
          top_hist[item_id * para_dim + 8] = hist_blob[seq_id * para_dim + 8] * 0.16 + 0.02;
          top_hist[item_id * para_dim + 9] = hist_blob[seq_id * para_dim + 9] * 0.14545 - 0.48181;
          top_hist[item_id * para_dim + 10] = hist_blob[seq_id * para_dim + 10] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 11] = hist_blob[seq_id * para_dim + 11] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 12] = hist_blob[seq_id * para_dim + 12] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 13] = hist_blob[seq_id * para_dim + 13] * 0.6 + 0.2;
        } else {
          // Normalize the previous time step's ground-truth labels.
          int pre_id = (time_id - 1) * ind_seq_num + seq_id;
          top_hist[item_id * para_dim + 0] = top_label[pre_id * para_dim + 0] / 1.1 + 0.5;
          top_hist[item_id * para_dim + 1] = top_label[pre_id * para_dim + 1] * 0.17778 + 1.34445;
          top_hist[item_id * para_dim + 2] = top_label[pre_id * para_dim + 2] * 0.14545 + 0.39091;
          top_hist[item_id * para_dim + 3] = top_label[pre_id * para_dim + 3] * 0.17778 - 0.34445;
          top_hist[item_id * para_dim + 4] = top_label[pre_id * para_dim + 4] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 5] = top_label[pre_id * para_dim + 5] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 6] = top_label[pre_id * para_dim + 6] * 0.14545 + 1.48181;
          top_hist[item_id * para_dim + 7] = top_label[pre_id * para_dim + 7] * 0.16 + 0.98;
          top_hist[item_id * para_dim + 8] = top_label[pre_id * para_dim + 8] * 0.16 + 0.02;
          top_hist[item_id * para_dim + 9] = top_label[pre_id * para_dim + 9] * 0.14545 - 0.48181;
          top_hist[item_id * para_dim + 10] = top_label[pre_id * para_dim + 10] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 11] = top_label[pre_id * para_dim + 11] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 12] = top_label[pre_id * para_dim + 12] / 95.0 + 0.12;
          top_hist[item_id * para_dim + 13] = top_label[pre_id * para_dim + 13] * 0.6 + 0.2;
        }
      }
      //////////////////////////////////// for hist
      trans_time += timer.MicroSeconds();

      buffer_key[seq_id]++;
      buffer_total[seq_id]++;
      if (buffer_key[seq_id] > total_frames || buffer_total[seq_id] > interval) {
        buffer_key[seq_id] = random(total_frames) + 1;
        buffer_marker[seq_id] = 0;
        buffer_total[seq_id] = 0;
      }

      // Keep the last time step's labels as history for the next batch.
      if (time_id == sequence_size - 1) {
        for (int j = 0; j < para_dim; ++j)
          hist_blob[seq_id * para_dim + j] = datum.float_data(j);
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
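// The fourteen hand-written assignments above apply the same pattern, an
// affine rescaling y = x * s + b (with /d written as * (1/d)), to two
// different sources. A table-driven sketch of the same normalization; the
// scale/offset constants are copied from the code above, and para_dim = 14
// is assumed:
const float kHistScale[14] = {
    1.0f / 1.1f, 0.17778f, 0.14545f, 0.17778f, 1.0f / 95.0f, 1.0f / 95.0f,
    0.14545f, 0.16f, 0.16f, 0.14545f, 1.0f / 95.0f, 1.0f / 95.0f,
    1.0f / 95.0f, 0.6f};
const float kHistOffset[14] = {
    0.5f, 1.34445f, 0.39091f, -0.34445f, 0.12f, 0.12f,
    1.48181f, 0.98f, 0.02f, -0.48181f, 0.12f, 0.12f, 0.12f, 0.2f};

// Normalize one label vector into the hist buffer (sketch).
template <typename Dtype>
void NormalizeHist(const Dtype* src, Dtype* dst, int para_dim) {
  for (int j = 0; j < para_dim; ++j)
    dst[j] = src[j] * kHistScale[j] + kHistOffset[j];
}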
void DataTransformer<Dtype>::Transform(const int batch_item_id,
                                       const Datum& datum, const Dtype* mean,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int channels = datum.channels();
  const int height = datum.height();
  const int width = datum.width();
  const int size = datum.channels() * datum.height() * datum.width();

  const int crop_size = param_.crop_size();
  const bool mirror = param_.mirror();
  const Dtype scale = param_.scale();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }

  if (crop_size) {
    CHECK(data.size()) << "Image cropping only support uint8 data";
    int h_off, w_off;
    // We only do random crop when we do training.
    if (phase_ == Caffe::TRAIN) {
      h_off = Rand() % (height - crop_size);
      w_off = Rand() % (width - crop_size);
    } else {
      h_off = (height - crop_size) / 2;
      w_off = (width - crop_size) / 2;
    }
    if (mirror && Rand() % 2) {
      // Copy mirrored version
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < crop_size; ++h) {
          for (int w = 0; w < crop_size; ++w) {
            int data_index = (c * height + h + h_off) * width + w + w_off;
            int top_index = ((batch_item_id * channels + c) * crop_size + h)
                * crop_size + (crop_size - 1 - w);
            Dtype datum_element =
                static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
            transformed_data[top_index] =
                (datum_element - mean[data_index]) * scale;
          }
        }
      }
    } else {
      // Normal copy
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < crop_size; ++h) {
          for (int w = 0; w < crop_size; ++w) {
            int top_index = ((batch_item_id * channels + c) * crop_size + h)
                * crop_size + w;
            int data_index = (c * height + h + h_off) * width + w + w_off;
            Dtype datum_element =
                static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
            transformed_data[top_index] =
                (datum_element - mean[data_index]) * scale;
          }
        }
      }
    }
  } else {
    // we will prefer to use data() first, and then try float_data()
    if (data.size()) {
      for (int j = 0; j < size; ++j) {
        Dtype datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[j]));
        transformed_data[j + batch_item_id * size] =
            (datum_element - mean[j]) * scale;
      }
    } else {
      for (int j = 0; j < size; ++j) {
        transformed_data[j + batch_item_id * size] =
            (datum.float_data(j) - mean[j]) * scale;
      }
    }
  }
}
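// Every copy loop above addresses pixels with the same row-major rule: a
// (c, h, w) triple in a C x H x W volume lives at (c * H + h) * W + w, and a
// crop just shifts h and w by (h_off, w_off) in the source. A self-contained
// sketch of the crop + mirror index math:

// Index of (c, h, w) in a C x H x W volume stored row-major.
inline int Offset(int c, int h, int w, int H, int W) {
  return (c * H + h) * W + w;
}

// Destination index of a cropped (and optionally mirrored) pixel in a
// C x crop x crop output volume.
inline int CropIndex(int c, int h, int w, int crop, bool mirror) {
  int wd = mirror ? (crop - 1 - w) : w;
  return (c * crop + h) * crop + wd;
}

// Example: in a 3 x 4 x 4 source cropped to 2 x 2 at offset (1, 1), the
// source index of destination pixel (c=0, h=0, w=0) is
// Offset(0, 0 + 1, 0 + 1, 4, 4) == 5.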
void DataTransformer<Dtype>::Transform(const int batch_item_id,
                                       const Datum& datum, const Dtype* mean,
                                       Dtype* transformed_data,
                                       Dtype* temp_data) {
  const string& data = datum.data();
  const int channels = datum.channels();
  const int height = datum.height();
  const int width = datum.width();
  const int size = datum.channels() * datum.height() * datum.width();

  const int crop_size = param_.crop_size();
  const bool mirror = param_.mirror();
  const bool rotate = param_.rotate();
  const Dtype scale = param_.scale();
  const int window_size = param_.window_size();

  int newHeight = datum.height();
  int newWidth = datum.width();
  if (crop_size) {
    newHeight = crop_size;
    newWidth = crop_size;
  } else if (window_size) {
    newHeight = window_size;
    newWidth = window_size;
  }
  if (window_size && crop_size) {
    LOG(FATAL) << "Current implementation does not support window_size and "
               << "crop_size to be set at the same time.";
  }

  if (crop_size || window_size) {
    CHECK(data.size()) << "Image cropping only support uint8 data";
    int h_off, w_off;
    // We only do random crop when we do training.
    if (phase_ == Caffe::TRAIN && crop_size) {
      h_off = Rand() % (height - crop_size);
      w_off = Rand() % (width - crop_size);
    } else {
      h_off = (height - newHeight) / 2;
      w_off = (width - newWidth) / 2;
    }
    // Normal copy
    for (int c = 0; c < channels; ++c) {
      for (int h = 0; h < newHeight; ++h) {
        for (int w = 0; w < newWidth; ++w) {
          int top_index = ((batch_item_id * channels + c) * newHeight + h)
              * newWidth + w;
          int data_index = (c * height + h + h_off) * width + w + w_off;
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
          transformed_data[top_index] =
              (datum_element - mean[data_index]) * scale;
        }
      }
    }
  } else {
    // we will prefer to use data() first, and then try float_data()
    if (data.size()) {
      for (int j = 0; j < size; ++j) {
        Dtype datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[j]));
        transformed_data[j + batch_item_id * size] =
            (datum_element - mean[j]) * scale;
      }
    } else {
      for (int j = 0; j < size; ++j) {
        transformed_data[j + batch_item_id * size] =
            (datum.float_data(j) - mean[j]) * scale;
      }
    }
  }

  // Perform mirroring on transformed_data via temp_data, then copy it back.
  // The copies are direct: the values are already mean-subtracted floats, so
  // routing them through a uint8_t cast would corrupt them.
  if (mirror && Rand() % 3) {
    if (Rand() % 2) {
      // Mirror about the vertical axis (flip w)
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < newHeight; ++h) {
          for (int w = 0; w < newWidth; ++w) {
            int data_index = ((batch_item_id * channels + c) * newHeight + h)
                * newWidth + w;
            int top_index = ((batch_item_id * channels + c) * newHeight + h)
                * newWidth + (newWidth - 1 - w);
            temp_data[top_index] = transformed_data[data_index];
          }
        }
      }
    } else {
      // Mirror about the horizontal axis (flip h)
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < newHeight; ++h) {
          for (int w = 0; w < newWidth; ++w) {
            int data_index = ((batch_item_id * channels + c) * newHeight + h)
                * newWidth + w;
            int top_index = ((batch_item_id * channels + c) * newHeight
                + (newHeight - 1 - h)) * newWidth + w;
            temp_data[top_index] = transformed_data[data_index];
          }
        }
      }
    }
    for (int c = 0; c < channels; ++c) {
      for (int h = 0; h < newHeight; ++h) {
        for (int w = 0; w < newWidth; ++w) {
          int top_index = ((batch_item_id * channels + c) * newHeight + h)
              * newWidth + w;
          transformed_data[top_index] = temp_data[top_index];
        }
      }
    }
  }

  // Perform rotation on transformed_data via temp_data, then copy it back.
  if (rotate && Rand() % 3) {
    int r = Rand() % 3;  // 0: rotate 90, 1: rotate -90, 2: rotate 180
    if (r == 0) {
      // Rotate 90
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < newHeight; ++h) {
          for (int w = 0; w < newWidth; ++w) {
            int top_index = ((batch_item_id * channels + c) * newHeight + h)
                * newWidth + w;
            int new_top_index = ((batch_item_id * channels + c) * newHeight
                * newWidth) + h + (newWidth - 1 - w) * newWidth;
            temp_data[new_top_index] = transformed_data[top_index];
          }
        }
      }
    } else if (r == 1) {
      // Rotate -90
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < newHeight; ++h) {
          for (int w = 0; w < newWidth; ++w) {
            int top_index = ((batch_item_id * channels + c) * newHeight + h)
                * newWidth + w;
            int new_top_index = ((batch_item_id * channels + c) * newHeight
                * newWidth) + (newWidth - 1 - h) + (w * newWidth);
            temp_data[new_top_index] = transformed_data[top_index];
          }
        }
      }
    } else {
      // Rotate 180
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < newHeight; ++h) {
          for (int w = 0; w < newWidth; ++w) {
            int top_index = ((batch_item_id * channels + c) * newHeight + h)
                * newWidth + w;
            int new_top_index = ((batch_item_id * channels + c) * newHeight
                + (newHeight - h - 1)) * newWidth + (newWidth - w - 1);
            temp_data[new_top_index] = transformed_data[top_index];
          }
        }
      }
    }
    for (int c = 0; c < channels; ++c) {
      for (int h = 0; h < newHeight; ++h) {
        for (int w = 0; w < newWidth; ++w) {
          int top_index = ((batch_item_id * channels + c) * newHeight + h)
              * newWidth + w;
          transformed_data[top_index] = temp_data[top_index];
        }
      }
    }
  }
}
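// The rotations above are pure index permutations on a square N x N plane:
// rotate 90 maps (h, w) -> (N - 1 - w, h), rotate -90 maps
// (h, w) -> (w, N - 1 - h), and rotate 180 maps (h, w) -> (N - 1 - h, N - 1 - w).
// A minimal self-contained sketch applying one such remap to a single channel
// (square images only, as in the code above):
#include <vector>

void Rotate90(const std::vector<float>& src, std::vector<float>* dst, int N) {
  dst->assign(N * N, 0.f);
  for (int h = 0; h < N; ++h)
    for (int w = 0; w < N; ++w)
      (*dst)[(N - 1 - w) * N + h] = src[h * N + w];  // (h, w) -> (N-1-w, h)
}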
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  bool do_mirror = param_.mirror() && Rand(2);  // aki_update
  const bool has_mean_file = param_.has_mean_file();
  const bool has_uint8 = data.size() > 0;
  const bool has_mean_values = mean_values_.size() > 0;

  CHECK_GT(datum_channels, 0);
  CHECK_GE(datum_height, crop_size);
  CHECK_GE(datum_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(datum_channels, data_mean_.channels());
    CHECK_EQ(datum_height, data_mean_.height());
    CHECK_EQ(datum_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels)
        << "Specify either 1 mean_value or as many as channels: "
        << datum_channels;
    if (datum_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < datum_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int height = datum_height;
  int width = datum_width;
  int h_off = 0;
  int w_off = 0;
  if (crop_size) {
    height = crop_size;
    width = crop_size;
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      h_off = Rand(datum_height - crop_size + 1);
      w_off = Rand(datum_width - crop_size + 1);
    } else {
      h_off = (datum_height - crop_size) / 2;
      w_off = (datum_width - crop_size) / 2;
    }
  }

  // aki_update: use the multi-view strategy in testing. The view id is read
  // from a side-channel file; ids 6..10 are the mirrored versions of 1..5.
  const bool use_multiview = param_.multi_view();
  if (use_multiview) {
    std::ifstream in_stream(std::string("multiview_cache").c_str());
    int view_type = 0;
    in_stream >> view_type;
    in_stream.close();
    if (view_type > 5) {
      // it means we have to use mirror right here
      do_mirror = true;
      view_type -= 5;
    }
    switch (view_type) {
      case 1:  // top-left
        h_off = 0;
        w_off = 0;
        break;
      case 2:  // top-right
        h_off = 0;
        w_off = datum_width - crop_size;
        break;
      case 3:  // bottom-left (vertical offset uses the image height)
        h_off = datum_height - crop_size;
        w_off = 0;
        break;
      case 4:  // bottom-right
        h_off = datum_height - crop_size;
        w_off = datum_width - crop_size;
        break;
      case 5:  // center
        h_off = (datum_height - crop_size) / 2;
        w_off = (datum_width - crop_size) / 2;
        break;
      default:
        break;
    }
  }

  Dtype datum_element;
  int top_index, data_index;
  for (int c = 0; c < datum_channels; ++c) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        if (has_uint8) {
          datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
        } else {
          datum_element = datum.float_data(data_index);
        }
        if (has_mean_file) {
          transformed_data[top_index] =
              (datum_element - mean[data_index]) * scale;
        } else if (has_mean_values) {
          transformed_data[top_index] =
              (datum_element - mean_values_[c]) * scale;
        } else {
          transformed_data[top_index] = datum_element * scale;
        }
      }
    }
  }
}
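// The ten-view scheme reads a view id from a side-channel file: ids 1..5 are
// the four corner crops plus the center crop, and ids 6..10 repeat them
// mirrored. A self-contained sketch of the offset table (using the image
// height for the vertical offset, which is what the corner crops need):
struct CropOffset { int h_off, w_off; };

// view_type in 1..5; H, W are full image dims, `crop` is the crop size.
CropOffset MultiViewOffset(int view_type, int H, int W, int crop) {
  switch (view_type) {
    case 1: return {0, 0};                             // top-left
    case 2: return {0, W - crop};                      // top-right
    case 3: return {H - crop, 0};                      // bottom-left
    case 4: return {H - crop, W - crop};               // bottom-right
    default: return {(H - crop) / 2, (W - crop) / 2};  // center
  }
}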
void* DataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label;
  if (layer->output_labels_) {
    top_label = layer->prefetch_label_->mutable_cpu_data();
  }
  const Dtype scale = layer->layer_param_.data_param().scale();
  const int batch_size = layer->layer_param_.data_param().batch_size();
  const int crop_size = layer->layer_param_.data_param().crop_size();
  const bool mirror = layer->layer_param_.data_param().mirror();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const Dtype* mean = layer->data_mean_.cpu_data();

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    CHECK(layer->iter_);
    CHECK(layer->iter_->Valid());
    datum.ParseFromString(layer->iter_->value().ToString());
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                  * crop_size + (crop_size - 1 - w);
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                  * crop_size + w;
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      }
    } else {
      // we will prefer to use data() first, and then try float_data()
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] = (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }
    if (layer->output_labels_) {
      top_label[item_id] = datum.label();
    }
    // go to the next iter
    layer->iter_->Next();
    if (!layer->iter_->Valid()) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->iter_->SeekToFirst();
    }
  }
  return static_cast<void*>(NULL);
}
void DataDrivingLayer<Dtype>::InternalThreadEntry() {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(this->prefetch_data_.count());

  Datum datum;
  Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
  Dtype* top_label = this->prefetch_label_.mutable_cpu_data();

  // datum scales
  const int size = resize_height * resize_width * 3;
  const Dtype* mean = this->data_mean_.mutable_cpu_data();

  string value;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  int key;
  const int batch_size = this->layer_param_.data_driving_param().batch_size();

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    // get a blob by drawing a random key
    key = random(484815) + 1;  // MUST be changed according to the size of the training set
    snprintf(key_cstr, kMaxKeyLength, "%08d", key);
    db_->Get(leveldb::ReadOptions(), string(key_cstr), &value);
    datum.ParseFromString(value);
    const string& data = datum.data();
    read_time += timer.MicroSeconds();

    timer.Start();
    for (int j = 0; j < size; ++j) {
      Dtype datum_element = static_cast<Dtype>(static_cast<uint8_t>(data[j]));
      top_data[item_id * size + j] = (datum_element - mean[j]);
    }
    for (int j = 0; j < para_dim; ++j) {
      top_label[item_id * para_dim + j] = datum.float_data(j);
    }
    trans_time += timer.MicroSeconds();
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc < 5) {
    LOG(ERROR) << "Usage: compute_image_mean input_list new_height new_width"
               << " output_file [sampling_rate]";
    return 1;
  }
  char* fn_list = argv[1];
  const int height = atoi(argv[2]);
  const int width = atoi(argv[3]);
  char* fn_output = argv[4];
  int sampling_rate = 1;
  if (argc >= 6) {
    sampling_rate = atoi(argv[5]);
    LOG(INFO) << "using sampling rate " << sampling_rate;
  }

  Datum datum;
  BlobProto sum_blob;
  int count = 0;

  std::ifstream infile(fn_list);
  string fn_frm;
  int label;
  // Use the first frame to fix the blob shape.
  infile >> fn_frm >> label;
  ReadImageToDatum(fn_frm, label, height, width, &datum);

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_length(1);
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  int i = 0;
  while (infile >> fn_frm >> label) {
    i++;
    // Keep only every sampling_rate-th frame.
    if (i % sampling_rate != 0) {
      continue;
    }
    ReadImageToDatum(fn_frm, label, height, width, &datum);
    const string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size)
        << "Incorrect data field size " << size_in_datum;
    if (data.size() != 0) {
      for (int j = 0; j < size_in_datum; ++j) {
        sum_blob.set_data(j, sum_blob.data(j) + (uint8_t)data[j]);
      }
    } else {
      for (int j = 0; j < size_in_datum; ++j) {
        sum_blob.set_data(j, sum_blob.data(j)
            + static_cast<float>(datum.float_data(j)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(ERROR) << "Processed " << count << " files.";
    }
  }
  infile.close();
  if (count % 10000 != 0) {
    LOG(ERROR) << "Processed " << count << " files.";
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }
  // Write to disk
  LOG(INFO) << "Write to " << fn_output;
  WriteProtoToBinaryFile(sum_blob, fn_output);
  return 0;
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc < 3 || argc > 4) {
    LOG(ERROR) << "Usage: compute_image_mean input_db output_file"
               << " db_backend[leveldb or lmdb]";
    return 1;
  }
  string db_backend = "lmdb";
  if (argc == 4) {
    db_backend = string(argv[3]);
  }

  // leveldb handles
  leveldb::DB* db;
  leveldb::Options options;
  options.create_if_missing = false;
  leveldb::Iterator* it = NULL;
  // lmdb handles
  MDB_env* mdb_env;
  MDB_dbi mdb_dbi;
  MDB_val mdb_key, mdb_value;
  MDB_txn* mdb_txn;
  MDB_cursor* mdb_cursor;

  // Open db
  if (db_backend == "leveldb") {
    LOG(INFO) << "Opening leveldb " << argv[1];
    leveldb::Status status = leveldb::DB::Open(options, argv[1], &db);
    CHECK(status.ok()) << "Failed to open leveldb " << argv[1];
    leveldb::ReadOptions read_options;
    read_options.fill_cache = false;
    it = db->NewIterator(read_options);
    it->SeekToFirst();
  } else if (db_backend == "lmdb") {
    LOG(INFO) << "Opening lmdb " << argv[1];
    CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed";
    CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS);  // 1TB
    CHECK_EQ(mdb_env_open(mdb_env, argv[1], MDB_RDONLY, 0664), MDB_SUCCESS)
        << "mdb_env_open failed";
    CHECK_EQ(mdb_txn_begin(mdb_env, NULL, MDB_RDONLY, &mdb_txn), MDB_SUCCESS)
        << "mdb_txn_begin failed";
    CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS)
        << "mdb_open failed";
    CHECK_EQ(mdb_cursor_open(mdb_txn, mdb_dbi, &mdb_cursor), MDB_SUCCESS)
        << "mdb_cursor_open failed";
    CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST),
             MDB_SUCCESS);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }

  // set size info
  Datum datum;
  BlobProto sum_blob;
  int count = 0;
  // load first datum
  if (db_backend == "leveldb") {
    datum.ParseFromString(it->value().ToString());
  } else if (db_backend == "lmdb") {
    datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }
  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  // start collecting
  LOG(INFO) << "Starting Iteration";
  if (db_backend == "leveldb") {
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      datum.ParseFromString(it->value().ToString());
      const string& data = datum.data();
      size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
      CHECK_EQ(size_in_datum, data_size)
          << "Incorrect data field size " << size_in_datum;
      if (data.size() != 0) {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
        }
      } else {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i)
              + static_cast<float>(datum.float_data(i)));
        }
      }
      ++count;
      if (count % 10000 == 0) {
        LOG(ERROR) << "Processed " << count << " files.";
      }
    }
  } else if (db_backend == "lmdb") {
    CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST),
             MDB_SUCCESS);
    do {
      datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size);
      const string& data = datum.data();
      size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
      CHECK_EQ(size_in_datum, data_size)
          << "Incorrect data field size " << size_in_datum;
      if (data.size() != 0) {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
        }
      } else {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i)
              + static_cast<float>(datum.float_data(i)));
        }
      }
      ++count;
      if (count % 10000 == 0) {
        LOG(ERROR) << "Processed " << count << " files.";
      }
    } while (mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_NEXT)
             == MDB_SUCCESS);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }

  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  // Visualize the mean image and report simple statistics.
  caffe::Blob<float> vis;
  vis.FromProto(sum_blob);
  caffe::imshow(&vis, 1, "mean img");
  cv::waitKey(0);
  google::protobuf::RepeatedField<float>* tmp = sum_blob.mutable_data();
  std::vector<float> mean_data(tmp->begin(), tmp->end());
  double sum = std::accumulate(mean_data.begin(), mean_data.end(), 0.0);
  double mean2 = sum / mean_data.size();
  double sq_sum = std::inner_product(mean_data.begin(), mean_data.end(),
                                     mean_data.begin(), 0.0);
  double stdev = std::sqrt(sq_sum / mean_data.size() - mean2 * mean2);
  LOG(INFO) << "mean of mean image: " << mean2 << " std: " << stdev;

  // Write to disk
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);

  // Clean up
  if (db_backend == "leveldb") {
    delete db;
  } else if (db_backend == "lmdb") {
    mdb_cursor_close(mdb_cursor);
    mdb_close(mdb_env, mdb_dbi);
    mdb_txn_abort(mdb_txn);
    mdb_env_close(mdb_env);
  } else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }
  return 0;
}
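// Both backends feed the same accumulation: keep one running-sum slot per
// pixel, add every datum into it, then divide by the image count. A minimal
// self-contained sketch of that running-mean pattern over raw uint8 images:
#include <cstdint>
#include <string>
#include <vector>

class MeanAccumulator {
 public:
  explicit MeanAccumulator(int volume) : sum_(volume, 0.0), count_(0) {}
  // Add one C*H*W uint8 image stored as a byte string.
  void Add(const std::string& bytes) {
    for (size_t i = 0; i < sum_.size(); ++i)
      sum_[i] += static_cast<uint8_t>(bytes[i]);
    ++count_;
  }
  std::vector<double> Mean() const {
    std::vector<double> m(sum_);
    for (size_t i = 0; i < m.size(); ++i) m[i] /= count_;
    return m;
  }
 private:
  std::vector<double> sum_;
  int count_;
};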
void* DataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  DataLayer<Dtype>* layer = reinterpret_cast<DataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
  const Dtype scale = layer->layer_param_.scale();
  const int batchsize = layer->layer_param_.batchsize();
  const int cropsize = layer->layer_param_.cropsize();
  const bool mirror = layer->layer_param_.mirror();

  if (mirror && cropsize == 0) {
    LOG(FATAL) << "Current implementation requires mirror and cropsize to be "
               << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const Dtype* mean = layer->data_mean_.cpu_data();

  for (int itemid = 0; itemid < batchsize; ++itemid) {
    // get a blob
    CHECK(layer->iter_);
    CHECK(layer->iter_->Valid());
    datum.ParseFromString(layer->iter_->value().ToString());
    const string& data = datum.data();
    if (cropsize) {
      //CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (Caffe::phase() == Caffe::TRAIN) {
        // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
        h_off = rand() % (height - cropsize);
        // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
        w_off = rand() % (width - cropsize);
      } else {
        h_off = (height - cropsize) / 2;
        w_off = (width - cropsize) / 2;
      }
      // NOLINT_NEXT_LINE(runtime/threadsafe_fn)
      if (mirror && rand() % 2) {
        // Copy mirrored version; pixels come from float_data in this variant.
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < cropsize; ++h) {
            for (int w = 0; w < cropsize; ++w) {
              int data_index = (c * height + h + h_off) * width + w + w_off;
              top_data[((itemid * channels + c) * cropsize + h) * cropsize
                  + cropsize - 1 - w] =
                  (static_cast<Dtype>(datum.float_data(data_index))
                   - mean[data_index]) * scale;
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < cropsize; ++h) {
            for (int w = 0; w < cropsize; ++w) {
              int data_index = (c * height + h + h_off) * width + w + w_off;
              top_data[((itemid * channels + c) * cropsize + h) * cropsize + w] =
                  (static_cast<Dtype>(datum.float_data(data_index))
                   - mean[data_index]) * scale;
            }
          }
        }
      }
    } else {
      // we will prefer to use data() first, and then try float_data()
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          top_data[itemid * size + j] =
              (static_cast<Dtype>((uint8_t)data[j]) - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[itemid * size + j] = (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }
    top_label[itemid] = datum.label();
    // go to the next iter
    layer->iter_->Next();
    if (!layer->iter_->Valid()) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->iter_->SeekToFirst();
    }
  }
  return reinterpret_cast<void*>(NULL);
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc < 4 || argc > 5) {
    printf("Convert a set of images to the leveldb format used\n"
           "as input for Caffe.\n"
           "Usage:\n"
           "    convert_imageset ROOTFOLDER/ LISTFILE DB_NAME"
           " RANDOM_SHUFFLE_DATA[0 or 1]\n"
           "The ImageNet dataset for the training demo is at\n"
           "    http://www.image-net.org/download-images\n");
    return 1;
  }
  std::ifstream infile(argv[2]);
  if (!infile)
    LOG(INFO) << "there is no file named " << argv[2];
  std::vector<string> lines;
  string infor;
  while (infile >> infor) {
    lines.push_back(infor);
  }
  if (argc == 5 && argv[4][0] == '1') {
    // Randomly shuffle data. Line 0 holds the "channels,height,width" header,
    // so shuffling starts at begin() + 1.
    LOG(INFO) << "Shuffling data";
    std::random_shuffle(lines.begin() + 1, lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  leveldb::DB* db;
  leveldb::Options options;
  options.error_if_exists = true;
  options.create_if_missing = true;
  options.write_buffer_size = 268435456;  // 256 MB
  LOG(INFO) << "Opening leveldb " << argv[3];
  leveldb::Status status = leveldb::DB::Open(options, argv[3], &db);
  CHECK(status.ok()) << "Failed to open leveldb " << argv[3];

  Datum datum;
  int count = 0;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  leveldb::WriteBatch* batch = new leveldb::WriteBatch();
  int data_size;
  bool data_size_initialized = false;

  // Parse "channels,height,width" from the header line.
  int width = 0, height = 0, channel = 0;
  std::string::size_type pos1 = 0, pos2 = 0;
  pos2 = lines[0].find(",", pos1);
  channel = atoi(lines[0].substr(pos1, pos2 - pos1).c_str());
  pos1 = pos2 + 1;
  pos2 = lines[0].find(",", pos1);
  height = atoi(lines[0].substr(pos1, pos2 - pos1).c_str());
  pos1 = pos2 + 1;
  pos2 = lines[0].find(",", pos1);
  width = atoi(lines[0].substr(pos1, pos2 - pos1).c_str());

  for (int line_id = 1; line_id < lines.size(); ++line_id) {
    if (!ReadCSVToDatum(lines[line_id], channel, width, height, &datum)) {
      continue;
    }
    if (!data_size_initialized) {
      data_size = datum.channels() * datum.height() * datum.width();
      data_size_initialized = true;
    } else {
      ::google::protobuf::RepeatedField<float> data = datum.float_data();
      CHECK_EQ(data.size(), data_size)
          << "Incorrect data field size " << data.size();
    }
    // sequential key
    snprintf(key_cstr, kMaxKeyLength, "%08d_%s", line_id,
             lines[line_id].c_str());
    string value;
    // get the value
    datum.SerializeToString(&value);
    batch->Put(string(key_cstr), value);
    if (++count % 1000 == 0) {
      db->Write(leveldb::WriteOptions(), batch);
      LOG(ERROR) << "Processed " << count << " files.";
      delete batch;
      batch = new leveldb::WriteBatch();
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    db->Write(leveldb::WriteOptions(), batch);
    LOG(ERROR) << "Processed " << count << " files.";
  }
  delete batch;
  delete db;
  return 0;
}
void* ImageDataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  ImageDataLayer<Dtype>* layer =
      reinterpret_cast<ImageDataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
  Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
  ImageDataParameter image_data_param = layer->layer_param_.image_data_param();
  const Dtype scale = image_data_param.scale();
  const int batch_size = image_data_param.batch_size();
  const int crop_size = image_data_param.crop_size();
  const bool mirror = image_data_param.mirror();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();

  if (mirror && crop_size == 0) {
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const int lines_size = layer->shuffle_index_.size();
  const Dtype* mean = layer->data_mean_.cpu_data();

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    CHECK_GT(lines_size, layer->lines_id_);
    int id = layer->shuffle_index_[layer->lines_id_];
    if (!ReadImageToDatum(layer->fn_list_[id], layer->label_list_[id],
                          new_height, new_width, &datum)) {
      continue;
    }
    const string& data = datum.data();
    if (crop_size) {
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                  * crop_size + (crop_size - 1 - w);
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      } else {
        // Normal copy
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                  * crop_size + w;
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      }
    } else {
      // Just copy the whole data
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] = (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }
    top_label[item_id] = datum.label();
    // go to the next iter
    layer->lines_id_++;
    if (layer->lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->lines_id_ = 0;
      if (layer->layer_param_.image_data_param().shuffle()) {
        layer->ShuffleImages();
      }
    }
  }
  return reinterpret_cast<void*>(NULL);
}
std::vector<float> calc_mean(const std::string &db_fname) {
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(db_fname, db::READ);
  scoped_ptr<db::Cursor> cursor(db->NewCursor());

  BlobProto sum_blob;
  int count = 0;
  // load first datum
  Datum datum;
  datum.ParseFromString(cursor->value());
  if (DecodeDatumNative(&datum)) {
    LOG(INFO) << "Decoding Datum";
  }
  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  while (cursor->valid()) {
    Datum datum;
    datum.ParseFromString(cursor->value());
    DecodeDatumNative(&datum);
    const std::string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size)
        << "Incorrect data field size " << size_in_datum;
    if (data.size() != 0) {
      CHECK_EQ(data.size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      CHECK_EQ(datum.float_data_size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i)
            + static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(INFO) << "Processed " << count << " files.";
    }
    cursor->Next();
  }
  if (count % 10000 != 0) {
    LOG(INFO) << "Processed " << count << " files.";
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  // Collapse the mean image into one mean value per channel.
  const int channels = sum_blob.channels();
  const int dim = sum_blob.height() * sum_blob.width();
  std::vector<float> mean_values(channels, 0.0);
  LOG(INFO) << "Number of channels: " << channels;
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < dim; ++i) {
      mean_values[c] += sum_blob.data(dim * c + i);
    }
    mean_values[c] /= dim;
    LOG(INFO) << "mean_value channel [" << c << "]: " << mean_values[c];
  }
  return mean_values;
}
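// calc_mean collapses the C x H x W mean image into C scalars by averaging
// each channel's H x W plane. The same reduction in a self-contained form:
#include <vector>

std::vector<float> ChannelMeans(const std::vector<float>& mean_image,
                                int channels, int dim /* = H * W */) {
  std::vector<float> out(channels, 0.f);
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < dim; ++i) out[c] += mean_image[c * dim + i];
    out[c] /= dim;
  }
  return out;
}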
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);

#ifdef USE_OPENCV
#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Compute the mean_image of a set of images given by"
      " a leveldb/lmdb\n"
      "Usage:\n"
      "    compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]\n");

  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 2 || argc > 3) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/compute_image_mean");
    return 1;
  }

  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[1], db::READ);
  scoped_ptr<db::Cursor> cursor(db->NewCursor());

  BlobProto sum_blob;
  int count = 0;
  // load first datum
  Datum datum;
  datum.ParseFromString(cursor->value());

  if (DecodeDatumNative(&datum)) {
    LOG(INFO) << "Decoding Datum";
  }

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  while (cursor->valid()) {
    Datum datum;
    datum.ParseFromString(cursor->value());
    DecodeDatumNative(&datum);

    const std::string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size)
        << "Incorrect data field size " << size_in_datum;
    if (data.size() != 0) {
      CHECK_EQ(data.size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      CHECK_EQ(datum.float_data_size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
                          static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(INFO) << "Processed " << count << " files.";
      printf("Processed %d files.\n", count);
    }
    cursor->Next();
  }

  if (count % 10000 != 0) {
    LOG(INFO) << "Processed " << count << " files.";
    printf("Processed %d files.\n", count);
  }

  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  // Write to disk
  if (argc == 3) {
    LOG(INFO) << "Write to " << argv[2];
    WriteProtoToBinaryFile(sum_blob, argv[2]);
  }

  const int channels = sum_blob.channels();
  const int dim = sum_blob.height() * sum_blob.width();
  std::vector<float> mean_values(channels, 0.0);
  LOG(INFO) << "Number of channels: " << channels;
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < dim; ++i) {
      mean_values[c] += sum_blob.data(dim * c + i);
    }
    LOG(INFO) << "mean_value channel [" << c << "]: " << mean_values[c] / dim;
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc != 3) {
    LOG(ERROR) << "Usage: compute_image_mean input_leveldb output_file";
    return 1;
  }

  leveldb::DB* db;
  leveldb::Options options;
  options.create_if_missing = false;

  LOG(INFO) << "Opening leveldb " << argv[1];
  leveldb::Status status = leveldb::DB::Open(options, argv[1], &db);
  CHECK(status.ok()) << "Failed to open leveldb " << argv[1];

  leveldb::ReadOptions read_options;
  read_options.fill_cache = false;
  leveldb::Iterator* it = db->NewIterator(read_options);
  it->SeekToFirst();

  Datum datum;
  BlobProto sum_blob;
  int count = 0;
  // Parse the first datum only to fix the shape of the sum blob.
  datum.ParseFromString(it->value().ToString());
  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    datum.ParseFromString(it->value().ToString());
    const string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size)
        << "Incorrect data field size " << size_in_datum;
    if (data.size() != 0) {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
                          static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(ERROR) << "Processed " << count << " files.";
    }
  }

  if (count % 10000 != 0) {
    LOG(ERROR) << "Processed " << count << " files.";
  }

  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  // Write to disk
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);

  delete it;  // release the iterator before the DB (leaked in the original)
  delete db;
  return 0;
}
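Either way, the output file is just a serialized BlobProto. A minimal sketch of reading it back into a Blob, using Caffe's ReadProtoFromBinaryFile and Blob::FromProto (the helper name LoadMean and the file path are illustrative, not from the tool above):

#include <string>

#include "caffe/blob.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"

// Sketch: load a mean file written by the tool above back into a Blob.
void LoadMean(const std::string& mean_file, caffe::Blob<float>* data_mean) {
  caffe::BlobProto blob_proto;
  CHECK(caffe::ReadProtoFromBinaryFile(mean_file, &blob_proto))
      << "Failed to parse mean file: " << mean_file;
  data_mean->FromProto(blob_proto);  // reshapes the blob to the stored shape
  LOG(INFO) << "Mean blob: " << data_mean->channels() << " x "
            << data_mean->height() << " x " << data_mean->width();
}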
template <typename Dtype>
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  const bool do_mirror = param_.mirror() && Rand(2);
  const bool has_mean_file = param_.has_mean_file();
  const bool has_uint8 = data.size() > 0;
  const bool has_mean_values = mean_values_.size() > 0;
  // mask_size defaults to 0 in caffe/proto/caffe.proto (no masking);
  // mask_freq is also defaulted there, and a mask is applied to roughly
  // 1 in mask_freq training images (via Rand(mask_freq) == 0 below).
  const int mask_size = param_.mask_size();
  const int mask_freq = param_.mask_freq();

  CHECK_GT(datum_channels, 0);
  CHECK_GE(datum_height, crop_size);
  CHECK_GE(datum_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(datum_channels, data_mean_.channels());
    CHECK_EQ(datum_height, data_mean_.height());
    CHECK_EQ(datum_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels)
        << "Specify either 1 mean_value or as many as channels: "
        << datum_channels;
    if (datum_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < datum_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int height = datum_height;
  int width = datum_width;
  int h_off = 0;
  int w_off = 0;
  if (crop_size) {
    height = crop_size;
    width = crop_size;
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      h_off = Rand(datum_height - crop_size + 1);
      w_off = Rand(datum_width - crop_size + 1);
    } else {
      h_off = (datum_height - crop_size) / 2;
      w_off = (datum_width - crop_size) / 2;
    }
  }

  // Initialize the masking offsets to the cropping offsets so that the
  // mask window is sampled relative to the cropped region.
  bool masking = (phase_ == TRAIN) && (mask_size > 0) && (Rand(mask_freq) == 0);
  int h_mask_start = h_off;
  int w_mask_start = w_off;
  if (masking) {
    int h_effective = datum_height;
    int w_effective = datum_width;
    if (crop_size) {
      h_effective = w_effective = crop_size;
    }
    CHECK_GE(h_effective, mask_size);
    CHECK_GE(w_effective, mask_size);
    h_mask_start += Rand(h_effective - mask_size + 1);
    w_mask_start += Rand(w_effective - mask_size + 1);
  }
  int h_mask_end = h_mask_start + mask_size;
  int w_mask_end = w_mask_start + mask_size;

  Dtype datum_element;
  int top_index, data_index;
  for (int c = 0; c < datum_channels; ++c) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        if (has_uint8) {
          datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
        } else {
          datum_element = datum.float_data(data_index);
        }
        if (has_mean_file) {
          transformed_data[top_index] =
              (datum_element - mean[data_index]) * scale;
        } else if (has_mean_values) {
          transformed_data[top_index] =
              (datum_element - mean_values_[c]) * scale;
        } else {
          transformed_data[top_index] = datum_element * scale;
        }
        if (masking) {
          // Note: the comparisons are strict, so the zeroed square is
          // effectively (mask_size - 1) x (mask_size - 1) pixels.
          if ((h > h_mask_start) && (w > w_mask_start) &&
              (h < h_mask_end) && (w < w_mask_end)) {
            transformed_data[top_index] = 0;
          }
        }
      }
    }
  }
}
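To make the masking math concrete, here is a small standalone sketch with illustrative values only: it shows why Rand(mask_freq) == 0 fires with probability 1/mask_freq and how a mask window is sampled so it fits inside the crop. The Rand stand-in here uses std::rand for brevity; Caffe itself uses its own seeded RNG.

#include <cstdio>
#include <cstdlib>

// Stand-in for the transformer's Rand(n): uniform integer in [0, n).
static int Rand(int n) { return std::rand() % n; }

int main() {
  const int crop_size = 8, mask_size = 3, mask_freq = 3;  // illustrative
  int masked = 0;
  const int trials = 100000;
  for (int t = 0; t < trials; ++t) {
    if (Rand(mask_freq) == 0) {  // fires ~1/mask_freq of the time
      ++masked;
      // Sample the top-left corner so the whole mask fits in the crop:
      // valid starts are 0 .. crop_size - mask_size, inclusive.
      int h_mask_start = Rand(crop_size - mask_size + 1);
      int w_mask_start = Rand(crop_size - mask_size + 1);
      (void)h_mask_start;
      (void)w_mask_start;
    }
  }
  printf("masked fraction: %.3f (expected ~%.3f)\n",
         static_cast<double>(masked) / trials, 1.0 / mask_freq);
  return 0;
}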
template <typename Dtype>
void DataTransformer<Dtype>::Transform(const Datum& datum,
                                       Dtype* transformed_data) {
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  const int crop_size = param_.crop_size();
  const Dtype scale = param_.scale();
  const bool do_mirror = param_.mirror() && Rand(2);
  const bool has_mean_file = param_.has_mean_file();
  const bool has_uint8 = data.size() > 0;
  const bool has_mean_values = mean_values_.size() > 0;

  CHECK_GT(datum_channels, 0);
  CHECK_GE(datum_height, crop_size);
  CHECK_GE(datum_width, crop_size);

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(datum_channels, data_mean_.channels());
    CHECK_EQ(datum_height, data_mean_.height());
    CHECK_EQ(datum_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels)
        << "Specify either 1 mean_value or as many as channels: "
        << datum_channels;
    if (datum_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < datum_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int height = datum_height;
  int width = datum_width;
  int h_off = 0;
  int w_off = 0;
  if (crop_size) {
    height = crop_size;
    width = crop_size;
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      h_off = Rand(datum_height - crop_size + 1);
      w_off = Rand(datum_width - crop_size + 1);
    } else {
      h_off = (datum_height - crop_size) / 2;
      w_off = (datum_width - crop_size) / 2;
    }
  }

  Dtype datum_element;
  int top_index, data_index;
  for (int c = 0; c < datum_channels; ++c) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
        if (do_mirror) {
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        if (has_uint8) {
          datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
        } else {
          datum_element = datum.float_data(data_index);
        }
        if (has_mean_file) {
          transformed_data[top_index] =
              (datum_element - mean[data_index]) * scale;
        } else if (has_mean_values) {
          transformed_data[top_index] =
              (datum_element - mean_values_[c]) * scale;
        } else {
          transformed_data[top_index] = datum_element * scale;
        }
      }
    }
  }
}
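The crop-and-mirror indexing above is easy to verify by hand. The sketch below, with illustrative sizes only, recomputes data_index and top_index for one channel of a 4x4 datum with a 2x2 crop at offset (1, 1), both with and without mirroring:

#include <cstdio>

int main() {
  // Illustrative shapes: 1 channel, 4x4 datum, 2x2 crop at offset (1, 1).
  const int datum_height = 4, datum_width = 4;
  const int crop = 2, h_off = 1, w_off = 1, c = 0;
  for (int h = 0; h < crop; ++h) {
    for (int w = 0; w < crop; ++w) {
      // Same formulas as in Transform() above.
      int data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
      int top_plain  = (c * crop + h) * crop + w;               // normal copy
      int top_mirror = (c * crop + h) * crop + (crop - 1 - w);  // mirrored
      printf("src %2d -> dst %d (mirrored: %d)\n",
             data_index, top_plain, top_mirror);
    }
  }
  return 0;
}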
template <typename Dtype>
void* DataLayerPrefetch(void* layer_pointer) {
  CHECK(layer_pointer);
  DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer);
  CHECK(layer);
  Datum datum;
  CHECK(layer->prefetch_data_);
  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();  // data
  Dtype* top_label;  // labels
  if (layer->output_labels_) {
    top_label = layer->prefetch_label_->mutable_cpu_data();
  }
  const Dtype scale = layer->layer_param_.data_param().scale();
  const int batch_size = layer->layer_param_.data_param().batch_size();
  const int crop_size = layer->layer_param_.data_param().crop_size();
  const bool mirror = layer->layer_param_.data_param().mirror();

  if (mirror && crop_size == 0) {
    // The current implementation requires mirror and crop_size
    // to be set at the same time.
    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
               << "set at the same time.";
  }
  // datum scales
  const int channels = layer->datum_channels_;
  const int height = layer->datum_height_;
  const int width = layer->datum_width_;
  const int size = layer->datum_size_;
  const Dtype* mean = layer->data_mean_.cpu_data();

  // Each batch holds batch_size items; each iteration fetches one datum.
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    CHECK(layer->iter_);
    CHECK(layer->iter_->Valid());
    // Parse the next datum through the leveldb iterator.
    datum.ParseFromString(layer->iter_->value().ToString());
    const string& data = datum.data();
    // This variant stores a multi-label vector in the datum, so the label
    // blob must have exactly label_size() channels.
    int label_blob_channels = layer->prefetch_label_->channels();
    int label_data_dim = datum.label_size();
    CHECK_EQ(label_blob_channels, label_data_dim)
        << "label size is NOT the same.";
    if (crop_size) {
      // Cropping requested.
      CHECK(data.size()) << "Image cropping only support uint8 data";
      int h_off, w_off;
      // We only do random crop when we do training;
      // at test time we take the fixed center crop.
      if (layer->phase_ == Caffe::TRAIN) {
        h_off = layer->PrefetchRand() % (height - crop_size);
        w_off = layer->PrefetchRand() % (width - crop_size);
      } else {
        h_off = (height - crop_size) / 2;
        w_off = (width - crop_size) / 2;
      }
      // (The two branches below differ only in the horizontal index:
      // the mirrored copy writes to column crop_size - 1 - w.)
      if (mirror && layer->PrefetchRand() % 2) {
        // Copy mirrored version
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                  * crop_size + (crop_size - 1 - w);
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      } else {
        // Normal (non-mirrored) copy.
        for (int c = 0; c < channels; ++c) {
          for (int h = 0; h < crop_size; ++h) {
            for (int w = 0; w < crop_size; ++w) {
              int top_index = ((item_id * channels + c) * crop_size + h)
                  * crop_size + w;
              int data_index = (c * height + h + h_off) * width + w + w_off;
              Dtype datum_element =
                  static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
              top_data[top_index] = (datum_element - mean[data_index]) * scale;
            }
          }
        }
      }
    } else {
      // No cropping: copy the whole datum.
      // We will prefer to use data() first, and then try float_data().
      if (data.size()) {
        for (int j = 0; j < size; ++j) {
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[j]));
          top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
        }
      } else {
        for (int j = 0; j < size; ++j) {
          top_data[item_id * size + j] =
              (datum.float_data(j) - mean[j]) * scale;
        }
      }
    }

    // Debug: dump the first five images (first channel only) as hex text.
    if (g_item_id++ < 5) {
      int label_size = datum.label_size();
      int image_label = 0;
      // Recover the class index from the one-hot label vector.
      for (int j = 0; j < label_size; ++j) {
        if (datum.label(j) == 1) {
          image_label = j;
          break;
        }
      }
      char strImgRawDataFile[255] = "";
      sprintf(strImgRawDataFile, "caffe_%s_%05d_%d%s",
              "train", item_id, image_label, ".txt");
      ofstream fout_image_raw_data(strImgRawDataFile);
      for (int h = 0; h < height; ++h) {
        for (int w = 0; w < width; ++w) {
          // Was `h * height + w` in the original: wrong for non-square images.
          int pixel_index = h * width + w;
          Dtype datum_element =
              static_cast<Dtype>(static_cast<uint8_t>(data[pixel_index]));
          char strHexByte[3] = "";
          sprintf(strHexByte, "%02X", (unsigned char)datum_element);
          fout_image_raw_data << " " << strHexByte;
        }
        fout_image_raw_data << endl;
      }
      fout_image_raw_data << endl;
      for (int j = 0; j < label_size; ++j) {
        fout_image_raw_data << datum.label(j);
      }
      fout_image_raw_data.close();
    }

    if (layer->output_labels_) {
      // Copy the whole multi-label vector, one value per label channel.
      int label_size = datum.label_size();
      for (int j = 0; j < label_size; ++j) {
        top_label[item_id * label_size + j] = datum.label(j);
      }
      // top_label[item_id] = datum.label();
    }
    // go to the next iter
    layer->iter_->Next();
    if (!layer->iter_->Valid()) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      layer->iter_->SeekToFirst();
    }
  }
  return static_cast<void*>(NULL);
}
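Note that datum.label_size() and datum.label(j) do not exist against the stock proto, where Datum declares `optional int32 label = 5;`. The prefetch code above therefore assumes a modified caffe.proto with a repeated label field. A hedged sketch of what writing such one-hot multi-labels would look like (the helper name SetOneHotLabel is illustrative):

#include "caffe/proto/caffe.pb.h"

using caffe::Datum;

// Assumption: caffe.proto was changed to `repeated int32 label = 5;`,
// which generates clear_label(), add_label(), label_size(), and label(i).
void SetOneHotLabel(Datum* datum, int class_id, int num_classes) {
  datum->clear_label();
  for (int j = 0; j < num_classes; ++j) {
    datum->add_label(j == class_id ? 1 : 0);  // one-hot encoding
  }
}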