// Fill the DB with data: if unique_pixels, each pixel is unique but // all images are the same; else each image is unique but all pixels within // an image are the same. void Fill(const bool unique_pixels, DataParameter_DB backend) { backend_ = backend; LOG(INFO) << "Using temporary dataset " << *filename_; scoped_ptr<db::DB> db(db::GetDB(backend)); db->Open(*filename_, db::NEW); scoped_ptr<db::Transaction> txn(db->NewTransaction()); for (int i = 0; i < 5; ++i) { Datum datum; datum.set_label(i); datum.set_channels(2); datum.set_height(3); datum.set_width(4); std::string* data = datum.mutable_data(); for (int j = 0; j < 24; ++j) { int datum = unique_pixels ? j : i; data->push_back(static_cast<uint8_t>(datum)); } stringstream ss; ss << i; string out; CHECK(datum.SerializeToString(&out)); txn->Put(ss.str(), out); } txn->Commit(); db->Close(); }
// Reads one record at the cursor into a Datum taken from the queue's free
// list and pushes it onto the full queue; advances the cursor, wrapping to
// the first record when the end of the DB is reached.
// If dblt is non-NULL, a label record stored under the same key is fetched
// from that second DB and concatenated onto the data datum channel-wise.
void DataReader::Body::read_one(db::Cursor* cursor, db::Transaction* dblt,
    QueuePair* qp) {
  Datum* datum = qp->free_.pop();
  // TODO deserialize in-place instead of copy?
  datum->ParseFromString(cursor->value());
  if (dblt != NULL) {
    // Fetch the label record by the same key as the data record.
    // NOTE(review): assumes Get returns 0 on success — confirm against the
    // db::Transaction API.
    string labels;
    CHECK_EQ(dblt->Get(cursor->key(), labels), 0);
    Datum labelDatum;
    labelDatum.ParseFromString(labels);
    // datum->MergeFrom(labelDatum);
    // Append the label datum's channels, float data, and byte data onto the
    // data datum instead of a full MergeFrom (which would overwrite shape).
    datum->set_channels(datum->channels() + labelDatum.channels());
    datum->mutable_float_data()->MergeFrom(labelDatum.float_data());
    datum->mutable_data()->append(labelDatum.data());
  }
  qp->full_.push(datum);

  // go to the next iter
  cursor->Next();
  if (!cursor->valid()) {
    DLOG(INFO) << "Restarting data prefetching from start.";
    cursor->SeekToFirst();
  }
}
// Writes num_inputs datums of varying height (i % 2 + 1) and width
// (i % 4 + 1) to a temporary DB, then runs a DataLayer with batch_size 1
// and checks that the top data blob is reshaped per-iteration to match
// each datum, that labels round-trip, and that pixel values equal their
// flat index.
void TestReshape(DataParameter_DB backend) {
  const int num_inputs = 5;
  // Save data of varying shapes.
  LOG(INFO) << "Using temporary dataset " << *filename_;
  scoped_ptr<db::DB> db(db::GetDB(backend));
  db->Open(*filename_, db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());
  for (int i = 0; i < num_inputs; ++i) {
    Datum datum;
    datum.set_label(i);
    datum.set_channels(2);
    datum.set_height(i % 2 + 1);
    datum.set_width(i % 4 + 1);
    std::string* data = datum.mutable_data();
    const int data_size = datum.channels() * datum.height() * datum.width();
    // Each pixel stores its own flat index so it can be verified below.
    for (int j = 0; j < data_size; ++j) {
      data->push_back(static_cast<uint8_t>(j));
    }
    stringstream ss;
    ss << i;
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(ss.str(), out);
  }
  txn->Commit();
  db->Close();

  // Load and check data of various shapes.
  LayerParameter param;
  param.set_phase(TEST);
  DataParameter* data_param = param.mutable_data_param();
  data_param->set_batch_size(1);
  data_param->set_source(filename_->c_str());
  data_param->set_backend(backend);

  DataLayer<Dtype> layer(param);
  layer.SetUp(blob_bottom_vec_, blob_top_vec_);
  EXPECT_EQ(blob_top_data_->num(), 1);
  EXPECT_EQ(blob_top_data_->channels(), 2);
  EXPECT_EQ(blob_top_label_->num(), 1);
  EXPECT_EQ(blob_top_label_->channels(), 1);
  EXPECT_EQ(blob_top_label_->height(), 1);
  EXPECT_EQ(blob_top_label_->width(), 1);

  for (int iter = 0; iter < num_inputs; ++iter) {
    layer.Forward(blob_bottom_vec_, blob_top_vec_);
    EXPECT_EQ(blob_top_data_->height(), iter % 2 + 1);
    EXPECT_EQ(blob_top_data_->width(), iter % 4 + 1);
    EXPECT_EQ(iter, blob_top_label_->cpu_data()[0]);
    const int channels = blob_top_data_->channels();
    const int height = blob_top_data_->height();
    const int width = blob_top_data_->width();
    for (int c = 0; c < channels; ++c) {
      for (int h = 0; h < height; ++h) {
        for (int w = 0; w < width; ++w) {
          const int idx = (c * height + h) * width + w;
          // Fixed: the " w " literal was split by a raw newline, which is
          // ill-formed C++.
          EXPECT_EQ(idx, static_cast<int>(blob_top_data_->cpu_data()[idx]))
              << "debug: iter " << iter << " c " << c
              << " h " << h << " w " << w;
        }
      }
    }
  }
}
// Fill the bottom blobs.
// Builds a fake multi-box network: ground truth (from an LMDB-backed
// AnnotatedDataLayer when USE_LMDB, else hand-filled items), then a pooling
// layer over a fake 20x20 input, conv + permute + flatten chains producing
// the location and confidence bottoms, and a PriorBoxLayer for the priors.
// NOTE(review): the single layer_param object accumulates the parameter
// messages for every layer built here; presumably each layer reads only its
// own sub-message — confirm.
void Fill(bool share_location) {
  int loc_classes = share_location ? 1 : num_classes_;
  // Create fake network which simulates a simple multi box network.
  vector<Blob<Dtype>*> fake_bottom_vec;
  vector<Blob<Dtype>*> fake_top_vec;
  LayerParameter layer_param;
  // Fake input (image) of size 20 x 20
  Blob<Dtype>* fake_input = new Blob<Dtype>(num_, 3, 20, 20);

  // 1) Fill ground truth.
#ifdef USE_LMDB
  string filename;
  GetTempDirname(&filename);
  DataParameter_DB backend = DataParameter_DB_LMDB;
  scoped_ptr<db::DB> db(db::GetDB(backend));
  db->Open(filename, db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());
  for (int i = 0; i < num_; ++i) {
    AnnotatedDatum anno_datum;
    // Fill data.
    Datum* datum = anno_datum.mutable_datum();
    datum->set_channels(3);
    datum->set_height(20);
    datum->set_width(20);
    std::string* data = datum->mutable_data();
    for (int j = 0; j < 3*20*20; ++j) {
      data->push_back(static_cast<uint8_t>(j/100.));
    }
    anno_datum.set_type(AnnotatedDatum_AnnotationType_BBOX);
    // Images 0 and 2 get one group-label-1 box each.
    if (i == 0 || i == 2) {
      AnnotationGroup* anno_group = anno_datum.add_annotation_group();
      anno_group->set_group_label(1);
      Annotation* anno = anno_group->add_annotation();
      anno->set_instance_id(0);
      NormalizedBBox* bbox = anno->mutable_bbox();
      bbox->set_xmin(0.1);
      bbox->set_ymin(0.1);
      bbox->set_xmax(0.3);
      bbox->set_ymax(0.3);
      bbox->set_difficult(i % 2);
    }
    // Image 2 additionally gets two group-label-2 boxes.
    if (i == 2) {
      AnnotationGroup* anno_group = anno_datum.add_annotation_group();
      anno_group->set_group_label(2);
      Annotation* anno = anno_group->add_annotation();
      anno->set_instance_id(0);
      NormalizedBBox* bbox = anno->mutable_bbox();
      bbox->set_xmin(0.2);
      bbox->set_ymin(0.2);
      bbox->set_xmax(0.4);
      bbox->set_ymax(0.4);
      bbox->set_difficult(i % 2);
      anno = anno_group->add_annotation();
      anno->set_instance_id(1);
      bbox = anno->mutable_bbox();
      bbox->set_xmin(0.6);
      bbox->set_ymin(0.6);
      bbox->set_xmax(0.8);
      bbox->set_ymax(0.9);
      bbox->set_difficult((i + 1) % 2);
    }
    string key_str = caffe::format_int(i, 3);
    string out;
    CHECK(anno_datum.SerializeToString(&out));
    txn->Put(key_str, out);
  }
  txn->Commit();
  db->Close();
  // Read the annotated DB back through an AnnotatedDataLayer to produce
  // fake_input and the ground-truth bottom blob.
  DataParameter* data_param = layer_param.mutable_data_param();
  data_param->set_batch_size(num_);
  data_param->set_source(filename.c_str());
  data_param->set_backend(backend);
  AnnotatedDataLayer<Dtype> anno_data_layer(layer_param);
  fake_top_vec.clear();
  fake_top_vec.push_back(fake_input);
  fake_top_vec.push_back(blob_bottom_gt_);
  anno_data_layer.SetUp(fake_bottom_vec, fake_top_vec);
  anno_data_layer.Forward(fake_bottom_vec, fake_top_vec);
#else
  // No LMDB: fill the input with Gaussian noise and write the same four
  // ground-truth items (item_id, group, instance, box, difficult) by hand.
  FillerParameter filler_param;
  GaussianFiller<Dtype> filler(filler_param);
  filler.Fill(fake_input);
  vector<int> gt_shape(4, 1);
  gt_shape[2] = 4;
  gt_shape[3] = 8;
  blob_bottom_gt_->Reshape(gt_shape);
  Dtype* gt_data = blob_bottom_gt_->mutable_cpu_data();
  FillItem(gt_data, "0 1 0 0.1 0.1 0.3 0.3 0");
  FillItem(gt_data + 8, "2 1 0 0.1 0.1 0.3 0.3 0");
  FillItem(gt_data + 8 * 2, "2 2 0 0.2 0.2 0.4 0.4 0");
  FillItem(gt_data + 8 * 3, "2 2 1 0.6 0.6 0.8 0.9 1");
#endif  // USE_LMDB

  // Fake layer
  // Average-pool the 20x20 input down to a height_ x width_ feature map.
  PoolingParameter* pooling_param = layer_param.mutable_pooling_param();
  pooling_param->set_pool(PoolingParameter_PoolMethod_AVE);
  pooling_param->set_kernel_size(10);
  pooling_param->set_stride(10);
  PoolingLayer<Dtype> pooling_layer(layer_param);
  Blob<Dtype>* fake_blob = new Blob<Dtype>(num_, 5, height_, width_);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(fake_input);
  fake_top_vec.clear();
  fake_top_vec.push_back(fake_blob);
  pooling_layer.SetUp(fake_bottom_vec, fake_top_vec);
  pooling_layer.Forward(fake_bottom_vec, fake_top_vec);

  // 2) Fill bbox location predictions.
  // 1x1 conv producing 4 coordinates per prior per location class.
  ConvolutionParameter* convolution_param =
      layer_param.mutable_convolution_param();
  convolution_param->add_pad(0);
  convolution_param->add_kernel_size(1);
  convolution_param->add_stride(1);
  int num_output = num_priors_per_location_ * loc_classes * 4;
  convolution_param->set_num_output(num_output);
  convolution_param->mutable_weight_filler()->set_type("xavier");
  convolution_param->mutable_bias_filler()->set_type("constant");
  convolution_param->mutable_bias_filler()->set_value(0.1);
  ConvolutionLayer<Dtype> conv_layer_loc(layer_param);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(fake_blob);
  Blob<Dtype> fake_output_loc;
  fake_top_vec.clear();
  fake_top_vec.push_back(&fake_output_loc);
  conv_layer_loc.SetUp(fake_bottom_vec, fake_top_vec);
  conv_layer_loc.Forward(fake_bottom_vec, fake_top_vec);
  // Use Permute and Flatten layer to prepare for MultiBoxLoss layer.
  PermuteParameter* permute_param = layer_param.mutable_permute_param();
  permute_param->add_order(0);
  permute_param->add_order(2);
  permute_param->add_order(3);
  permute_param->add_order(1);
  PermuteLayer<Dtype> permute_layer(layer_param);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(&fake_output_loc);
  fake_top_vec.clear();
  Blob<Dtype> fake_permute_loc;
  fake_top_vec.push_back(&fake_permute_loc);
  permute_layer.SetUp(fake_bottom_vec, fake_top_vec);
  permute_layer.Forward(fake_bottom_vec, fake_top_vec);
  FlattenParameter* flatten_param = layer_param.mutable_flatten_param();
  flatten_param->set_axis(1);
  FlattenLayer<Dtype> flatten_layer(layer_param);
  vector<int> loc_shape(4, 1);
  loc_shape[0] = num_;
  loc_shape[1] = num_output * height_ * width_;
  blob_bottom_loc_->Reshape(loc_shape);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(&fake_permute_loc);
  fake_top_vec.clear();
  fake_top_vec.push_back(blob_bottom_loc_);
  flatten_layer.SetUp(fake_bottom_vec, fake_top_vec);
  flatten_layer.Forward(fake_bottom_vec, fake_top_vec);

  // 3) Fill bbox confidence predictions.
  // Same conv -> permute -> flatten chain, with one score per class per
  // prior; permute_layer and flatten_layer are reused with new bottoms.
  convolution_param->set_num_output(num_priors_per_location_ * num_classes_);
  ConvolutionLayer<Dtype> conv_layer_conf(layer_param);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(fake_blob);
  num_output = num_priors_per_location_ * num_classes_;
  Blob<Dtype> fake_output_conf;
  fake_top_vec.clear();
  fake_top_vec.push_back(&fake_output_conf);
  conv_layer_conf.SetUp(fake_bottom_vec, fake_top_vec);
  conv_layer_conf.Forward(fake_bottom_vec, fake_top_vec);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(&fake_output_conf);
  fake_top_vec.clear();
  Blob<Dtype> fake_permute_conf;
  fake_top_vec.push_back(&fake_permute_conf);
  permute_layer.SetUp(fake_bottom_vec, fake_top_vec);
  permute_layer.Forward(fake_bottom_vec, fake_top_vec);
  vector<int> conf_shape(4, 1);
  conf_shape[0] = num_;
  conf_shape[1] = num_output * height_ * width_;
  blob_bottom_conf_->Reshape(conf_shape);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(&fake_permute_conf);
  fake_top_vec.clear();
  fake_top_vec.push_back(blob_bottom_conf_);
  flatten_layer.SetUp(fake_bottom_vec, fake_top_vec);
  flatten_layer.Forward(fake_bottom_vec, fake_top_vec);

  // 4) Fill prior bboxes.
  PriorBoxParameter* prior_box_param = layer_param.mutable_prior_box_param();
  prior_box_param->add_min_size(5);
  prior_box_param->add_max_size(10);
  prior_box_param->add_aspect_ratio(3.);
  prior_box_param->set_flip(true);
  PriorBoxLayer<Dtype> prior_layer(layer_param);
  fake_bottom_vec.clear();
  fake_bottom_vec.push_back(fake_blob);
  fake_bottom_vec.push_back(fake_input);
  fake_top_vec.clear();
  fake_top_vec.push_back(blob_bottom_prior_);
  prior_layer.SetUp(fake_bottom_vec, fake_top_vec);
  prior_layer.Forward(fake_bottom_vec, fake_top_vec);

  delete fake_blob;
  delete fake_input;
}
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); if (argc < 5) { printf("Convert a set of images to the leveldb format used\n" "as input for Caffe.\n" "Usage:\n" " convert_imageset ROOTFOLDER/ LABELFILE CONTEXT DB_NAME" " RANDOM_SHUFFLE_DATA[0 or 1]\n"); return 0; } std::vector<std::pair<string, vector<float> > > lines; { std::ifstream infile(argv[2]); vector<float> label(NUMLABEL, 0); while (infile.good()) { string filename; infile >> filename; if (filename.empty()) break; for (int i = 0; i < NUMLABEL; ++i) infile >> label[i]; lines.push_back(std::make_pair(filename, label)); } infile.close(); if (argc == 6 && argv[5][0] == '1') { // randomly shuffle data LOG(INFO)<< "Shuffling data"; std::random_shuffle(lines.begin(), lines.end()); } LOG(INFO)<< "A total of " << lines.size() << " images."; } std::map<string, vector<float> > map_name_contxt; { vector<float> contxt(NUMCONTEXT, 0); std::ifstream input(argv[3], 0); while (input.good()) { string filename; input >> filename; if (filename.empty()) break; for (int i = 0; i < NUMCONTEXT; ++i) input >> contxt[i]; map_name_contxt.insert(std::make_pair(filename, contxt)); } input.close(); } leveldb::DB* db; leveldb::Options options; options.error_if_exists = true; options.create_if_missing = true; options.write_buffer_size = 268435456; LOG(INFO)<< "Opening leveldb " << argv[4]; leveldb::Status status = leveldb::DB::Open(options, argv[4], &db); CHECK(status.ok()) << "Failed to open leveldb " << argv[4]; string root_folder(argv[1]); Datum datum; int count = 0; leveldb::WriteBatch* batch = new leveldb::WriteBatch(); int data_size; bool data_size_initialized = false; for (int line_id = 0; line_id < lines.size(); ++line_id) { const std::pair<string, vector<float> >& name_label = lines[line_id]; const string& name = name_label.first; const vector<float>& cur_labels = name_label.second; const vector<float>& cur_conxts = map_name_contxt.find(name)->second; // set image name datum.set_img_name(name); // set 
image data { const string img_full_name = root_folder + name; cv::Mat cv_img = cv::imread(img_full_name, CV_LOAD_IMAGE_COLOR); if (!cv_img.data) { LOG(ERROR)<< "Could not open or find file " << img_full_name; return false; } datum.set_channels(3); datum.set_height(cv_img.rows); datum.set_width(cv_img.cols); datum.clear_data(); datum.clear_float_data(); string* datum_string = datum.mutable_data(); for (int c = 0; c < 3; ++c) { for (int h = 0; h < cv_img.rows; ++h) { for (int w = 0; w < cv_img.cols; ++w) { datum_string->push_back( static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c])); } } } } // set multi-label { datum.set_num_multi_label(NUMLABEL); datum.clear_multi_label(); datum.mutable_multi_label->Reserve(cur_labels.size()); for (int i = 0; i < cur_labels.size(); ++i) datum.add_multi_label(cur_labels[i]); } // set context { datum.set_num_context(NUMCONTEXT); datum.clear_context(); datum.mutable_context->Reserve(cur_conxts.size()); for (int i = 0; i < cur_conxts.size(); ++i) datum.add_context(cur_conxts[i]); } string value; // get the value datum.SerializeToString(&value); batch->Put(name, value); if (++count % 1000 == 0) { db->Write(leveldb::WriteOptions(), batch); LOG(ERROR)<< "Processed " << count << " files."; delete batch; batch = new leveldb::WriteBatch(); } } // write the last batch if (count % 1000 != 0) { db->Write(leveldb::WriteOptions(), batch); LOG(ERROR)<< "Processed " << count << " files."; } delete batch; delete db; return 0; }
//jin: modified from caffe/util/io.hpp:ReadImageToDatum 2016-01-13 16:17:56 //by default, only support gray scale images bool ReadImageRectToDatumArr(const string& img_filename, const int resize_height, const int resize_width, const vector<CNN_RECT> &cand_per_img, Datum *datum_per_img) { //jin:test //cout << "img_filename = " << img_filename << endl; if(resize_height <= 0 || resize_width <= 0 || resize_height != resize_width) { cerr<<"resize_height <=0 or resize_width <=0 or resize_height != resize_width)" << endl; return false; } int label = 0; // all negatives by default int cv_read_flag = CV_LOAD_IMAGE_GRAYSCALE; //int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); cv::Mat cv_img_origin = cv::imread(img_filename, cv_read_flag); if (!cv_img_origin.data) { LOG(ERROR) << "Could not open or find file " << img_filename; return false; } // test cv::namedWindow( "Image", 1 );//创建窗口 cv::imshow( "Image", cv_img_origin );//显示图像 cv::waitKey(0); //等待按键 // cv::destroyWindow( "Image" );//销毁窗口 //jin: 2016-02-23 09:18:42 convert mat into unsigned char and call get_img_rect instead of using cv::Rect unsigned char *image = cv_img_origin.data; int width = cv_img_origin.cols, height = cv_img_origin.rows; int num_channels = 1; //int num_channels = (is_color ? 
3 : 1); int cand_size = cand_per_img.size(); //jin: determine maximal size needed to malloc int max_width = 0; for(int i=0; i<cand_size; ++i) { if (max_width < cand_per_img[i].width) { max_width = cand_per_img[i].width; } } unsigned char* img_rect = (unsigned char*) malloc(max_width*max_width*sizeof(unsigned char)); if(NULL == img_rect){ printf("Failed to malloc.\n"); return false; } unsigned char* img_resize = (unsigned char*) malloc(resize_height*resize_width*sizeof(unsigned char)); if(NULL == img_rect) { printf("Failed to malloc.\n"); return false; } for(int i=0; i<cand_size; ++i) { CNN_RECT rect = cand_per_img[i]; //display the rectangle Point pt1 = Point(rect.x, rect.y), pt2 = Point(rect.x+rect.w, rect.y+rect.h); rectangle(cv_img_origin, pt1, pt2, Scalar(1, 0, 0), 1, 8); get_img_rect(image, width, height, rect, img_rect); if(rect.width != resize_width && rect.height != resize_height ) { bilinear_image_resize(img_rect, rect.width, rect.height, img_resize, resize_width, resize_height); } else { int rect_size = rect.width*rect.height; for(int k=0; k<rect_size; ++k) { img_resize[k] = img_rect[k]; } } Datum datum; datum.set_channels(num_channels); datum.set_height(resize_height); datum.set_width(resize_width); datum.set_label(label); datum.clear_data(); datum.clear_float_data(); string* datum_string = datum.mutable_data(); for (int h = 0; h < resize_height; ++h) { for (int w = 0; w < resize_width; ++w) { datum_string->push_back(img_resize[h*resize_width+w]); } } datum_per_img[i] = datum; } cv::destroyWindow( "Image" );//销毁窗口 free(img_rect); free(img_resize); return true; }