// Opens the configured database backend (LevelDB or LMDB), optionally skips a
// random number of leading records, then reads one datum and uses its
// dimensions to shape the top blobs and the prefetch buffers.
//
// bottom: not used by this function.
// top:    (*top)[0] is reshaped to hold one batch of images; when
//         output_labels_ is set, (*top)[1] is reshaped to hold the labels.
//
// Any database or parse failure aborts through CHECK / LOG(FATAL).
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  // liu: every item carries this many label values (the single-label layout
  // would use 1 here).
  const int kLabelDim = 4;
  const int batch_size = this->layer_param_.data_param().batch_size();
  const std::string& source = this->layer_param_.data_param().source();

  // Initialize DB
  switch (this->layer_param_.data_param().backend()) {
  case DataParameter_DB_LEVELDB:
    {
    leveldb::DB* db_temp;
    leveldb::Options options = GetLevelDBOptions();
    options.create_if_missing = false;
    LOG(INFO) << "Opening leveldb " << source;
    leveldb::Status status = leveldb::DB::Open(options, source, &db_temp);
    CHECK(status.ok()) << "Failed to open leveldb "
                       << source << std::endl
                       << status.ToString();
    db_.reset(db_temp);
    iter_.reset(db_->NewIterator(leveldb::ReadOptions()));
    iter_->SeekToFirst();
    // Reading value() from an invalid iterator is not allowed, so an empty
    // database has to be rejected before the first datum is parsed below.
    CHECK(iter_->Valid()) << "leveldb " << source << " contains no records";
    }
    break;
  case DataParameter_DB_LMDB:
    CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed";
    CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), MDB_SUCCESS);  // 1TB
    CHECK_EQ(mdb_env_open(mdb_env_, source.c_str(),
             MDB_RDONLY|MDB_NOTLS, 0664), MDB_SUCCESS) << "mdb_env_open failed";
    CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS)
        << "mdb_txn_begin failed";
    CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS)
        << "mdb_open failed";
    CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS)
        << "mdb_cursor_open failed";
    LOG(INFO) << "Opening lmdb " << source;
    CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST),
        MDB_SUCCESS) << "mdb_cursor_get failed";
    break;
  default:
    LOG(FATAL) << "Unknown database backend";
  }

  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
                        this->layer_param_.data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    while (skip-- > 0) {
      switch (this->layer_param_.data_param().backend()) {
      case DataParameter_DB_LEVELDB:
        iter_->Next();
        if (!iter_->Valid()) {
          // Ran off the end: wrap around to the first record.
          iter_->SeekToFirst();
        }
        break;
      case DataParameter_DB_LMDB:
        if (mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_NEXT)
            != MDB_SUCCESS) {
          // Ran off the end: wrap around to the first record.
          CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_,
                   MDB_FIRST), MDB_SUCCESS);
        }
        break;
      default:
        LOG(FATAL) << "Unknown database backend";
      }
    }
  }

  // Read a data point, and use it to initialize the top blob. A record that
  // fails to parse would otherwise yield silently mis-sized blobs.
  Datum datum;
  switch (this->layer_param_.data_param().backend()) {
  case DataParameter_DB_LEVELDB:
    CHECK(datum.ParseFromString(iter_->value().ToString()))
        << "Failed to parse datum from leveldb " << source;
    break;
  case DataParameter_DB_LMDB:
    CHECK(datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size))
        << "Failed to parse datum from lmdb " << source;
    break;
  default:
    LOG(FATAL) << "Unknown database backend";
  }

  // image: a positive crop_size overrides the spatial size stored in the datum
  const int crop_size = this->layer_param_.transform_param().crop_size();
  const int out_height = (crop_size > 0) ? crop_size : datum.height();
  const int out_width = (crop_size > 0) ? crop_size : datum.width();
  (*top)[0]->Reshape(batch_size, datum.channels(), out_height, out_width);
  this->prefetch_data_.Reshape(batch_size, datum.channels(),
                               out_height, out_width);
  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
      << (*top)[0]->width();

  // label
  if (this->output_labels_) {
    (*top)[1]->Reshape(batch_size, kLabelDim, 1, 1);
    this->prefetch_label_.Reshape(batch_size, kLabelDim, 1, 1);
  }

  // datum size
  this->datum_channels_ = datum.channels();
  this->datum_height_ = datum.height();
  this->datum_width_ = datum.width();
  this->datum_size_ = datum.channels() * datum.height() * datum.width();
}
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); if (argc < 3 || argc > 4) { LOG(ERROR) << "Usage: compute_image_mean input_db output_file" << " db_backend[leveldb or lmdb]"; return 1; } string db_backend = "lmdb"; if (argc == 4) { db_backend = string(argv[3]); } // Open leveldb leveldb::DB* db; leveldb::Options options; options.create_if_missing = false; leveldb::Iterator* it = NULL; // lmdb MDB_env* mdb_env; MDB_dbi mdb_dbi; MDB_val mdb_key, mdb_value; MDB_txn* mdb_txn; MDB_cursor* mdb_cursor; // Open db if (db_backend == "leveldb") { // leveldb LOG(INFO) << "Opening leveldb " << argv[1]; leveldb::Status status = leveldb::DB::Open( options, argv[1], &db); CHECK(status.ok()) << "Failed to open leveldb " << argv[1]; leveldb::ReadOptions read_options; read_options.fill_cache = false; it = db->NewIterator(read_options); it->SeekToFirst(); } else if (db_backend == "lmdb") { // lmdb LOG(INFO) << "Opening lmdb " << argv[1]; CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed"; CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS); // 1TB CHECK_EQ(mdb_env_open(mdb_env, argv[1], MDB_RDONLY, 0664), MDB_SUCCESS) << "mdb_env_open failed"; CHECK_EQ(mdb_txn_begin(mdb_env, NULL, MDB_RDONLY, &mdb_txn), MDB_SUCCESS) << "mdb_txn_begin failed"; CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS) << "mdb_open failed"; CHECK_EQ(mdb_cursor_open(mdb_txn, mdb_dbi, &mdb_cursor), MDB_SUCCESS) << "mdb_cursor_open failed"; CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST), MDB_SUCCESS); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } // set size info Datum datum; BlobProto sum_blob; int count = 0; // load first datum if (db_backend == "leveldb") { datum.ParseFromString(it->value().ToString()); } else if (db_backend == "lmdb") { datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } sum_blob.set_num(1); 
sum_blob.set_channels(datum.channels()); sum_blob.set_height(datum.height()); sum_blob.set_width(datum.width()); const int data_size = datum.channels() * datum.height() * datum.width(); int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } // start collecting LOG(INFO) << "Starting Iteration"; if (db_backend == "leveldb") { // leveldb for (it->SeekToFirst(); it->Valid(); it->Next()) { // just a dummy operation datum.ParseFromString(it->value().ToString()); const string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(ERROR) << "Processed " << count << " files."; } } } else if (db_backend == "lmdb") { // lmdb CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST), MDB_SUCCESS); do { // just a dummy operation datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size); const string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(ERROR) << "Processed " << count << " files."; } } while (mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_NEXT) == MDB_SUCCESS); } else { LOG(FATAL) << "Unknown db backend 
" << db_backend; } for (int i = 0; i < sum_blob.data_size(); ++i) { sum_blob.set_data(i, sum_blob.data(i) / count); } caffe::Blob<float> vis; vis.FromProto(sum_blob); caffe::imshow(&vis, 1, "mean img"); cv::waitKey(0); google::protobuf::RepeatedField<float>* tmp = sum_blob.mutable_data(); std::vector<float> mean_data(tmp->begin(), tmp->end()); double sum = std::accumulate(mean_data.begin(), mean_data.end(), 0.0); double mean2 = sum / mean_data.size(); double sq_sum = std::inner_product(mean_data.begin(), mean_data.end(), mean_data.begin(), 0.0); double stdev = std::sqrt(sq_sum / mean_data.size() - mean2 * mean2); LOG(INFO) << "mean of mean image: " << mean2 << " std: " << stdev; // Write to disk LOG(INFO) << "Write to " << argv[2]; WriteProtoToBinaryFile(sum_blob, argv[2]); // Clean up if (db_backend == "leveldb") { delete db; } else if (db_backend == "lmdb") { mdb_cursor_close(mdb_cursor); mdb_close(mdb_env, mdb_dbi); mdb_txn_abort(mdb_txn); mdb_env_close(mdb_env); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } return 0; }
// Fills the prefetch buffers with one batch: for each of batch_size items it
// reads the current database record, transforms it into prefetch_data_,
// copies its labels into prefetch_label_ (when output_labels_ is set), and
// advances the cursor, wrapping to the first record at the end of the
// database.
//
// Any database or parse failure aborts through CHECK / LOG(FATAL).
void DataLayer<Dtype>::InternalThreadEntry() {
  Datum datum;
  CHECK(this->prefetch_data_.count());
  Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
  Dtype* top_label = NULL;  // suppress warnings about uninitialized variables
  const int batch_size = this->layer_param_.data_param().batch_size();
  CHECK_GT(batch_size, 0);
  // Number of label slots per item, derived from the label buffer itself so
  // the writes below can be checked against its real capacity.
  int label_dim = 0;
  if (this->output_labels_) {
    top_label = this->prefetch_label_.mutable_cpu_data();
    label_dim = this->prefetch_label_.count() / batch_size;
  }

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    switch (this->layer_param_.data_param().backend()) {
    case DataParameter_DB_LEVELDB:
      CHECK(iter_);
      CHECK(iter_->Valid());
      CHECK(datum.ParseFromString(iter_->value().ToString()))
          << "Failed to parse datum from leveldb";
      break;
    case DataParameter_DB_LMDB:
      CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
              &mdb_value_, MDB_GET_CURRENT), MDB_SUCCESS);
      CHECK(datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size))
          << "Failed to parse datum from lmdb";
      break;
    default:
      LOG(FATAL) << "Unknown database backend";
    }

    // Apply data transformations (mirror, scale, crop...)
    this->data_transformer_.Transform(item_id, datum, this->mean_, top_data);

    if (this->output_labels_) {
      // liu: each datum carries several labels. A datum with more labels than
      // the buffer has slots would write past its end, and one with fewer
      // would shift the labels of later items, so require an exact match.
      CHECK_EQ(datum.label_size(), label_dim)
          << "Datum label count does not match the label blob width";
      for (int label_i = 0; label_i < label_dim; ++label_i) {
        top_label[item_id * label_dim + label_i] = datum.label(label_i);
      }
    }

    // go to the next iter
    switch (this->layer_param_.data_param().backend()) {
    case DataParameter_DB_LEVELDB:
      iter_->Next();
      if (!iter_->Valid()) {
        // We have reached the end. Restart from the first.
        DLOG(INFO) << "Restarting data prefetching from start.";
        iter_->SeekToFirst();
      }
      break;
    case DataParameter_DB_LMDB:
      if (mdb_cursor_get(mdb_cursor_, &mdb_key_,
              &mdb_value_, MDB_NEXT) != MDB_SUCCESS) {
        // We have reached the end. Restart from the first.
        DLOG(INFO) << "Restarting data prefetching from start.";
        CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_,
                &mdb_value_, MDB_FIRST), MDB_SUCCESS);
      }
      break;
    default:
      LOG(FATAL) << "Unknown database backend";
    }
  }
}