bool MostCV::LevelDBReader::GetNextEntry(string &key, vector<double> &retVec, int &label) { if (!database_iter_->Valid()) return false; Datum datum; datum.clear_float_data(); datum.clear_data(); datum.ParseFromString(database_iter_->value().ToString()); key = database_iter_->key().ToString(); label = datum.label(); int expected_data_size = std::max<int>(datum.data().size(), datum.float_data_size()); const int datum_volume_size = datum.channels() * datum.height() * datum.width(); if (expected_data_size != datum_volume_size) { cout << "Something wrong in saved data."; assert(false); } retVec.resize(datum_volume_size); const string& data = datum.data(); if (data.size() != 0) { // Data stored in string, e.g. just pixel values of 196608 = 256 * 256 * 3 for (int i = 0; i < datum_volume_size; ++i) retVec[i] = data[i]; } else { // Data stored in real feature vector such as 4096 from feature extraction for (int i = 0; i < datum_volume_size; ++i) retVec[i] = datum.float_data(i); } database_iter_->Next(); ++record_idx_; return true; }
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); if (argc != 3) { LOG(ERROR) << "Usage: compute_image_mean input_leveldb output_file"; return 1; } leveldb::DB* db; leveldb::Options options; options.create_if_missing = false; LOG(INFO) << "Opening leveldb " << argv[1]; leveldb::Status status = leveldb::DB::Open( options, argv[1], &db); CHECK(status.ok()) << "Failed to open leveldb " << argv[1]; leveldb::ReadOptions read_options; read_options.fill_cache = false; leveldb::Iterator* it = db->NewIterator(read_options); it->SeekToFirst(); Datum datum; BlobProto sum_blob; int count = 0; datum.ParseFromString(it->value().ToString()); sum_blob.set_num(1); sum_blob.set_channels(datum.channels()); sum_blob.set_height(datum.height()); sum_blob.set_width(datum.width()); const int data_size = datum.channels() * datum.height() * datum.width(); int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } LOG(INFO) << "Starting Iteration"; for (it->SeekToFirst(); it->Valid(); it->Next()) { // just a dummy operation datum.ParseFromString(it->value().ToString()); const string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(ERROR) << "Processed " << count << " files."; } } if (count % 10000 != 0) { LOG(ERROR) << "Processed " << count << " files."; } for (int i = 0; i < sum_blob.data_size(); ++i) { sum_blob.set_data(i, sum_blob.data(i) / count); } // Write to disk LOG(INFO) << "Write to " << argv[2]; WriteProtoToBinaryFile(sum_blob, argv[2]); delete db; return 0; }
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); if (argc < 5) { printf( "Convert a set of images to the leveldb format used\n" "as input for Caffe.\n" "Usage:\n" " convert_imageset ROOTFOLDER/ ANNOTATION DB_NAME" " MODE[0-train, 1-val, 2-test] RANDOM_SHUFFLE_DATA[0 or 1, default 1] RESIZE_WIDTH[default 256] RESIZE_HEIGHT[default 256](0 indicates no resize)\n" "The ImageNet dataset for the training demo is at\n" " http://www.image-net.org/download-images\n"); return 0; } std::ifstream infile(argv[2]); string root_folder(argv[1]); string coarse_folder(argv[8]); string local_folder(argv[9]); string van_folder(argv[10]); string edge_folder(argv[11]); string layout_folder(argv[12]); std::vector<Seg_Anno> annos; std::set<string> fNames; string filename; float prop; int cc = 0; while (infile >> filename) { if (cc % 1000 == 0) LOG(INFO)<<filename; cc ++; Seg_Anno seg_Anno; seg_Anno.filename_ = filename; int x,y; infile >> x >> y; for (int i = 0; i < LABEL_LEN; i++) { //infile >> prop; if(!(prop < 1000000 && prop > -1000000)) { printf("123"); } seg_Anno.pos_.push_back(0); } //string labelFile = filename; //labelFile[labelFile.size() - 1] = 't'; //labelFile[labelFile.size() - 2] = 'x'; //labelFile[labelFile.size() - 3] = 't'; //labelFile = coarse_folder + "/" + labelFile; //FILE * tf = fopen(labelFile.c_str(), "rb"); //if(tf == NULL) continue; //fclose(tf); if (fNames.find(filename)== fNames.end()) { fNames.insert(filename); annos.push_back(seg_Anno); } //debug //if(annos.size() == 10) // break; } if (argc < 6 || argv[5][0] != '0') { // randomly shuffle data LOG(INFO)<< "Shuffling data"; std::random_shuffle(annos.begin(), annos.end()); } LOG(INFO)<< "A total of " << annos.size() << " images."; leveldb::DB* db; leveldb::Options options; options.error_if_exists = true; options.create_if_missing = true; options.write_buffer_size = 268435456; LOG(INFO)<< "Opening leveldb " << argv[3]; leveldb::Status status = leveldb::DB::Open(options, argv[3], &db); CHECK(status.ok()) << "Failed to open leveldb " << argv[3]; Datum datum; int count = 0; const int maxKeyLength = 256; char key_cstr[maxKeyLength]; leveldb::WriteBatch* batch = new leveldb::WriteBatch(); int data_size; bool data_size_initialized = false; // resize to height * width int width = RESIZE_LEN; int height = RESIZE_LEN; if (argc > 6) width = atoi(argv[6]); if (argc > 7) height = atoi(argv[7]); if (width == 0 || height == 0) LOG(INFO) << "NO RESIZE SHOULD BE DONE"; else LOG(INFO) << "RESIZE DIM: " << width << "*" << height; for (int anno_id = 0; anno_id < annos.size(); ++anno_id) { string labelFile = annos[anno_id].filename_; labelFile[labelFile.size() - 1] = 't'; labelFile[labelFile.size() - 2] = 'x'; labelFile[labelFile.size() - 3] = 't'; if (!MyReadImageToDatum(root_folder + "/" + annos[anno_id].filename_, coarse_folder + "/" + labelFile, local_folder + "/" + labelFile, van_folder + "/" + labelFile, edge_folder + '/' + labelFile, layout_folder + '/' + labelFile , annos[anno_id].pos_, height, width, &datum)) { continue; } if (!data_size_initialized) { data_size = datum.channels() * datum.height() * datum.width() ; data_size_initialized = true; } else { int dataLen = datum.float_data_size(); CHECK_EQ(dataLen, data_size)<< "Incorrect data field size " << dataLen; } // sequential snprintf(key_cstr, maxKeyLength, "%07d_%s", anno_id, annos[anno_id].filename_.c_str()); string value; // get the value datum.SerializeToString(&value); batch->Put(string(key_cstr), value); if (++count % 1000 == 0) { db->Write(leveldb::WriteOptions(), batch); LOG(ERROR)<< "Processed " << count << " files."; delete batch; batch = new leveldb::WriteBatch(); } } // write the last batch if (count % 1000 != 0) { db->Write(leveldb::WriteOptions(), batch); LOG(ERROR)<< "Processed " << count << " files."; } delete batch; delete db; return 0; }
std::vector<float> calc_mean(const std::string &db_fname) { scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend)); db->Open(db_fname, db::READ); scoped_ptr<db::Cursor> cursor(db->NewCursor()); BlobProto sum_blob; int count = 0; // load first datum Datum datum; datum.ParseFromString(cursor->value()); if (DecodeDatumNative(&datum)) { LOG(INFO) << "Decoding Datum"; } sum_blob.set_num(1); sum_blob.set_channels(datum.channels()); sum_blob.set_height(datum.height()); sum_blob.set_width(datum.width()); const int data_size = datum.channels() * datum.height() * datum.width(); int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } LOG(INFO) << "Starting Iteration"; while (cursor->valid()) { Datum datum; datum.ParseFromString(cursor->value()); DecodeDatumNative(&datum); const std::string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { CHECK_EQ(data.size(), size_in_datum); for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { CHECK_EQ(datum.float_data_size(), size_in_datum); for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(INFO) << "Processed " << count << " files."; } cursor->Next(); } if (count % 10000 != 0) { LOG(INFO) << "Processed " << count << " files."; } for (int i = 0; i < sum_blob.data_size(); ++i) { sum_blob.set_data(i, sum_blob.data(i) / count); } const int channels = sum_blob.channels(); const int dim = sum_blob.height() * sum_blob.width(); std::vector<float> mean_values(channels, 0.0); LOG(INFO) << "Number of channels: " << channels; for (int c = 0; c < channels; ++c) { for (int i = 0; i < dim; ++i) { mean_values[c] += sum_blob.data(dim * c + i); } mean_values[c] /= dim; LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c]; } return mean_values; }
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); if (argc < 3 || argc > 4) { LOG(ERROR) << "Usage: compute_image_mean input_db output_file" << " db_backend[leveldb or lmdb]"; return 1; } string db_backend = "lmdb"; if (argc == 4) { db_backend = string(argv[3]); } // Open leveldb leveldb::DB* db; leveldb::Options options; options.create_if_missing = false; leveldb::Iterator* it = NULL; // lmdb MDB_env* mdb_env; MDB_dbi mdb_dbi; MDB_val mdb_key, mdb_value; MDB_txn* mdb_txn; MDB_cursor* mdb_cursor; // Open db if (db_backend == "leveldb") { // leveldb LOG(INFO) << "Opening leveldb " << argv[1]; leveldb::Status status = leveldb::DB::Open( options, argv[1], &db); CHECK(status.ok()) << "Failed to open leveldb " << argv[1]; leveldb::ReadOptions read_options; read_options.fill_cache = false; it = db->NewIterator(read_options); it->SeekToFirst(); } else if (db_backend == "lmdb") { // lmdb LOG(INFO) << "Opening lmdb " << argv[1]; CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed"; CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS); // 1TB CHECK_EQ(mdb_env_open(mdb_env, argv[1], MDB_RDONLY, 0664), MDB_SUCCESS) << "mdb_env_open failed"; CHECK_EQ(mdb_txn_begin(mdb_env, NULL, MDB_RDONLY, &mdb_txn), MDB_SUCCESS) << "mdb_txn_begin failed"; CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS) << "mdb_open failed"; CHECK_EQ(mdb_cursor_open(mdb_txn, mdb_dbi, &mdb_cursor), MDB_SUCCESS) << "mdb_cursor_open failed"; CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST), MDB_SUCCESS); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } // set size info Datum datum; BlobProto sum_blob; int count = 0; // load first datum if (db_backend == "leveldb") { datum.ParseFromString(it->value().ToString()); } else if (db_backend == "lmdb") { datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } sum_blob.set_num(1); sum_blob.set_channels(datum.channels()); sum_blob.set_height(datum.height()); sum_blob.set_width(datum.width()); const int data_size = datum.channels() * datum.height() * datum.width(); int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } // start collecting LOG(INFO) << "Starting Iteration"; if (db_backend == "leveldb") { // leveldb for (it->SeekToFirst(); it->Valid(); it->Next()) { // just a dummy operation datum.ParseFromString(it->value().ToString()); const string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(ERROR) << "Processed " << count << " files."; } } } else if (db_backend == "lmdb") { // lmdb CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST), MDB_SUCCESS); do { // just a dummy operation datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size); const string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(ERROR) << "Processed " << count << " files."; } } while (mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_NEXT) == MDB_SUCCESS); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } for (int i = 0; i < sum_blob.data_size(); ++i) { sum_blob.set_data(i, sum_blob.data(i) / count); } caffe::Blob<float> vis; vis.FromProto(sum_blob); caffe::imshow(&vis, 1, "mean img"); cv::waitKey(0); google::protobuf::RepeatedField<float>* tmp = sum_blob.mutable_data(); std::vector<float> mean_data(tmp->begin(), tmp->end()); double sum = std::accumulate(mean_data.begin(), mean_data.end(), 0.0); double mean2 = sum / mean_data.size(); double sq_sum = std::inner_product(mean_data.begin(), mean_data.end(), mean_data.begin(), 0.0); double stdev = std::sqrt(sq_sum / mean_data.size() - mean2 * mean2); LOG(INFO) << "mean of mean image: " << mean2 << " std: " << stdev; // Write to disk LOG(INFO) << "Write to " << argv[2]; WriteProtoToBinaryFile(sum_blob, argv[2]); // Clean up if (db_backend == "leveldb") { delete db; } else if (db_backend == "lmdb") { mdb_cursor_close(mdb_cursor); mdb_close(mdb_env, mdb_dbi); mdb_txn_abort(mdb_txn); mdb_env_close(mdb_env); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } return 0; }
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); std::ifstream infile(argv[1]); std::vector<std::pair<string, int> > lines; string filename; int label; while (infile >> filename >> label) { lines.push_back(std::make_pair(filename, label)); } LOG(INFO) << "A total of " << lines.size() << " images."; Datum datum; BlobProto sum_blob; int count = 0; if (!ReadImageToDatum(lines[0].first, lines[0].second, resize_height, resize_width, is_color, &datum)) { return -1; } sum_blob.set_num(1); sum_blob.set_channels(datum.channels()); sum_blob.set_height(datum.height()); sum_blob.set_width(datum.width()); const int data_size = datum.channels() * datum.height() * datum.width(); int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } LOG(INFO) << "Starting Iteration"; for (int line_id = 0; line_id < lines.size(); ++line_id) { if (!ReadImageToDatum(lines[line_id].first, lines[line_id].second, resize_height, resize_width, is_color, &datum)) { continue; } const string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; } for (int i = 0; i < sum_blob.data_size(); ++i) { sum_blob.set_data(i, sum_blob.data(i) / count); } // Write to disk LOG(INFO) << "Write to " << argv[2]; WriteProtoToBinaryFile(sum_blob, argv[2]); return 0; }
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); #ifdef USE_OPENCV #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif gflags::SetUsageMessage("Compute the mean_image of a set of images given by" " a leveldb/lmdb\n" "Usage:\n" " compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]\n"); gflags::ParseCommandLineFlags(&argc, &argv, true); if (argc < 2 || argc > 3) { gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/compute_image_mean"); return 1; } scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend)); db->Open(argv[1], db::READ); scoped_ptr<db::Cursor> cursor(db->NewCursor()); BlobProto sum_blob; int count = 0; // load first datum Datum datum; datum.ParseFromString(cursor->value()); if (DecodeDatumNative(&datum)) { LOG(INFO) << "Decoding Datum"; } sum_blob.set_num(1); sum_blob.set_channels(datum.channels()); sum_blob.set_height(datum.height()); sum_blob.set_width(datum.width()); const int data_size = datum.channels() * datum.height() * datum.width(); int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } LOG(INFO) << "Starting Iteration"; while (cursor->valid()) { Datum datum; datum.ParseFromString(cursor->value()); DecodeDatumNative(&datum); const std::string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { CHECK_EQ(data.size(), size_in_datum); for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { CHECK_EQ(datum.float_data_size(), size_in_datum); for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(INFO) << "Processed " << count << " files."; printf("Processed %d files.\n",count); } cursor->Next(); } if (count % 10000 != 0) { LOG(INFO) << "Processed " << count << " files."; printf("Processed %d files.\n",count); } for (int i = 0; i < sum_blob.data_size(); ++i) { sum_blob.set_data(i, sum_blob.data(i) / count); } // Write to disk if (argc == 3) { LOG(INFO) << "Write to " << argv[2]; WriteProtoToBinaryFile(sum_blob, argv[2]); } const int channels = sum_blob.channels(); const int dim = sum_blob.height() * sum_blob.width(); std::vector<float> mean_values(channels, 0.0); LOG(INFO) << "Number of channels: " << channels; for (int c = 0; c < channels; ++c) { for (int i = 0; i < dim; ++i) { mean_values[c] += sum_blob.data(dim * c + i); } LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim; } #else LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; #endif // USE_OPENCV return 0; }
int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); if (argc < 5) { LOG(ERROR) << "Usage: compute_image_mean input_list new_height new_width output_file [dropping_rate]"; return 1; } char* fn_list = argv[1]; const int height = atoi(argv[2]); const int width = atoi(argv[3]); char* fn_output = argv[4]; int sampling_rate = 1; if (argc >= 6){ sampling_rate = atoi(argv[5]); LOG(INFO) << "using sampling rate " << sampling_rate; } Datum datum; BlobProto sum_blob; int count = 0; std::ifstream infile(fn_list); string fn_frm; int label; infile >> fn_frm >> label; ReadImageToDatum(fn_frm, label, height, width, &datum); sum_blob.set_num(1); sum_blob.set_channels(datum.channels()); sum_blob.set_length(1); sum_blob.set_height(datum.height()); sum_blob.set_width(datum.width()); const int data_size = datum.channels() * datum.height() * datum.width(); int size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } LOG(INFO) << "Starting Iteration"; int i = 0; while (infile >> fn_frm >> label) { i++; if (i % sampling_rate!=0){ continue; } ReadImageToDatum(fn_frm, label, height, width, &datum); const string& data = datum.data(); size_in_datum = std::max<int>(datum.data().size(), datum.float_data_size()); CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " << size_in_datum; if (data.size() != 0) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]); } } else { for (int i = 0; i < size_in_datum; ++i) { sum_blob.set_data(i, sum_blob.data(i) + static_cast<float>(datum.float_data(i))); } } ++count; if (count % 10000 == 0) { LOG(ERROR) << "Processed " << count << " files."; } } infile.close(); if (count % 10000 != 0) { LOG(ERROR) << "Processed " << count << " files."; } for (int i = 0; i < sum_blob.data_size(); ++i) { sum_blob.set_data(i, sum_blob.data(i) / count); } // Write to disk LOG(INFO) << "Write to " << fn_output; WriteProtoToBinaryFile(sum_blob, fn_output); return 0; }