// Combine several vocabulary trees into one.
int combineVocab(vector<string> &allTreesIn, char *tree_out)
{
    int num_trees = (int) allTreesIn.size();

    VocabTree tree;
    tree.Read(allTreesIn[0].c_str());

    // Start with the second tree, as we just read the first one
    for (int i = 1; i < num_trees; i++) {
        printf("[VocabCombine] Adding tree %d [%s]...\n",
               i, allTreesIn[i].c_str());
        fflush(stdout);

        VocabTree tree_add;
        tree_add.Read(allTreesIn[i].c_str());
        tree.Combine(tree_add);
        tree_add.Clear();
    }

    // Now do the reweighting
    int total_num_db_images = tree.GetMaxDatabaseImageIndex() + 1;
    printf("Total num_db_images: %d\n", total_num_db_images);

    tree.ComputeTFIDFWeights(total_num_db_images);
    tree.NormalizeDatabase(0, total_num_db_images);

    tree.Write(tree_out);

    // Write vectors to a file
    tree.WriteDatabaseVectors("vectors_all.txt", 0, total_num_db_images);

    return 0;
}
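// ---------------------------------------------------------------------
// Usage sketch (not part of the original sources): how combineVocab can
// be called directly instead of going through the command-line driver
// below. The shard file names are hypothetical placeholders for trees
// built in separate runs.
// ---------------------------------------------------------------------
#include <string>
#include <vector>

using std::string;
using std::vector;

int combineVocab(vector<string> &allTreesIn, char *tree_out);

int exampleCombine()
{
    vector<string> trees;
    trees.push_back("tree_shard0.out");   // per-shard trees built earlier
    trees.push_back("tree_shard1.out");

    char tree_out[] = "tree_combined.out";
    return combineVocab(trees, tree_out);
}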
int main(int argc, char **argv)
{
    if (argc < 3) {
        printf("Usage: %s <tree1.in> <tree2.in> ... <tree.out>\n", argv[0]);
        return 1;
    }

    int num_trees = argc - 2;
    char *tree_out = argv[argc - 1];

    VocabTree tree;
    tree.Read(argv[1]);

    /* Start with the second tree, as we just read the first one */
    for (int i = 1; i < num_trees; i++) {
        printf("[VocabCombine] Adding tree %d [%s]...\n", i, argv[i + 1]);
        fflush(stdout);

        VocabTree tree_add;
        tree_add.Read(argv[i + 1]);
        tree.Combine(tree_add);
        tree_add.Clear();
    }

    /* Now do the reweighting */
    // if (use_tfidf)
    int total_num_db_images = tree.GetMaxDatabaseImageIndex() + 1;
    printf("Total num_db_images: %d\n", total_num_db_images);

    tree.ComputeTFIDFWeights(total_num_db_images);
    tree.NormalizeDatabase(0, total_num_db_images);

    tree.Write(tree_out);

    /* Write vectors to a file */
    tree.WriteDatabaseVectors("vectors_all.txt", 0, total_num_db_images);

    return 0;
}
int main(int argc, char **argv)
{
    if (argc < 4 || argc > 8) {
        printf("Usage: %s <list.in> <tree.in> <tree.out> [use_tfidf:1] "
               "[normalize:1] [start_id:0] [distance_type:1]\n", argv[0]);
        return 1;
    }

    double min_feature_scale = 1.4;
    bool use_tfidf = true;
    bool normalize = true;

    char *list_in = argv[1];
    char *tree_in = argv[2];
    char *tree_out = argv[3];
    DistanceType distance_type = DistanceMin;
    int start_id = 0;

    if (argc >= 5)
        use_tfidf = atoi(argv[4]);
    if (argc >= 6)
        normalize = atoi(argv[5]);
    if (argc >= 7)
        start_id = atoi(argv[6]);
    if (argc >= 8)
        distance_type = (DistanceType) atoi(argv[7]);

    switch (distance_type) {
    case DistanceDot:
        printf("[VocabMatch] Using distance Dot\n");
        break;
    case DistanceMin:
        printf("[VocabMatch] Using distance Min\n");
        break;
    default:
        printf("[VocabMatch] Using no known distance!\n");
        break;
    }

    /* Read the list of key files to add to the database */
    FILE *f = fopen(list_in, "r");
    if (f == NULL) {
        printf("Error opening file %s for reading\n", list_in);
        return 1;
    }

    std::vector<std::string> key_files;
    char buf[256];
    while (fgets(buf, 256, f)) {
        /* Remove trailing newline */
        if (buf[strlen(buf) - 1] == '\n')
            buf[strlen(buf) - 1] = 0;

        key_files.push_back(std::string(buf));
    }

    fclose(f);

    printf("[VocabBuildDB] Reading tree %s...\n", tree_in);
    fflush(stdout);

    VocabTree tree;
    tree.Read(tree_in);

#if 1
    tree.Flatten();
#endif

    tree.m_distance_type = distance_type;
    tree.SetInteriorNodeWeight(0.0);

    /* Initialize leaf weights to 1.0 */
    tree.SetConstantLeafWeights();

    const int dim = 128;
    int num_db_images = (int) key_files.size();
    unsigned long count = 0;

    tree.ClearDatabase();

    for (int i = 0; i < num_db_images; i++) {
        int num_keys = 0;
        unsigned char *keys = ReadAndFilterKeys(key_files[i].c_str(), dim,
                                                min_feature_scale, 0, num_keys);

        printf("[VocabBuildDB] Adding vector %d (%d keys)\n",
               start_id + i, num_keys);
        tree.AddImageToDatabase(start_id + i, num_keys, keys);

        count += num_keys;  /* keep the feature count accurate for the summary below */

        if (num_keys > 0)
            delete [] keys;
    }

    printf("[VocabBuildDB] Pushed %lu features\n", count);
    fflush(stdout);

    if (use_tfidf)
        tree.ComputeTFIDFWeights(num_db_images);

    if (normalize)
        tree.NormalizeDatabase(start_id, num_db_images);

    printf("[VocabBuildDB] Writing tree...\n");
    tree.Write(tree_out);

    // char filename[256];
    // sprintf(filename, "vectors_%03d.txt", start_id);
    // tree.WriteDatabaseVectors(filename, start_id, num_db_images);

    return 0;
}
// Build a database (inverted file) with a vocabulary tree.
int VocabDB(char *list_in, char *tree_in, char *db_out,
            int use_tfidf = 1, int normalize = 1, int start_id = 0,
            DistanceType distance_type = DistanceMin)
{
    double min_feature_scale = 1.4;

    switch (distance_type) {
    case DistanceDot:
        printf("[VocabMatch] Using distance Dot\n");
        break;
    case DistanceMin:
        printf("[VocabMatch] Using distance Min\n");
        break;
    default:
        printf("[VocabMatch] Using no known distance!\n");
        break;
    }

    // Read the list of key files to add to the database
    FILE *f = fopen(list_in, "r");
    if (f == NULL) {
        printf("Error opening file %s for reading\n", list_in);
        return 1;
    }

    std::vector<std::string> key_files;
    char buf[256];
    while (fgets(buf, 256, f)) {
        /* Remove trailing newline */
        if (buf[strlen(buf) - 1] == '\n')
            buf[strlen(buf) - 1] = 0;

        key_files.push_back(std::string(buf));
    }

    fclose(f);

    printf("[VocabBuildDB] Reading tree %s...\n", tree_in);
    fflush(stdout);

    VocabTree tree;
    tree.Read(tree_in);
    tree.Flatten();

    tree.m_distance_type = distance_type;
    tree.SetInteriorNodeWeight(0.0);

    // Initialize leaf weights to 1.0
    tree.SetConstantLeafWeights();

    const int dim = 128;
    int num_db_images = (int) key_files.size();
    unsigned long count = 0;

    tree.ClearDatabase();

    for (int i = 0; i < num_db_images; i++) {
        int num_keys = 0;
        unsigned char *keys = ReadAndFilterKeys(key_files[i].c_str(), dim,
                                                min_feature_scale, 0, num_keys);

        printf("[VocabBuildDB] Adding vector %d (%d keys)\n",
               start_id + i, num_keys);
        tree.AddImageToDatabase(start_id + i, num_keys, keys);

        count += num_keys;  // keep the feature count accurate for the summary below

        if (num_keys > 0)
            delete [] keys;
    }

    printf("[VocabBuildDB] Pushed %lu features\n", count);
    fflush(stdout);

    if (use_tfidf == 1)
        tree.ComputeTFIDFWeights(num_db_images);

    if (normalize == 1)
        tree.NormalizeDatabase(start_id, num_db_images);

    printf("[VocabBuildDB] Writing database ...\n");
    tree.Write(db_out);

    // char filename[256];
    // sprintf(filename, "vectors_%03d.txt", start_id);
    // tree.WriteDatabaseVectors(filename, start_id, num_db_images);

    return 0;
}
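// ---------------------------------------------------------------------
// Sketch only (assumption, not the project's implementation):
// ReadAndFilterKeys is used by both database builders above but is
// defined elsewhere in the sources, and the real version may read a
// binary or gzipped key format. This stand-in assumes Lowe's plain-text
// .key layout: a "<count> <dim>" header, then for each feature one
// row/col/scale/orientation line followed by <dim> descriptor bytes.
// Features with scale below min_feature_scale are dropped; max_keys == 0
// is treated here as "no limit", matching the 0 passed by the callers.
// ---------------------------------------------------------------------
#include <algorithm>
#include <cstdio>
#include <vector>

unsigned char *ReadAndFilterKeys(const char *key_file, int dim,
                                 double min_feature_scale, int max_keys,
                                 int &num_keys_out)
{
    num_keys_out = 0;

    FILE *f = fopen(key_file, "r");
    if (f == NULL) {
        printf("Error opening key file %s\n", key_file);
        return NULL;
    }

    int num_keys = 0, key_dim = 0;
    if (fscanf(f, "%d %d", &num_keys, &key_dim) != 2 || key_dim != dim) {
        printf("Invalid key file header in %s\n", key_file);
        fclose(f);
        return NULL;
    }

    std::vector<unsigned char> kept;
    kept.reserve((size_t) num_keys * dim);

    for (int i = 0; i < num_keys; i++) {
        float row, col, scale, orientation;
        if (fscanf(f, "%f %f %f %f", &row, &col, &scale, &orientation) != 4)
            break;

        bool keep = (scale >= min_feature_scale) &&
                    (max_keys == 0 || num_keys_out < max_keys);

        for (int d = 0; d < dim; d++) {
            int val = 0;
            if (fscanf(f, "%d", &val) != 1)
                val = 0;
            if (keep)
                kept.push_back((unsigned char) val);
        }

        if (keep)
            num_keys_out++;
    }

    fclose(f);

    if (num_keys_out == 0)
        return NULL;

    // Callers free this buffer with delete [] when num_keys_out > 0.
    unsigned char *keys = new unsigned char[kept.size()];
    std::copy(kept.begin(), kept.end(), keys);
    return keys;
}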