// combining several vocab trees into one int combineVocab(vector<string> &allTreesIn, char *tree_out) { int num_trees = (int)allTreesIn.size(); VocabTree tree; tree.Read(allTreesIn[0].c_str()); //Start with the second tree, as we just read the first one for (int i = 1; i < num_trees; i++) { printf("[VocabCombine] Adding tree %d [%s]...\n", i, allTreesIn[i].c_str()); fflush(stdout); VocabTree tree_add; tree_add.Read(allTreesIn[i].c_str()); tree.Combine(tree_add); tree_add.Clear(); } //Now do the reweighting: int total_num_db_images = tree.GetMaxDatabaseImageIndex() + 1; printf("Total num_db_images: %d\n", total_num_db_images); tree.ComputeTFIDFWeights(total_num_db_images); tree.NormalizeDatabase(0, total_num_db_images); tree.Write(tree_out); //Write vectors to a file tree.WriteDatabaseVectors("vectors_all.txt", 0, total_num_db_images); return 0; }
int main(int argc, char **argv) { if (argc < 3) { printf("Usage: %s <tree1.in> <tree2.in> ... <tree.out>\n", argv[0]); return 1; } int num_trees = argc - 2; char *tree_out = argv[argc-1]; VocabTree tree; tree.Read(argv[1]); /* Start with the second tree, as we just read the first one */ for (int i = 1; i < num_trees; i++) { printf("[VocabCombine] Adding tree %d [%s]...\n", i, argv[i+1]); fflush(stdout); VocabTree tree_add; tree_add.Read(argv[i+1]); tree.Combine(tree_add); tree_add.Clear(); } /* Now do the reweighting */ // if (use_tfidf) int total_num_db_images = tree.GetMaxDatabaseImageIndex() + 1; printf("Total num_db_images: %d\n", total_num_db_images); tree.ComputeTFIDFWeights(total_num_db_images); tree.NormalizeDatabase(0, total_num_db_images); tree.Write(tree_out); /* Write vectors to a file */ tree.WriteDatabaseVectors("vectors_all.txt", 0, total_num_db_images); return 0; }
bool VTreeDB::execute_vtqueries(){ //--- Alocate the vocabulary tree //------------------------------- VocabTree vtree = VocabTree(K,L); //--- Alocate postgres //-------------------- DBOperations dbm = DBOperations(); //--- Load postgres credentials //----------------------------- //--- Load train features //----------------------- std::string modelPath; std::vector<std::string> descsList; if(!load_synth_info(listPath, modelPath, descsList)) return false; std::string modelName = strip_tail(modelPath, "/"); std::vector<std::vector<Descriptor> > descriptors = std::vector<std::vector<Descriptor> >(descsList.size()); std::string listBasePath = strip_path(listPath); logger << MOD_TAG << "Loading the descriptors..." << "\n"; for(unsigned int i = 0; i < descsList.size(); i++){ if(!load_synthetic_descritpors(listBasePath + descsList[i], descriptors[i])) return false; } logger << INDENT << "...done." << "\n"; unsigned int nDocuments; switch(option){ case 0: //--- Create database //------------------- logger << MOD_TAG << "Connecting to the database..." << "\n"; dbm.connect(onlineWS + "vtreesql.db"); logger << MOD_TAG << "Creating tables..." << "\n"; dbm.create_tables(); //--- Create vocabulary //--------------------- logger << MOD_TAG << "Creating the vocabulary..." << "\n"; vtree.create(descriptors); logger << MOD_TAG << "Saving the vocabulary..." << "\n"; vtree.save_vocabulary(onlineWS); logger << MOD_TAG << "...done" << "\n"; vtree.unload_vocabulary(); break; case 1: //--- Connect to the postgres db //------------------------------ logger << MOD_TAG << "Connecting to the database..." << "\n"; dbm.connect(onlineWS + "vtreesql.db"); if(dbm.verify_model_existance(modelName)){ logger << INDENT << "Model " << modelName << " is already inserted...exiting." << "\n"; break; } //--- Load vocabulary to enrich //----------------------------- if(!boost::filesystem::exists(onlineWS + "vtreedb.yml.gz")){ logger << MOD_TAG << "Loading the vocabulary..." << "\n"; vtree.load_vocabulary(onlineWS); nDocuments = vtree.getSize(); logger << MOD_TAG << "Adding the descriptors..." << "\n"; vtree.add(descriptors); logger << MOD_TAG << "Saving the vocabulary tree database..." << "\n"; vtree.save_vtreedb(onlineWS); vtree.unload_vtreedb(); }else{ logger << MOD_TAG << "Loading the vocabulary tree database..." << "\n"; vtree.load_vtreedb(onlineWS); nDocuments = vtree.getSize(); logger << MOD_TAG << "Adding the descriptors..." << "\n"; vtree.add(descriptors); logger << MOD_TAG << "Saving the vocabulary tree database..." << "\n"; vtree.save_vtreedb(onlineWS); vtree.unload_vtreedb(); } logger << MOD_TAG << "Updating the sqlite database..." << "\n"; dbm.insert_model(modelPath,modelName, "unkown location"); for(unsigned int i = 0; i < descsList.size(); i++){ std::stringstream ss; ss << (nDocuments + i); dbm.insert_synthetics(listBasePath + descsList[i], ss.str(), modelPath); } logger << MOD_TAG << "...done" << "\n"; break; default: logger << "[WARN] Wrong option..." << "\n"; break; } return true; }
int main(int argc, char **argv) { if (argc < 4 || argc > 8) { printf("Usage: %s <list.in> <tree.in> <tree.out> [use_tfidf:1] " "[normalize:1] [start_id:0] [distance_type:1]\n", argv[0]); return 1; } double min_feature_scale = 1.4; bool use_tfidf = true; bool normalize = true; char *list_in = argv[1]; char *tree_in = argv[2]; char *tree_out = argv[3]; DistanceType distance_type = DistanceMin; int start_id = 0; if (argc >= 5) use_tfidf = atoi(argv[4]); if (argc >= 6) normalize = atoi(argv[5]); if (argc >= 7) start_id = atoi(argv[6]); if (argc >= 8) distance_type = (DistanceType) atoi(argv[7]); switch (distance_type) { case DistanceDot: printf("[VocabMatch] Using distance Dot\n"); break; case DistanceMin: printf("[VocabMatch] Using distance Min\n"); break; default: printf("[VocabMatch] Using no known distance!\n"); break; } FILE *f = fopen(list_in, "r"); if (f == NULL) { printf("Error opening file %s for reading\n", list_in); return 1; } std::vector<std::string> key_files; char buf[256]; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; key_files.push_back(std::string(buf)); } printf("[VocabBuildDB] Reading tree %s...\n", tree_in); fflush(stdout); VocabTree tree; tree.Read(tree_in); #if 1 tree.Flatten(); #endif tree.m_distance_type = distance_type; tree.SetInteriorNodeWeight(0.0); /* Initialize leaf weights to 1.0 */ tree.SetConstantLeafWeights(); const int dim = 128; int num_db_images = (int) key_files.size(); unsigned long count = 0; tree.ClearDatabase(); for (int i = 0; i < num_db_images; i++) { int num_keys = 0; unsigned char *keys = ReadAndFilterKeys(key_files[i].c_str(), dim, min_feature_scale, 0, num_keys); printf("[VocabBuildDB] Adding vector %d (%d keys)\n", start_id + i, num_keys); tree.AddImageToDatabase(start_id + i, num_keys, keys); if (num_keys > 0) delete [] keys; } printf("[VocabBuildDB] Pushed %lu features\n", count); fflush(stdout); if (use_tfidf) tree.ComputeTFIDFWeights(num_db_images); if (normalize) tree.NormalizeDatabase(start_id, num_db_images); printf("[VocabBuildDB] Writing tree...\n"); tree.Write(tree_out); // char filename[256]; // sprintf(filename, "vectors_%03d.txt", start_id); // tree.WriteDatabaseVectors(filename, start_id, num_db_images); return 0; }
int main(int argc, char **argv) { const int dim = 128; if (argc != 6 && argc != 7 && argc != 8) { printf("Usage: %s <tree.in> <db.in> <query.in> <num_nbrs> " "<matches.out> [distance_type:1] [normalize:1]\n", argv[0]); return 1; } char *tree_in = argv[1]; char *db_in = argv[2]; char *query_in = argv[3]; int num_nbrs = atoi(argv[4]); char *matches_out = argv[5]; DistanceType distance_type = DistanceMin; bool normalize = true; #if 0 if (argc >= 7) output_html = argv[6]; #endif if (argc >= 7) distance_type = (DistanceType) atoi(argv[6]); if (argc >= 8) normalize = (atoi(argv[7]) != 0); printf("[VocabMatch] Using tree %s\n", tree_in); switch (distance_type) { case DistanceDot: printf("[VocabMatch] Using distance Dot\n"); break; case DistanceMin: printf("[VocabMatch] Using distance Min\n"); break; default: printf("[VocabMatch] Using no known distance!\n"); break; } /* Read the tree */ printf("[VocabMatch] Reading tree...\n"); fflush(stdout); clock_t start = clock(); VocabTree tree; tree.Read(tree_in); clock_t end = clock(); printf("[VocabMatch] Read tree in %0.3fs\n", (double) (end - start) / CLOCKS_PER_SEC); #if 1 tree.Flatten(); #endif tree.SetDistanceType(distance_type); tree.SetInteriorNodeWeight(0, 0.0); /* Read the database keyfiles */ FILE *f = fopen(db_in, "r"); std::vector<std::string> db_files; char buf[256]; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; db_files.push_back(std::string(buf)); } fclose(f); /* Read the query keyfiles */ f = fopen(query_in, "r"); std::vector<std::string> query_files; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; char keyfile[256]; sscanf(buf, "%s", keyfile); query_files.push_back(std::string(keyfile)); } fclose(f); int num_db_images = db_files.size(); int num_query_images = query_files.size(); printf("[VocabMatch] Read %d database images\n", num_db_images); /* Now score each query keyfile */ printf("[VocabMatch] Scoring %d query images...\n", num_query_images); fflush(stdout); #if 0 FILE *f_html = fopen(output_html, "w"); PrintHTMLHeader(f_html, num_nbrs); #endif float *scores = new float[num_db_images]; double *scores_d = new double[num_db_images]; int *perm = new int[num_db_images]; FILE *f_match = fopen(matches_out, "w"); if (f_match == NULL) { printf("[VocabMatch] Error opening file %s for writing\n", matches_out); return 1; } for (int i = 0; i < num_query_images; i++) { start = clock(); /* Clear scores */ for (int j = 0; j < num_db_images; j++) scores[j] = 0.0; int num_keys = 0; unsigned char *keys = ReadDescriptorFile(query_files[i].c_str(), dim, num_keys); clock_t start_score = clock(); double mag = tree.ScoreQueryKeys(num_keys, normalize, keys, scores); clock_t end_score = end = clock(); printf("[VocabMatch] Scored image %s in %0.3fs " "( %0.3fs total, num_keys = %d, mag = %0.3f )\n", query_files[i].c_str(), (double) (end_score - start_score) / CLOCKS_PER_SEC, (double) (end - start) / CLOCKS_PER_SEC, num_keys, mag); /* Find the top scores */ for (int j = 0; j < num_db_images; j++) { scores_d[j] = (double) scores[j]; } qsort_descending(); qsort_perm(num_db_images, scores_d, perm); int top = MIN(num_nbrs, num_db_images); for (int j = 0; j < top; j++) { // if (perm[j] == index_i) // continue; fprintf(f_match, "%d %d %0.4f\n", i, perm[j], scores_d[j]); //fprintf(f_match, "%d %d %0.4f\n", i, perm[j], mag - scores_d[j]); } fflush(f_match); fflush(stdout); #if 0 PrintHTMLRow(f_html, query_files[i], scores_d, perm, num_nbrs, db_files); #endif delete [] keys; } fclose(f_match); #if 0 PrintHTMLFooter(f_html); fclose(f_html); #endif delete [] scores; delete [] scores_d; delete [] perm; return 0; }
int main(int argc, char **argv) { if (argc != 6) { printf("Usage: %s <list.in> <depth> <branching_factor> " "<restarts> <tree.out>\n", argv[0]); return 1; } const char *list_in = argv[1]; int depth = atoi(argv[2]); int bf = atoi(argv[3]); int restarts = atoi(argv[4]); const char *tree_out = argv[5]; std::ofstream log("debug_VocabLearn.txt"); log << "Building tree with depth: " << depth << ", branching factor: " << bf << ", and restarts: " << restarts << std::endl; printf("Building tree with depth: %d, branching factor: %d, " "and restarts: %d\n", depth, bf, restarts); FILE *f = fopen(list_in, "r"); std::vector<std::string> key_files; char buf[256]; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; key_files.push_back(std::string(buf)); } fclose(f); int num_files = (int) key_files.size(); unsigned long total_keys = 0; for (int i = 0; i < num_files; i++) { int num_keys = GetNumberOfKeys(key_files[i].c_str()); total_keys += num_keys; } log << "Total number of keys: " << total_keys << std::endl; printf("Total number of keys: %lu\n", total_keys); fflush(stdout); int dim = 128; unsigned long long len = (unsigned long long) total_keys * dim; int num_arrays = len / MAX_ARRAY_SIZE + ((len % MAX_ARRAY_SIZE) == 0 ? 0 : 1); unsigned char **vs = new unsigned char *[num_arrays]; log << "Allocating " << len << " bytes in total, in " << num_arrays << " arrays" << std::endl; printf("Allocating %llu bytes in total, in %d arrays\n", len, num_arrays); unsigned long long total = 0; for (int i = 0; i < num_arrays; i++) { unsigned long long remainder = len - total; unsigned long len_curr = MIN(remainder, MAX_ARRAY_SIZE); log << "Allocating array of size " << len_curr << std::endl; printf("Allocating array of size %lu\n", len_curr); fflush(stdout); vs[i] = new unsigned char[len_curr]; remainder -= len_curr; } /* Create the array of pointers */ log << "Allocating pointer array of size " << sizeof(unsigned char) * total_keys << std::endl; printf("Allocating pointer array of size %lu\n", 4 * total_keys); fflush(stdout); unsigned char **vp = new unsigned char *[total_keys]; unsigned long off = 0; unsigned long curr_key = 0; int curr_array = 0; for (int i = 0; i < num_files; i++) { log << "Reading key file " << key_files[i] << std::endl; //printf(" Reading keyfile %s\n", key_files[i].c_str()); fflush(stdout); unsigned char *keys; int num_keys = 0; keypt_t *info = NULL; num_keys = ReadKeyFileCXX(key_files[i].c_str(), &keys); log << "There are " << num_keys << " keys in file " << key_files[i] << std::endl; // num_keys = ReadKeyFile(key_files[i].c_str(), &keys); #ifdef DEBUG std::ofstream debug_log("debug_log_2.txt"); debug_log << num_keys << " " << dim << std::endl; for (int i=0; i<num_keys; i++) { for (int k=0; k<dim; k++) { debug_log << (int) keys[dim*i + k] << " "; } debug_log << std::endl; } debug_log.close(); #endif if (num_keys > 0) { for (int j = 0; j < num_keys; j++) { #ifdef DEBUG log << "Copying key " << j << std::endl; #endif for (int k = 0; k < dim; k++) { #ifdef DEBUG log << "\tReading "; #endif unsigned char val = keys[j * dim + k]; #ifdef DEBUG log << (int) val << ". Writing "; #endif vs[curr_array][off + k] = val; #ifdef DEBUG log << (int) vs[curr_array][off + k] << std::endl; #endif } #ifdef DEBUG log << "vp[curr_key] = vs[curr_array] + off;" << std::endl; #endif vp[curr_key] = vs[curr_array] + off; curr_key++; off += dim; if (off == MAX_ARRAY_SIZE) { off = 0; curr_array++; } } delete [] keys; if (info != NULL) delete [] info; } } log << "Building tree..." << std::endl; VocabTree tree; tree.Build(total_keys, dim, depth, bf, restarts, vp); tree.Write(tree_out); log.close(); return 0; }
int main(int argc, char **argv) { const int dim = 128; if (argc != 6 && argc != 7 && argc != 8 && argc != 9 && argc != 10 && argc != 11) { printf("Usage: %s <tree.in> <db.in> <query.in> <num_nbrs> " "<match-script.out> [leaves_only] [distance_type] [normalize] " "[min_feature_scale] [max_keys]\n", argv[0]); return 1; } char *tree_in = argv[1]; char *db_in = argv[2]; char *query_in = argv[3]; int num_nbrs = atoi(argv[4]); char *matches_out = argv[5]; bool leaves_only = false; bool normalize = true; double min_feature_scale = 0.0; DistanceType distance_type = DistanceMin; int max_keys = 0; if (argc >= 7) { if (atoi(argv[6]) != 0) leaves_only = true; } if (argc >= 8) distance_type = (DistanceType) atoi(argv[7]); if (argc >= 9) if (atoi(argv[8]) == 0) normalize = false; if (argc >= 10) { min_feature_scale = atof(argv[9]); } if (argc >= 11) { max_keys = atoi(argv[10]); } if (leaves_only) { printf("[VocabMatch] Scoring with leaves only\n"); } else { printf("[VocabMatch] Scoring with all nodes\n"); } printf("[VocabMatch] Using tree %s\n", tree_in); printf("[VocabMatch] min_feature_scale = %0.3f\n", min_feature_scale); printf("[VocabMatch] max_keys = %d\n", max_keys); switch (distance_type) { case DistanceDot: printf("[VocabMatch] Using distance Dot\n"); break; case DistanceMin: printf("[VocabMatch] Using distance Min\n"); break; default: printf("[VocabMatch] Using no known distance!\n"); break; } /* Read the tree */ printf("[VocabMatch] Reading tree...\n"); fflush(stdout); clock_t start = clock(); VocabTree tree; tree.Read(tree_in); clock_t end = clock(); printf("[VocabMatch] Read tree in %0.3fs\n", (double) (end - start) / CLOCKS_PER_SEC); #if 1 tree.Flatten(); #endif tree.SetDistanceType(distance_type); if (leaves_only) { tree.SetInteriorNodeWeight(atoi(argv[6]) - 1, 0.0); // #define CONSTANT_WEIGHTS #ifdef CONSTANT_WEIGHTS tree.SetConstantLeafWeights(); #endif } /* Read the database keyfiles */ FILE *f = fopen(db_in, "r"); std::vector<std::string> db_files; char buf[256]; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; db_files.push_back(std::string(buf)); } fclose(f); /* Read the query keyfiles */ f = fopen(query_in, "r"); std::vector<std::string> query_files; std::vector<int> query_indices; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; char keyfile[256]; int index; sscanf(buf, "%d %s", &index, keyfile); query_files.push_back(std::string(keyfile)); query_indices.push_back(index); } fclose(f); /* Populate the database */ printf("[VocabMatch] Populating database...\n"); fflush(stdout); int num_db_images = db_files.size(); int num_query_images = query_files.size(); /* Now score each query keyfile */ printf("[VocabMatch] Scoring query images...\n"); fflush(stdout); float *scores = new float[num_db_images]; double *scores_d = new double[num_db_images]; int *perm = new int[num_db_images]; FILE *f_match = fopen(matches_out, "w"); if (f_match == NULL) { printf("[VocabMatch] Error opening file %s for writing\n", matches_out); return 1; } for (int i = 0; i < num_query_images; i++) { int index_i = query_indices[i]; start = clock(); /* Clear scores */ for (int j = 0; j < num_db_images; j++) scores[j] = 0.0; unsigned char *keys; int num_keys; keys = ReadAndFilterKeys(query_files[i].c_str(), dim, min_feature_scale, max_keys, num_keys); tree.ScoreQueryKeys(num_keys, /*i,*/ true, keys, scores); end = clock(); printf("[VocabMatch] Scored image %s (%d keys) in %0.3fs\n", query_files[i].c_str(), num_keys, (double) (end - start) / CLOCKS_PER_SEC); #if 0 for (int j = 0; j < num_db_images; j++) { /* Normalize scores */ if (magnitudes[j] > 0.0) scores[j] /= magnitudes[j]; else scores[j] = 0.0; } #endif /* Find the top scores */ for (int j = 0; j < num_db_images; j++) { scores_d[j] = (double) scores[j]; } qsort_descending(); qsort_perm(num_db_images, scores_d, perm); // assert(is_sorted(num_db_images, scores_d)); int top = MIN(num_nbrs+1, num_db_images); for (int j = 0; j < top; j++) { if (perm[j] == index_i) continue; fprintf(f_match, "%d %d %0.5e\n", index_i, perm[j], scores_d[j]); fflush(f_match); } fflush(stdout); delete [] keys; } fclose(f_match); delete [] scores; delete [] scores_d; delete [] perm; return 0; }
int main(int argc, char **argv) { if (argc != 6) { printf("Usage: %s <list.in> <depth> <branching_factor> " "<restarts> <tree.out>\n", argv[0]); return 1; } const char *list_in = argv[1]; int depth = atoi(argv[2]); int bf = atoi(argv[3]); int restarts = atoi(argv[4]); const char *tree_out = argv[5]; printf("Building tree with depth: %d, branching factor: %d, " "and restarts: %d\n", depth, bf, restarts); FILE *f = fopen(list_in, "r"); if (f == NULL) { printf("Could not open file: %s\n", list_in); return 1; } std::vector<std::string> key_files; char buf[256]; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; key_files.push_back(std::string(buf)); } fclose(f); int num_files = (int) key_files.size(); unsigned long total_keys = 0; for (int i = 0; i < num_files; i++) { int num_keys = GetNumberOfKeys(key_files[i].c_str()); total_keys += num_keys; } printf("Total number of keys: %lu\n", total_keys); // Reduce the branching factor if need be if there are not // enough keys, to avoid problems later. if (bf >= (int)total_keys){ bf = total_keys - 1; printf("Reducing the branching factor to: %d\n", bf); } fflush(stdout); int dim = 128; unsigned long long len = (unsigned long long) total_keys * dim; int num_arrays = len / MAX_ARRAY_SIZE + ((len % MAX_ARRAY_SIZE) == 0 ? 0 : 1); unsigned char **vs = new unsigned char *[num_arrays]; printf("Allocating %llu bytes in total, in %d arrays\n", len, num_arrays); unsigned long long total = 0; for (int i = 0; i < num_arrays; i++) { unsigned long long remainder = len - total; unsigned long len_curr = MIN(remainder, MAX_ARRAY_SIZE); printf("Allocating array of size %lu\n", len_curr); fflush(stdout); vs[i] = new unsigned char[len_curr]; remainder -= len_curr; } /* Create the array of pointers */ printf("Allocating pointer array of size %lu\n", 4 * total_keys); fflush(stdout); unsigned char **vp = new unsigned char *[total_keys]; unsigned long off = 0; unsigned long curr_key = 0; int curr_array = 0; for (int i = 0; i < num_files; i++) { printf(" Reading keyfile %s\n", key_files[i].c_str()); fflush(stdout); short int *keys; int num_keys = 0; keypt_t *info = NULL; num_keys = ReadKeyFile(key_files[i].c_str(), &keys); if (num_keys > 0) { for (int j = 0; j < num_keys; j++) { for (int k = 0; k < dim; k++) { vs[curr_array][off + k] = keys[j * dim + k]; } vp[curr_key] = vs[curr_array] + off; curr_key++; off += dim; if (off == MAX_ARRAY_SIZE) { off = 0; curr_array++; } } delete [] keys; if (info != NULL) delete [] info; } } VocabTree tree; tree.Build(total_keys, dim, depth, bf, restarts, vp); tree.Write(tree_out); return 0; }
// compare images feature using db int VocabCompare(int argc, char **argv) { if (argc != 5 && argc != 6) { printf("Usage: %s <tree.in> <image1.key> <image2.key> <matches.out> [distance_type]\n", argv[0]); return 1; } char *tree_in = argv[1]; char *image1_in = argv[2]; char *image2_in = argv[3]; char *matches_out = argv[4]; DistanceType distance_type = DistanceMin; if (argc >= 6) distance_type = (DistanceType)atoi(argv[5]); switch (distance_type) { case DistanceDot: printf("[VocabMatch] Using distance Dot\n"); break; case DistanceMin: printf("[VocabMatch] Using distance Min\n"); break; default: printf("[VocabMatch] Using no known distance!\n"); break; } printf("[VocabBuildDB] Reading tree %s...\n", tree_in); fflush(stdout); VocabTree tree; tree.Read(tree_in); printf("[VocabCompare] Flattening tree...\n"); tree.Flatten(); tree.m_distance_type = distance_type; tree.SetInteriorNodeWeight(0.0); /* Initialize leaf weights to 1.0 */ tree.SetConstantLeafWeights(); const int dim = 128; tree.ClearDatabase(); int num_keys_1 = 0, num_keys_2 = 0; unsigned char *keys1 = ReadKeys(image1_in, dim, num_keys_1); unsigned char *keys2 = ReadKeys(image2_in, dim, num_keys_2); unsigned long *ids1 = new unsigned long[num_keys_1]; unsigned long *ids2 = new unsigned long[num_keys_2]; printf("[VocabCompare] Adding image 0 (%d keys)\n", num_keys_1); tree.AddImageToDatabase(0, num_keys_1, keys1, ids1); if (num_keys_1 > 0) delete[] keys1; printf("[VocabCompare] Adding image 1 (%d keys)\n", num_keys_2); tree.AddImageToDatabase(1, num_keys_2, keys2, ids2); if (num_keys_2 > 0) delete[] keys2; // tree.ComputeTFIDFWeights(); tree.NormalizeDatabase(0, 2); //Find collisions among visual word IDs std::multimap<unsigned long, unsigned int> word_map; for (unsigned int i = 0; i < (unsigned int)num_keys_1; i++) { printf("0 %d -> %lu\n", i, ids1[i]); std::pair<unsigned long, unsigned int> elem(ids1[i], i); word_map.insert(elem); } //Count number of matches int num_matches = 0; for (unsigned int i = 0; i < (unsigned int)num_keys_2; i++) { unsigned long id = ids2[i]; printf("1 %d -> %lu\n", i, ids2[i]); std::pair<std::multimap<unsigned long, unsigned int>::iterator, std::multimap<unsigned long, unsigned int>::iterator> ret; ret = word_map.equal_range(id); unsigned int size = 0; std::multimap<unsigned long, unsigned int>::iterator iter; for (iter = ret.first; iter != ret.second; iter++) size++, num_matches++; if (size > 0) printf("size[%lu] = %d\n", id, size); } printf("number of matches: %d\n", num_matches); fflush(stdout); FILE *f = fopen(matches_out, "w"); if (f == NULL) { printf("Error opening file %s for writing\n", matches_out); return 1; } fprintf(f, "%d\n", num_matches); /* Write matches */ for (unsigned int i = 0; i < (unsigned int)num_keys_2; i++) { unsigned long id = ids2[i]; std::pair<std::multimap<unsigned long, unsigned int>::iterator, std::multimap<unsigned long, unsigned int>::iterator> ret; ret = word_map.equal_range(id); std::multimap<unsigned long, unsigned int>::iterator iter; for (iter = ret.first; iter != ret.second; iter++) fprintf(f, "%d %d\n", iter->second, i); } fclose(f); // printf("[VocabBuildDB] Writing tree...\n"); // tree.Write(tree_out); return 0; }
//Read a database stored as a vocab tree and score a set of query images int VocabMatch(char *db_in, char *list_in, char *query_in, int num_nbrs, char *matches_out, DistanceType distance_type = DistanceMin, int normalize = 1) { const int dim = 128; printf("[VocabMatch] Using database %s\n", db_in); switch (distance_type) { case DistanceDot: printf("[VocabMatch] Using distance Dot\n"); break; case DistanceMin: printf("[VocabMatch] Using distance Min\n"); break; default: printf("[VocabMatch] Using no known distance!\n"); break; } // Read the tree printf("[VocabMatch] Reading database...\n"); fflush(stdout); clock_t start = clock(); VocabTree tree; tree.Read(db_in); clock_t end = clock(); printf("[VocabMatch] Read database in %0.3fs\n", (double)(end - start) / CLOCKS_PER_SEC); tree.Flatten(); tree.SetDistanceType(distance_type); tree.SetInteriorNodeWeight(0, 0.0); // Read the database keyfiles FILE *f = fopen(list_in, "r"); if (f == NULL) { printf("Could not open file: %s\n", list_in); return 1; } std::vector<std::string> db_files; char buf[256]; while (fgets(buf, 256, f)) { // Remove trailing newline if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; db_files.push_back(std::string(buf)); } fclose(f); //Read the query keyfiles f = fopen(query_in, "r"); if (f == NULL) { printf("Could not open file: %s\n", query_in); return 1; } std::vector<std::string> query_files; while (fgets(buf, 256, f)) { // Remove trailing newline if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; char keyfile[256]; sscanf(buf, "%s", keyfile); query_files.push_back(std::string(keyfile)); } fclose(f); int num_db_images = (int)db_files.size(); int num_query_images = (int)query_files.size(); printf("[VocabMatch] Read %d database images\n", num_db_images); //Now score each query keyfile printf("[VocabMatch] Scoring %d query images...\n", num_query_images); fflush(stdout); float *scores = new float[num_db_images]; double *scores_d = new double[num_db_images]; int *perm = new int[num_db_images]; FILE *f_match = fopen(matches_out, "w"); if (f_match == NULL) { printf("[VocabMatch] Error opening file %s for writing\n", matches_out); return 1; } bool bnormalize = normalize == 1 ? true : false; for (int i = 0; i < num_query_images; i++) { start = clock(); for (int j = 0; j < num_db_images; j++) scores[j] = 0.0; int num_keys; unsigned char *keys = ReadKeys(query_files[i].c_str(), dim, num_keys); clock_t start_score = clock(); double mag = tree.ScoreQueryKeys(num_keys, bnormalize, keys, scores); clock_t end_score = end = clock(); printf("[VocabMatch] Scored image %s in %0.3fs ( %0.3fs total, num_keys = %d, mag = %0.3f )\n", query_files[i].c_str(), (double)(end_score - start_score) / CLOCKS_PER_SEC, (double)(end - start) / CLOCKS_PER_SEC, num_keys, mag); //Find the top scores for (int j = 0; j < num_db_images; j++) scores_d[j] = (double)scores[j]; qsort_descending(); qsort_perm(num_db_images, scores_d, perm); int top = MIN(num_nbrs, num_db_images); for (int j = 0; j < top; j++) { // if (perm[j] == index_i) // continue; fprintf(f_match, "%d %d %0.4f\n", i, perm[j], scores_d[j]); //fprintf(f_match, "%d %d %0.4f\n", i, perm[j], mag - scores_d[j]); } fflush(f_match); fflush(stdout); delete[] keys; } fclose(f_match); delete[] scores, delete[] scores_d, delete[] perm; return 0; }
// Building a database with a vocabulary tree int VocabDB(char *list_in, char *tree_in, char *db_out, int use_tfidf = 1, int normalize = 1, int start_id = 0, DistanceType distance_type = DistanceMin) { double min_feature_scale = 1.4; switch (distance_type) { case DistanceDot: printf("[VocabMatch] Using distance Dot\n"); break; case DistanceMin: printf("[VocabMatch] Using distance Min\n"); break; default: printf("[VocabMatch] Using no known distance!\n"); break; } FILE *f = fopen(list_in, "r"); if (f == NULL) { printf("Error opening file %s for reading\n", list_in); return 1; } std::vector<std::string> key_files; char buf[256]; while (fgets(buf, 256, f)) { /* Remove trailing newline */ if (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = 0; key_files.push_back(std::string(buf)); } printf("[VocabBuildDB] Reading tree %s...\n", tree_in); fflush(stdout); VocabTree tree; tree.Read(tree_in); tree.Flatten(); tree.m_distance_type = distance_type; tree.SetInteriorNodeWeight(0.0); //Initialize leaf weights to 1.0 tree.SetConstantLeafWeights(); const int dim = 128; int num_db_images = (int)key_files.size(); unsigned long count = 0; tree.ClearDatabase(); for (int i = 0; i < num_db_images; i++) { int num_keys = 0; unsigned char *keys = ReadAndFilterKeys(key_files[i].c_str(), dim, min_feature_scale, 0, num_keys); printf("[VocabBuildDB] Adding vector %d (%d keys)\n", start_id + i, num_keys); tree.AddImageToDatabase(start_id + i, num_keys, keys); if (num_keys > 0) delete[] keys; } printf("[VocabBuildDB] Pushed %lu features\n", count); fflush(stdout); if (use_tfidf == 1) tree.ComputeTFIDFWeights(num_db_images); if (normalize == 1) tree.NormalizeDatabase(start_id, num_db_images); printf("[VocabBuildDB] Writing database ...\n"); tree.Write(db_out); // char filename[256]; // sprintf(filename, "vectors_%03d.txt", start_id); // tree.WriteDatabaseVectors(filename, start_id, num_db_images); return 0; }