/*
 * Returns a copy of `str` on which a spoonerism has been performed, i.e. the
 * beginning of the first word is swapped with the beginning of the second
 * word. The words are taken to be separated by the first space in `str`.
 *
 * <param_name = "str">        : The two-word string to spoonerize.
 * <param_name = "first_len">  : Number of leading characters of the first
 *                               word to swap (1 or 2).
 * <param_name = "second_len"> : Number of leading characters of the second
 *                               word to swap (1 or 2).
 *
 * The original string is returned unchanged when:
 *   - word_frequency( str ) exceeds 2,
 *   - first_len or second_len is outside the range [1, 2],
 *   - the string contains no space, or
 *   - either word is shorter than the number of characters requested
 *     from it.
 */
STR spoonerize( STR str, const int &first_len, const int &second_len )
{
    const bool lengths_supported = first_len >= 1 && first_len <= 2 &&
                                   second_len >= 1 && second_len <= 2;
    if ( word_frequency( str ) > 2 || !lengths_supported )
        return str;

    // Without a separator there is no second word to swap with.
    const std::size_t space_pos = str.find( ' ' );
    if ( space_pos == std::string::npos )
        return str;

    const std::size_t first_head   = static_cast<std::size_t>( first_len );
    const std::size_t second_head  = static_cast<std::size_t>( second_len );
    const std::size_t second_start = space_pos + 1;

    // Both words must actually contain the characters that are to be swapped;
    // otherwise the separator (or nothing at all) would be moved around.
    // second_start <= str.size() holds here, so the subtraction cannot wrap.
    if ( space_pos < first_head || str.size() - second_start < second_head )
        return str;

    // result = [head of 2nd word][rest of 1st word + space]
    //          [head of 1st word][rest of 2nd word]
    STR result = str.substr( second_start, second_head );
    result += str.substr( first_head, second_start - first_head );
    result += str.substr( 0, first_head );
    result += str.substr( second_start + second_head );
    return result;
}
int main(int argc, char **argv){ std::vector<float> train_local_descriptors; std::vector<std::string> train_local_descriptor_files(3); train_local_descriptor_files[0] = "imagenes/descriptores_locales/train_car"; train_local_descriptor_files[1] = "imagenes/descriptores_locales/train_cat"; train_local_descriptor_files[2] = "imagenes/descriptores_locales/train_bird"; int count = 0; int dimension = 128; int max_files = 160; int file_counter; for (int i = 0; i < 3; ++i){ /* code */ std::ifstream infile(train_local_descriptor_files[i]); file_counter = 0; std::string line; std::vector<std::string> vectorAsString; while (std::getline(infile, line)){ if (Utils::stringEndsWith(line, "jpg")){ file_counter++; std::getline(infile, line); if(file_counter >= max_files){ break; } continue; } vectorAsString.clear(); vectorAsString = Utils::split(line, ','); for(int j = 0; j < dimension; j++){ train_local_descriptors.push_back(std::stof(vectorAsString[j])); } count++; // process pair (a,b) } infile.close(); } std::cout << "Read " << train_local_descriptors.size() << " local descriptors from file" << std::endl; cv::Mat descriptor_matrix = cv::Mat(count, dimension, CV_32FC1); memcpy(descriptor_matrix.data, train_local_descriptors.data(), train_local_descriptors.size()*sizeof(float)); std::cout << "Starting k-means clustering" << std::endl; int nClusters = std::stoi(argv[1]); ClusterComputer clusterComputer(nClusters); cv::Mat labels; cv::Mat centers; clusterComputer.compute(descriptor_matrix, labels, centers); std::cout << "Cumputed clusters" << std::endl; std::cout << "Number of centers: " << centers.rows << std::endl; std::cout << "Centers dimension: " << centers.cols << std::endl; std::vector<std::string> local_descriptor_files (9); // Inicializar local_descriptor_files[0] = "imagenes/descriptores_locales/train_car"; local_descriptor_files[1] = "imagenes/descriptores_locales/train_cat"; local_descriptor_files[2] = "imagenes/descriptores_locales/train_bird"; 
local_descriptor_files[3] = "imagenes/descriptores_locales/test_car"; local_descriptor_files[4] = "imagenes/descriptores_locales/test_cat"; local_descriptor_files[5] = "imagenes/descriptores_locales/test_bird"; local_descriptor_files[7] = "imagenes/descriptores_locales/val_cat"; local_descriptor_files[6] = "imagenes/descriptores_locales/val_car"; local_descriptor_files[8] = "imagenes/descriptores_locales/val_bird"; std::unordered_map<std::string, std::vector<int>> document_word_frequency; std::unordered_map<std::string, std::vector<float>> tf; std::unordered_map<std::string, int> document_word_counter; std::vector<int> word_frequency(nClusters, 0); int word_total = 0; std::vector<std::vector<std::string>> filenames(9); for (int i = 0; i < 9; ++i){ std::cout << "Aproximating words for " << local_descriptor_files[i] << std::endl; std::ifstream infile(local_descriptor_files[i]); std::string line; std::vector<std::string> vectorAsString; std::string currentFile = ""; // std::cout << "File opened" << std::endl; while (std::getline(infile, line)){ // std::cout << "Read line: " << line << std::endl; if (Utils::stringEndsWith(line, "jpg")){ // std::cout << "Line ended with jpg" << std::endl; if(currentFile != ""){ // Finished last file // std::cout << "Finished a file" << std::endl; for (int j = 0; j < nClusters; ++j){ if(document_word_frequency[currentFile][j] > 0){ word_frequency[j]++; } word_total += document_word_frequency[currentFile][j]; tf[currentFile][j] = document_word_frequency[currentFile][j] / (float) document_word_counter[currentFile]; // std::cout << "Updated frequency for last file" << std::endl; } } currentFile = line; filenames[i].push_back(currentFile); // std::cout << "Pushed filename into vector" << std::endl; document_word_frequency[currentFile] = *(new std::vector<int> (nClusters, 0)); // std::cout << "Created vector for frequency" << std::endl; tf[currentFile] = *(new std::vector<float> (nClusters, 0.0)); // std::cout << "Created vector for tf" << 
std::endl; std::getline(infile, line); document_word_counter[currentFile] = std::stoi(line); continue; } int word = nearestCluster(line, centers); document_word_frequency[currentFile][word] = document_word_frequency[currentFile][word]+1; } for (int j = 0; j < nClusters; ++j){ if(document_word_frequency[currentFile][j] > 0){ word_frequency[j]++; } word_total += document_word_frequency[currentFile][j]; tf[currentFile][j] = document_word_frequency[currentFile][j] / (float) document_word_counter[currentFile]; } } std::cout << "Computing idf values" << std::endl; std::vector<float> idf(nClusters, 0); for (int i = 0; i < nClusters; ++i){ idf[i] = log(word_total / (float) word_frequency[i]); } std::string output_directory = "imagenes/BOVW_descriptors_" + std::to_string(nClusters) + "/"; std::vector<std::string> output_filenames(9); output_filenames[0] = output_directory + "train_car"; output_filenames[1] = output_directory + "train_cat"; output_filenames[2] = output_directory + "train_bird"; output_filenames[3] = output_directory + "test_car"; output_filenames[4] = output_directory + "test_cat"; output_filenames[5] = output_directory + "test_bird"; output_filenames[6] = output_directory + "val_car"; output_filenames[7] = output_directory + "val_cat"; output_filenames[8] = output_directory + "val_bird"; std::ofstream outputFile; std::vector<float> BOVW_descriptor(nClusters, 0.0); for (int i = 0; i < 9; ++i){ std::cout << "Computing BOVW descriptors for " << output_filenames[i] << std::endl; outputFile.open(output_filenames[i]); int file_size = filenames[i].size(); for (int j = 0; j < file_size; ++j){ outputFile << filenames[i][j] << std::endl; for (int k = 0; k < nClusters; ++k){ BOVW_descriptor[k] = tf[filenames[i][j]][k] * idf[k]; } outputFile << Utils::vectorToString(BOVW_descriptor) << std::endl; } outputFile.close(); } }