// static void LLSpellChecker::saveUserDictionaryMap(const LLSD& dict_map) { llofstream dict_file((getDictionaryUserPath() + DICT_FILE_USER).c_str(), std::ios::trunc); if (dict_file.is_open()) { LLSDSerialize::toPrettyXML(dict_map, dict_file); dict_file.close(); } }
// static LLSD LLSpellChecker::loadUserDictionaryMap() { LLSD dict_map; std::string dict_filename(getDictionaryUserPath() + DICT_FILE_USER); llifstream dict_file(dict_filename.c_str(), std::ios::binary); if (dict_file.is_open()) { LLSDSerialize::fromXMLDocument(dict_map, dict_file); dict_file.close(); } return dict_map; }
/**
 * Creates (or truncates to empty) a file named dict_name inside
 * SpellCheck::userDictionaryDirectory() and adds it to the user dictionary list.
 *
 * @param dict_name file name of the dictionary to create.
 * @return true when the file was created and listed; false after showing an
 *         error dialog when the file could not be opened for writing.
 */
bool SpellCheckWidget::createUserDict(QString dict_name)
{
    const QString file_path = SpellCheck::userDictionaryDirectory() + "/" + dict_name;
    QFile new_dict(file_path);

    // Opening with WriteOnly | Truncate is what actually creates the empty file.
    if (!new_dict.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
        QMessageBox::critical(this, tr("Error"), tr("Could not create file!"));
        return false;
    }
    new_dict.close();

    addNewItem(true, dict_name);
    // Keep the list ordered by its second column after the insertion.
    ui.userDictList->sortByColumn(1, Qt::AscendingOrder);
    return true;
}
int main(int argc, char** argv) { // wordsmith /path/to/dict/file std::string dict_file("/usr/share/dict/words"); if (argc >= 2) { dict_file = argv[1]; } std::cout << "Generating word bucket from " << dict_file << "..." << std::endl; WordBucket wb(dict_file); std::string input, my_letters; std::cout << "Enter 'l abcdefg' to specify your letters." << std::endl; std::cout << "Enter 'c' to clear your letters." << std::endl; std::cout << "Enter 'e __a_b__' to query an expression." << std::endl; std::cout << "Enter 'w __a_b__' to get top-scoring words for an expression." << std::endl; std::cout << "Enter 'q' to quit." << std::endl; while (true) { std::cout << "> "; getline(std::cin, input); try { if (input.at(0) == 'q') { break; } else if (input.at(0) == 'c') { my_letters.clear(); std::cout << "Cleared your letters." << std::endl; } else if (input.at(0) == 'l') { if (input.size() > 2) { my_letters = input.substr(2); std::cout << "Set your letters to: " << my_letters << std::endl; } else { std::cout << "Your letters are: " << my_letters << std::endl; } } else if ((input.at(0) == 'w' || input.at(0) == 'e') && input.size() > 2) { Expressionizer e(input.substr(2)); ExpressionSeq es(e.get_all_expressions()); for (ExpressionSeq::const_iterator it = es.begin() ; it != es.end() ; ++it) { const Expression& e(*it); std::cout << " " << e << ": "; PossibleWordSeq pws; wb.generate_words_for(*it, my_letters, pws); for (PossibleWordSeq::const_iterator it2 = pws.begin() ; it2 != pws.end() ; ++it2) { std::cout << " " << *it2; } std::cout << std::endl; } } else { std::cout << "Invalid input: " << input << std::endl; } } catch (const std::exception& ex) { std::cout << "Error: " << ex.what() << std::endl; } } std::cout << "Done!" << std::endl; return 0; }
int main() { clock_t start_time = std::clock(); snap::web::print_header(); // get user input int content_length = atoi(getenv("CONTENT_LENGTH")); char *input = new char[content_length+1]; fgets(input, content_length+1, stdin); std::string query_string(input); delete[] input; // process user input std::map<std::string, std::string> arguments = snap::web::parse_query_string(query_string); int num_excerpts = stoi(arguments["num-excerpts"]); int excerpt_size = stoi(arguments["excerpt-size"]); // dates boost::gregorian::date current_date, from_date, to_date; try { current_date = snap::date::string_to_date(arguments["from-date"]); from_date = snap::date::string_to_date(arguments["from-date"]); to_date = snap::date::string_to_date(arguments["to-date"]); } catch (snap::date::InvalidDateException &e) { std::cout << "<span class=\"error\">" << e.what() << "</span>" << std::endl; exit(-1); } std::vector<std::string> file_list = snap::io::generate_file_names(from_date, to_date, prefix, suffix); // process search strings std::vector<std::string> search_strings; arguments["search-strings"] = snap::web::decode_uri(arguments["search-strings"]); boost::split(search_strings, arguments["search-strings"], boost::is_any_of("\n")); // remove empty strings auto search_string_iterator = search_strings.begin(); while (search_string_iterator != search_strings.end()) { if (std::all_of(search_string_iterator -> begin() , search_string_iterator -> end(), ::isspace)) { search_string_iterator = search_strings.erase(search_string_iterator); } else { boost::algorithm::trim(*search_string_iterator); ++search_string_iterator; } } if (search_strings.size() == 0) { std::cout << "<span class=\"error\">" << "Error: There are no search strings." 
<< "</span>" << std::endl; exit(-1); } std::sort(search_strings.begin(), search_strings.end()); std::vector<snap::Expression> expressions; std::set<std::string> pattern_set; for (auto it = search_strings.begin(); it != search_strings.end(); ++it) { try { expressions.emplace_back(*it); } catch(snap::ExpressionSyntaxError &e) { const char *error_msg = e.what(); std::cout << "<span class=\"error\">" << error_msg << "</span>" << std::endl; delete[] error_msg; exit(-1); } pattern_set.insert(expressions.back().patterns.begin(), expressions.back().patterns.end()); } std::vector<std::string> patterns; patterns.insert(patterns.end(), pattern_set.begin(), pattern_set.end()); // print output for user to verify std::cout << "<p>" << std::endl; std::cout << "Search strings:" << "<br/>" << std::endl; for (auto it = search_strings.begin(); it != search_strings.end(); ++it) { std::cout << *it << "<br/>" << std::endl; } std::cout << "From (inclusive): <span id=\"from-date\">" << arguments["from-date"] << "</span><br/>" << std::endl; std::cout << "To (inclusive): <span id=\"to-date\">" << arguments["to-date"] << "</span><br/>" << std::endl; std::cout << "Number of Excerpts: " << arguments["num-excerpts"] << "<br/>" << std::endl; std::cout << "Excerpt Size: " << arguments["excerpt-size"] << "<br/>" << std::endl; std::cout << "</p>" << std::endl; // variables to store results of loop std::vector<std::vector<std::string>> search_results; std::vector<std::vector<std::string>> search_results_programs; std::vector<std::vector<std::string>> search_results_total_matches; int total_programs_cnt = 0; int selected_programs_cnt = 0; std::vector<std::string> corrupt_files; std::vector<std::string> missing_files; std::vector<snap::Excerpt> excerpts; // print table header std::cout << "<table><thead><tr><th>dt</th>"; for (auto it = search_strings.begin(); it != search_strings.end(); ++it) { std::cout << "<th>" << (*it) + " Contexts" << "</th>"; } std::cout << 
"<th>selected_programs_cnt</th></tr></thead><tbody>" << std::endl; snap::StringHasher hasher("", M, A); std::unordered_map<std::string, std::unordered_map<int, int>> total_left_word_hashes; std::unordered_map<std::string, std::unordered_map<int, int>> total_right_word_hashes; std::map<std::string, std::tuple<int, int, int>> match_counts; for (auto it = file_list.begin(); it != file_list.end(); ++it) { boost::gregorian::date current_date = snap::date::string_to_date((*it).substr(prefix.length(), 10)); if (snap::io::file_exists(*it)) { std::vector<snap::Program> programs; try { programs = snap::io::parse_programs(*it); } catch (snap::io::CorruptFileException &e) { programs.clear(); corrupt_files.push_back(*it); continue; } search_results.push_back(std::vector<std::string>{snap::date::date_to_string(current_date)}); search_results_programs.push_back(std::vector<std::string>{snap::date::date_to_string(current_date)}); search_results_total_matches.push_back(std::vector<std::string>{snap::date::date_to_string(current_date)}); std::unordered_map<std::string, std::unordered_map<int, int>> daily_left_word_hashes; std::unordered_map<std::string, std::unordered_map<int, int>> daily_right_word_hashes; std::cout << "<tr><td>" << snap::date::date_to_string(current_date) << "</td>"; total_programs_cnt += programs.size(); int daily_selected_programs_cnt = 0; std::map<std::string, std::tuple<int, int, int>> daily_match_counts; for (auto p = programs.begin(); p != programs.end(); ++p) { ++selected_programs_cnt; ++daily_selected_programs_cnt; hasher.load_text(p -> lower_text); std::map<std::string, std::vector<int>> raw_match_positions = snap::find(patterns, p -> lower_text); std::map<std::string, std::vector<int>> match_positions = snap::evaluate_expressions(expressions, raw_match_positions); for (auto ss = search_strings.begin(); ss != search_strings.end(); ++ss) { if (match_positions[*ss].size() > 0) { bool total_context_added = false; bool context_added = false; 
++std::get<1>(daily_match_counts[*ss]); ++std::get<1>(match_counts[*ss]); std::get<2>(daily_match_counts[*ss]) += match_positions[*ss].size(); std::get<2>(match_counts[*ss]) += match_positions[*ss].size(); for (auto it = match_positions[*ss].begin(); it != match_positions[*ss].end(); ++it) { int left_word_hash = hasher.hash(*it - LEFT_HASH_WIDTH, *it); int right_word_hash = hasher.hash(*it, *it + RIGHT_HASH_WIDTH); int daily_left_hash_cnt = daily_left_word_hashes[*ss][left_word_hash]++; int daily_right_hash_cnt = daily_right_word_hashes[*ss][right_word_hash]++; int total_left_hash_cnt = total_left_word_hashes[*ss][left_word_hash]++; int total_right_hash_cnt = total_right_word_hashes[*ss][right_word_hash]++; if (daily_left_hash_cnt == 0 && daily_right_hash_cnt == 0) { if (!context_added) { ++std::get<0>(daily_match_counts[*ss]); context_added = true; } if (total_left_hash_cnt == 0 && total_right_hash_cnt == 0) { if (!total_context_added) { ++std::get<0>(match_counts[*ss]); total_context_added = true; } excerpts.emplace_back(*p, *it - excerpt_size, *it + excerpt_size); std::vector<std::string> search_string_patterns = expressions[ss - search_strings.begin()].patterns; for (auto pattern = search_string_patterns.begin(); pattern != search_string_patterns.end(); ++pattern) { excerpts.back().highlight_word(*pattern); } } } } } } } for (auto ss = search_strings.begin(); ss != search_strings.end(); ++ss) { search_results.back().push_back(std::to_string(std::get<0>(daily_match_counts[*ss]))); search_results_programs.back().push_back(std::to_string(std::get<1>(daily_match_counts[*ss]))); search_results_total_matches.back().push_back(std::to_string(std::get<2>(daily_match_counts[*ss]))); std::cout << "<td>" << std::get<0>(daily_match_counts[*ss]) << "</td>"; } search_results.back().push_back(std::to_string(daily_selected_programs_cnt)); search_results_programs.back().push_back(std::to_string(daily_selected_programs_cnt)); 
search_results_total_matches.back().push_back(std::to_string(daily_selected_programs_cnt)); std::cout << "<td>" << daily_selected_programs_cnt << "</td>"; std::cout << "</tr>" << std::endl; programs.clear(); } else { missing_files.push_back(*it); } } // print out total line std::cout << "<tr>" << std::endl; search_results.emplace_back(); search_results_programs.emplace_back(); search_results_total_matches.emplace_back(); std::cout << "<td><strong>Grand Total:</strong></td>" << std::endl; search_results.back().push_back("Grand Total:"); search_results_programs.back().push_back("Grand Total:"); search_results_total_matches.back().push_back("Grand Total:"); for (std::string ss : search_strings) { std::cout << "<td>" << std::get<0>(match_counts[ss]) << "</td>" << std::endl; search_results.back().push_back(std::to_string(std::get<0>(match_counts[ss]))); search_results_programs.back().push_back(std::to_string(std::get<1>(match_counts[ss]))); search_results_total_matches.back().push_back(std::to_string(std::get<2>(match_counts[ss]))); } std::cout << "<td>" << total_programs_cnt << "</td>" << std::endl; search_results.back().push_back(std::to_string(total_programs_cnt)); std::cout << "</tr>" << std::endl; std::cout << "</tbody></table>" << std::endl; std::cout << "<div>"; std::cout << "<br/>" << std::endl; snap::web::print_missing_files(missing_files); std::cout << "<br/>" << std::endl; snap::web::print_corrupt_files(corrupt_files); std::cout << "</div>" << std::endl; snap::web::print_excerpts(excerpts, num_excerpts, true); // output file srand(time(NULL)); std::string random_id = std::to_string(rand()); output_matrix_file(search_results, search_strings, random_id, "contexts"); output_matrix_file(search_results_programs, search_strings, random_id, "programs"); output_matrix_file(search_results_total_matches, search_strings, random_id, "total_matches"); // all data in long form std::map<std::string, std::tuple<std::string, std::string, std::string>> dict; if 
(snap::io::file_exists("dictionary.csv")) { std::ifstream dict_file("dictionary.csv"); dict = snap::io::read_dictionary(dict_file); } std::string output_file_name = search_results.front().front() + "_all_" + random_id + ".csv"; std::string output_file_path = output_path + output_file_name; std::ofstream output_file(output_file_path); output_file << "Date,Term,Contexts,Programs,Total Matches"; for (int i = 0; i < search_results.size() - 1; ++i) { // skip total line for (int j = 0; j < search_strings.size(); ++j) { output_file << '\n'; output_file << search_results[i].front() << ',' << (dict.count(search_strings[j]) ? std::get<1>(dict[search_strings[j]]) : search_strings[j]) << ',' << search_results[i][j + 1] << ',' // j + 1 skips date column << search_results_programs[i][j + 1] << ',' << search_results_total_matches[i][j + 1]; } } output_file.close(); std::cout << "<p>"; std::cout << snap::web::create_link(output_file_path, "Output Long File", "long-data"); std::cout << "</p>" << std::endl; std::cout << "<p>"; std::cout << snap::web::create_link("../time-series.html?filename=tmp%2F" + output_file_name + "&title=Snapstream%20Time%20Series", "Visualization", "visualization"); std::cout << "</p>" << std::endl; double duration = (std::clock() - start_time) / (double) CLOCKS_PER_SEC; std::cout << "<br/><span>Time taken (seconds): " << duration << "</span><br/>" << std::endl; snap::web::close_html(); return 0; }