Beispiel #1
0
// static
void LLSpellChecker::saveUserDictionaryMap(const LLSD& dict_map)
{
	llofstream dict_file((getDictionaryUserPath() + DICT_FILE_USER).c_str(), std::ios::trunc);
	if (dict_file.is_open())
	{
		LLSDSerialize::toPrettyXML(dict_map, dict_file);
		dict_file.close();
	}
}
Beispiel #2
0
// static
// Reads the user dictionary map from the XML file at
// getDictionaryUserPath() + DICT_FILE_USER. When the file cannot be opened
// the default-constructed LLSD is returned unchanged.
LLSD LLSpellChecker::loadUserDictionaryMap()
{
	LLSD user_dict_map;

	const std::string user_dict_path(getDictionaryUserPath() + DICT_FILE_USER);
	llifstream in_stream(user_dict_path.c_str(), std::ios::binary);
	if (!in_stream.is_open())
	{
		return user_dict_map;
	}

	LLSDSerialize::fromXMLDocument(user_dict_map, in_stream);
	in_stream.close();
	return user_dict_map;
}
Beispiel #3
0
// Creates (or truncates to empty) the file dict_name inside the user
// dictionary directory, then adds it to the dictionary list and re-sorts
// the list. Shows an error dialog and returns false if the file cannot be
// opened for writing; returns true otherwise.
bool SpellCheckWidget::createUserDict(QString dict_name)
{
    const QString new_dict_path = SpellCheck::userDictionaryDirectory() + "/" + dict_name;
    QFile new_dict(new_dict_path);

    // Opening with Truncate is what actually creates the empty file.
    if (!new_dict.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
        QMessageBox::critical(this, tr("Error"), tr("Could not create file!"));
        return false;
    }
    new_dict.close();

    addNewItem(true, dict_name);
    ui.userDictList->sortByColumn(1, Qt::AscendingOrder);
    return true;
}
int main(int argc, char** argv)
{
    // wordsmith /path/to/dict/file
    std::string dict_file("/usr/share/dict/words");
    if (argc >= 2)
    {
        dict_file = argv[1];
    }
    
    std::cout << "Generating word bucket from " << dict_file << "..." << std::endl;
    WordBucket wb(dict_file);
    
    std::string input, my_letters;
    std::cout << "Enter 'l abcdefg' to specify your letters." << std::endl;
    std::cout << "Enter 'c' to clear your letters." << std::endl;
    std::cout << "Enter 'e __a_b__' to query an expression." << std::endl;
    std::cout << "Enter 'w __a_b__' to get top-scoring words for an expression." << std::endl;
    std::cout << "Enter 'q' to quit." << std::endl;
    while (true)
    {
        std::cout << "> ";
        getline(std::cin, input);
        
        try
        {
            if (input.at(0) == 'q')
            {
                break;
            }
            else if (input.at(0) == 'c')
            {
                my_letters.clear();
                std::cout << "Cleared your letters." << std::endl;
            }
            else if (input.at(0) == 'l')
            {
                if (input.size() > 2)
                {
                    my_letters = input.substr(2);
                    std::cout << "Set your letters to: " << my_letters << std::endl;
                }
                else
                {
                    std::cout << "Your letters are: " << my_letters << std::endl;
                }
            }
            else if ((input.at(0) == 'w' || input.at(0) == 'e') && input.size() > 2)
            {
                Expressionizer e(input.substr(2));
                ExpressionSeq es(e.get_all_expressions());
                for (ExpressionSeq::const_iterator it = es.begin() ; it != es.end() ; ++it)
                {
                    const Expression& e(*it);
                    std::cout << " " << e << ": ";
                    PossibleWordSeq pws;
                    wb.generate_words_for(*it, my_letters, pws);
                    for (PossibleWordSeq::const_iterator it2 = pws.begin() ; it2 != pws.end() ; ++it2)
                    {
                        std::cout << " " << *it2;
                    }
                    std::cout << std::endl;
                }
            }
            else 
            {
                std::cout << "Invalid input: " << input << std::endl;
            }
        }
        catch (const std::exception& ex)
        {
            std::cout << "Error: " << ex.what() << std::endl;
        }
    }
    
    std::cout << "Done!" << std::endl;
    return 0;
}
int main() {
  clock_t start_time = std::clock();
  snap::web::print_header();

  // get user input
  int content_length = atoi(getenv("CONTENT_LENGTH"));
  char *input = new char[content_length+1];
  fgets(input, content_length+1, stdin);
  std::string query_string(input);
  delete[] input;

  // process user input
  std::map<std::string, std::string> arguments = snap::web::parse_query_string(query_string);
  int num_excerpts = stoi(arguments["num-excerpts"]);
  int excerpt_size = stoi(arguments["excerpt-size"]);
  // dates
  boost::gregorian::date current_date, from_date, to_date;
  try {
    current_date = snap::date::string_to_date(arguments["from-date"]);
    from_date = snap::date::string_to_date(arguments["from-date"]);
    to_date = snap::date::string_to_date(arguments["to-date"]);
  } catch (snap::date::InvalidDateException &e) {
    std::cout << "<span class=\"error\">" << e.what() << "</span>" << std::endl;
    exit(-1);
  }
  std::vector<std::string> file_list = snap::io::generate_file_names(from_date, to_date, prefix, suffix);

  // process search strings
  std::vector<std::string> search_strings;
  arguments["search-strings"] = snap::web::decode_uri(arguments["search-strings"]);
  boost::split(search_strings, arguments["search-strings"], boost::is_any_of("\n"));
  // remove empty strings  
  auto search_string_iterator = search_strings.begin();
  while (search_string_iterator != search_strings.end()) {
    if (std::all_of(search_string_iterator -> begin() , search_string_iterator -> end(), ::isspace)) {
      search_string_iterator = search_strings.erase(search_string_iterator);
    } else {
      boost::algorithm::trim(*search_string_iterator);
      ++search_string_iterator;
    }
  }
  if (search_strings.size() == 0) {
    std::cout << "<span class=\"error\">" << "Error: There are no search strings." << "</span>" << std::endl;
    exit(-1);
  }
  std::sort(search_strings.begin(), search_strings.end());

  std::vector<snap::Expression> expressions;
  std::set<std::string> pattern_set;
  for (auto it = search_strings.begin(); it != search_strings.end(); ++it) {
    try {
      expressions.emplace_back(*it);
    } catch(snap::ExpressionSyntaxError &e) {
      const char *error_msg = e.what();
      std::cout << "<span class=\"error\">" << error_msg << "</span>" << std::endl;
      delete[] error_msg;
      exit(-1);
    }
    pattern_set.insert(expressions.back().patterns.begin(), expressions.back().patterns.end());
  }
  std::vector<std::string> patterns;
  patterns.insert(patterns.end(), pattern_set.begin(), pattern_set.end());

  // print output for user to verify
  std::cout << "<p>" << std::endl;
  std::cout << "Search strings:" << "<br/>" << std::endl;
  for (auto it = search_strings.begin(); it != search_strings.end(); ++it) {    
    std::cout << *it << "<br/>" << std::endl;
  }
  std::cout << "From (inclusive): <span id=\"from-date\">" << arguments["from-date"] << "</span><br/>" << std::endl;
  std::cout << "To (inclusive): <span id=\"to-date\">" << arguments["to-date"] << "</span><br/>" << std::endl;
  std::cout << "Number of Excerpts: " << arguments["num-excerpts"] << "<br/>" << std::endl;
  std::cout << "Excerpt Size: " << arguments["excerpt-size"] << "<br/>" << std::endl;
  std::cout << "</p>" << std::endl;

  // variables to store results of loop
  std::vector<std::vector<std::string>> search_results;
  std::vector<std::vector<std::string>> search_results_programs;
  std::vector<std::vector<std::string>> search_results_total_matches;
  int total_programs_cnt = 0;
  int selected_programs_cnt = 0;
  std::vector<std::string> corrupt_files;
  std::vector<std::string> missing_files;
  std::vector<snap::Excerpt> excerpts;

  // print table header
  std::cout << "<table><thead><tr><th>dt</th>";
  for (auto it = search_strings.begin(); it != search_strings.end(); ++it) {
    std::cout << "<th>" << (*it) + " Contexts" << "</th>";
  }
  std::cout << "<th>selected_programs_cnt</th></tr></thead><tbody>" << std::endl;
  snap::StringHasher hasher("", M, A);
  std::unordered_map<std::string, std::unordered_map<int, int>> total_left_word_hashes;
  std::unordered_map<std::string, std::unordered_map<int, int>> total_right_word_hashes;
  std::map<std::string, std::tuple<int, int, int>> match_counts;
  for (auto it = file_list.begin();
       it != file_list.end();
       ++it) {
    boost::gregorian::date current_date = snap::date::string_to_date((*it).substr(prefix.length(), 10));
    if (snap::io::file_exists(*it)) {
      std::vector<snap::Program> programs;
      try {
        programs = snap::io::parse_programs(*it);
      } catch (snap::io::CorruptFileException &e) {
        programs.clear();
        corrupt_files.push_back(*it);
        continue;
      }
      search_results.push_back(std::vector<std::string>{snap::date::date_to_string(current_date)});
      search_results_programs.push_back(std::vector<std::string>{snap::date::date_to_string(current_date)});
      search_results_total_matches.push_back(std::vector<std::string>{snap::date::date_to_string(current_date)});
      std::unordered_map<std::string, std::unordered_map<int, int>> daily_left_word_hashes;
      std::unordered_map<std::string, std::unordered_map<int, int>> daily_right_word_hashes;
      std::cout << "<tr><td>" << snap::date::date_to_string(current_date) << "</td>";      
      total_programs_cnt += programs.size();
      int daily_selected_programs_cnt = 0;
      std::map<std::string, std::tuple<int, int, int>> daily_match_counts;
      for (auto p = programs.begin(); p != programs.end(); ++p) {
        ++selected_programs_cnt;
        ++daily_selected_programs_cnt;
        hasher.load_text(p -> lower_text);
        std::map<std::string, std::vector<int>> raw_match_positions = snap::find(patterns, p -> lower_text);
        std::map<std::string, std::vector<int>> match_positions = snap::evaluate_expressions(expressions, raw_match_positions);
        for (auto ss = search_strings.begin(); ss != search_strings.end(); ++ss) {
          if (match_positions[*ss].size() > 0) {
            bool total_context_added = false;
            bool context_added = false;
            ++std::get<1>(daily_match_counts[*ss]);
            ++std::get<1>(match_counts[*ss]);
            std::get<2>(daily_match_counts[*ss]) += match_positions[*ss].size();
            std::get<2>(match_counts[*ss]) += match_positions[*ss].size();            
            for (auto it = match_positions[*ss].begin(); it != match_positions[*ss].end(); ++it) {
              int left_word_hash = hasher.hash(*it - LEFT_HASH_WIDTH, *it);
              int right_word_hash = hasher.hash(*it, *it + RIGHT_HASH_WIDTH);
              int daily_left_hash_cnt = daily_left_word_hashes[*ss][left_word_hash]++;
              int daily_right_hash_cnt = daily_right_word_hashes[*ss][right_word_hash]++;
              int total_left_hash_cnt = total_left_word_hashes[*ss][left_word_hash]++;
              int total_right_hash_cnt = total_right_word_hashes[*ss][right_word_hash]++;
              if (daily_left_hash_cnt == 0 && daily_right_hash_cnt == 0) {
                if (!context_added) {
                  ++std::get<0>(daily_match_counts[*ss]);
                  context_added = true;
                }
                if (total_left_hash_cnt == 0 && total_right_hash_cnt == 0) {
                  if (!total_context_added) {
                    ++std::get<0>(match_counts[*ss]);
                    total_context_added = true;
                  }
                  excerpts.emplace_back(*p, *it - excerpt_size, *it + excerpt_size);
                  std::vector<std::string> search_string_patterns = expressions[ss - search_strings.begin()].patterns;
                  for (auto pattern = search_string_patterns.begin(); pattern != search_string_patterns.end(); ++pattern) {
                    excerpts.back().highlight_word(*pattern);
                  }
                }
              } 
            }
          }
        }
      }
      for (auto ss = search_strings.begin(); ss != search_strings.end(); ++ss) {
        search_results.back().push_back(std::to_string(std::get<0>(daily_match_counts[*ss])));
        search_results_programs.back().push_back(std::to_string(std::get<1>(daily_match_counts[*ss])));
        search_results_total_matches.back().push_back(std::to_string(std::get<2>(daily_match_counts[*ss])));
        std::cout << "<td>" << std::get<0>(daily_match_counts[*ss]) << "</td>";
      }
      search_results.back().push_back(std::to_string(daily_selected_programs_cnt));
      search_results_programs.back().push_back(std::to_string(daily_selected_programs_cnt));
      search_results_total_matches.back().push_back(std::to_string(daily_selected_programs_cnt));
      std::cout << "<td>" << daily_selected_programs_cnt << "</td>";
      std::cout << "</tr>" << std::endl;      
      programs.clear();
    } else {
      missing_files.push_back(*it);
    }
  }
  // print out total line
  std::cout << "<tr>" << std::endl;
  search_results.emplace_back();
  search_results_programs.emplace_back();
  search_results_total_matches.emplace_back();
  std::cout << "<td><strong>Grand Total:</strong></td>" << std::endl;
  search_results.back().push_back("Grand Total:");
  search_results_programs.back().push_back("Grand Total:");
  search_results_total_matches.back().push_back("Grand Total:");
  for (std::string ss : search_strings) {
    std::cout << "<td>" << std::get<0>(match_counts[ss]) << "</td>" << std::endl;
    search_results.back().push_back(std::to_string(std::get<0>(match_counts[ss])));
    search_results_programs.back().push_back(std::to_string(std::get<1>(match_counts[ss])));
    search_results_total_matches.back().push_back(std::to_string(std::get<2>(match_counts[ss])));
  }
  std::cout << "<td>" << total_programs_cnt << "</td>" << std::endl;
  search_results.back().push_back(std::to_string(total_programs_cnt));
  std::cout << "</tr>" << std::endl;

  std::cout << "</tbody></table>" << std::endl;
  std::cout << "<div>";
  std::cout << "<br/>" << std::endl;
  snap::web::print_missing_files(missing_files);
  std::cout << "<br/>" << std::endl;
  snap::web::print_corrupt_files(corrupt_files);
  std::cout << "</div>" << std::endl;

  snap::web::print_excerpts(excerpts, num_excerpts, true);

  // output file
  srand(time(NULL));
  std::string random_id = std::to_string(rand());  
  output_matrix_file(search_results, search_strings, random_id, "contexts");
  output_matrix_file(search_results_programs, search_strings, random_id, "programs");
  output_matrix_file(search_results_total_matches, search_strings, random_id, "total_matches"); 
  // all data in long form
  std::map<std::string, std::tuple<std::string, std::string, std::string>> dict;
  if (snap::io::file_exists("dictionary.csv")) {
    std::ifstream dict_file("dictionary.csv");
    dict = snap::io::read_dictionary(dict_file);
  }
  std::string output_file_name = search_results.front().front() + "_all_" + random_id + ".csv";
  std::string output_file_path = output_path + output_file_name;
  std::ofstream output_file(output_file_path);
  output_file << "Date,Term,Contexts,Programs,Total Matches";
  for (int i = 0; i < search_results.size() - 1; ++i) { // skip total line
    for (int j = 0; j < search_strings.size(); ++j) {
      output_file << '\n';
      output_file << search_results[i].front() << ',' 
                  << (dict.count(search_strings[j]) ? std::get<1>(dict[search_strings[j]]) : search_strings[j]) << ','
                  << search_results[i][j + 1] << ',' // j + 1 skips date column
                  << search_results_programs[i][j + 1] << ','
                  << search_results_total_matches[i][j + 1];
    }
  }
  output_file.close();
  std::cout << "<p>";
  std::cout << snap::web::create_link(output_file_path, "Output Long File", "long-data");
  std::cout << "</p>" << std::endl;
  std::cout << "<p>";
  std::cout << snap::web::create_link("../time-series.html?filename=tmp%2F" + output_file_name + "&title=Snapstream%20Time%20Series",
                                      "Visualization", "visualization");
  std::cout << "</p>" << std::endl; 
  
  double duration = (std::clock() - start_time) / (double) CLOCKS_PER_SEC;
  std::cout << "<br/><span>Time taken (seconds): " << duration << "</span><br/>" << std::endl;
  snap::web::close_html();  
  return 0;
}