/**
 * Returns aspell's spelling suggestions for a word.
 *
 * The speller is picked from the first character of the word: if its
 * lower-cased form lies in 'a'..'z', m_spell_checker_en is used, otherwise
 * m_spell_checker_ru. The word is encoded with m_codec before it is handed
 * to aspell and every suggestion is decoded with the same codec.
 *
 * @param word the word to look up
 * @return the suggestions in the order aspell reports them; empty when the
 *         word is empty, the selected speller is not available, or aspell
 *         does not return a word list
 */
QStringList SpellChecker::suggestions(QString word)
{
    QStringList ret;
    if (word.isEmpty())
        return ret;

    const QChar first = word[0].toLower();
    AspellSpeller *checker = (first >= QChar('a') && first <= QChar('z'))
                                 ? m_spell_checker_en
                                 : m_spell_checker_ru;
    if (!checker)
        return ret;

    const QByteArray encoded = m_codec->fromUnicode(word);
    const AspellWordList *suggestions =
        aspell_speller_suggest(checker, encoded.data(), -1);
    // aspell reports a failed lookup with a null list, which must not be
    // passed on to aspell_word_list_elements().
    if (!suggestions)
        return ret;

    AspellStringEnumeration *elements = aspell_word_list_elements(suggestions);
    const char *suggestion;
    while ((suggestion = aspell_string_enumeration_next(elements)) != 0) {
        ret << m_codec->toUnicode(suggestion);
    }
    delete_aspell_string_enumeration(elements);

    return ret;
}
/**
 * Appends aspell's suggestions for a word to a suggestion list.
 *
 * Collection stops as soon as the list holds limitCandidates_ entries; the
 * comparison is made against the total size of the list, so entries that
 * were already present count towards the limit. A limit of 0 disables the
 * check.
 *
 * @param suggestionsList list the suggestions are appended to
 * @param word            the word to look up with aspellSpeller_
 */
void AspellAdapterImpl::getSuggestionsForLastWord(
        SuggestionsList & suggestionsList,
        const std::string & word
) {
    const AspellWordList * suggestions =
        aspell_speller_suggest(aspellSpeller_, word.c_str(), -1);
    // A null list signals a failed lookup; there is nothing to enumerate.
    if (suggestions == NULL) {
        return;
    }

    AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);

    const char * currentWordSuggestion;
    while ((currentWordSuggestion = aspell_string_enumeration_next(elements)) != NULL) {
        if (limitCandidates_ && (limitCandidates_ <= suggestionsList.size())) {
            break;
        }
        suggestionsList.push_back(std::string(currentWordSuggestion));
    }

    delete_aspell_string_enumeration(elements);
}
int spellcheck_suggest(void * chk, char ***sug, const char * word) { struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk; if (!sug) { prt_error("Error: Aspell. Corrupt pointer.\n"); return 0; } if (aspell && aspell->speller) { const AspellWordList *list = NULL; AspellStringEnumeration *elem = NULL; const char *aword = NULL; unsigned int size, i; char **array = NULL; list = aspell_speller_suggest(aspell->speller, word, -1); elem = aspell_word_list_elements(list); size = aspell_word_list_size(list); /* allocate an array of char* for returning back to link-parser */ array = (char **)malloc(sizeof(char *) * size); if (!array) { prt_error("Error: Aspell. Out of memory.\n"); delete_aspell_string_enumeration(elem); return 0; } i = 0; while ((aword = aspell_string_enumeration_next(elem)) != NULL) { array[i++] = strdup(aword); } delete_aspell_string_enumeration(elem); *sug = array; return size; } return 0; }
/**
 * Utility function that wraps a list of words as a Ruby array of Ruby strings.
 * @param list an aspell word list; may be null.
 * @return a Ruby array containing all words as Ruby strings; the array is
 *         empty when list is null.
 */
static VALUE get_list(const AspellWordList *list)
{
    VALUE result;
    AspellStringEnumeration *els;
    const char *word;

    /* The size of the list may only be queried once it is known to exist. */
    if (list == 0) {
        return rb_ary_new2(0);
    }

    result = rb_ary_new2(aspell_word_list_size(list));
    els = aspell_word_list_elements(list);
    while ((word = aspell_string_enumeration_next(els)) != 0) {
        rb_ary_push(result, rb_str_new2(word));
    }
    delete_aspell_string_enumeration(els);

    return result;
}
/**
 * Returns aspell's suggestions for a word.
 *
 * @param word the word to look up, passed to aspell as a null-terminated
 *             string
 * @return the suggestions decoded as UTF-8, in the order aspell reports
 *         them; empty when aspell does not return a word list
 */
QStringList Speller::suggestions(const char *word)
{
    QStringList res;
    const AspellWordList *wl = aspell_speller_suggest(speller, word, -1);
    if (!wl)
        return res;

    AspellStringEnumeration *els = aspell_word_list_elements(wl);
    const char *candidate;
    while ((candidate = aspell_string_enumeration_next(els)) != NULL) {
        res.append(QString::fromUtf8(candidate));
    }
    // The enumeration belongs to the caller and has to be released.
    delete_aspell_string_enumeration(els);

    return res;
}
void SpellCheck::suggestions(const QString &word, QStringList &list){ if (!spell_checker || word.isEmpty()) return; const AspellWordList *suggestions = aspell_speller_suggest(spell_checker, word.toUtf8().constData(), word.length()); AspellStringEnumeration *elements = aspell_word_list_elements(suggestions); const char * sugg; while ((sugg = aspell_string_enumeration_next(elements)) != NULL ){ list.append(QString::fromUtf8(sugg, strlen(sugg))); } delete_aspell_string_enumeration(elements); }
/**
 * Returns aspell's suggestions for a word.
 *
 * @param word the word to look up; it is passed to aspell as UTF-8
 * @return the suggestions decoded as UTF-8; empty when speller_ is not set
 *         or aspell does not return a word list
 */
QList<QString> ASpellChecker::suggestions(const QString& word)
{
    QList<QString> words;
    if (!speller_)
        return words;

    const QByteArray utf8 = word.toUtf8();
    const AspellWordList* list =
        aspell_speller_suggest(speller_, utf8.constData(), -1);
    // A null list signals a failed lookup and must not be enumerated.
    if (!list)
        return words;

    AspellStringEnumeration* elements = aspell_word_list_elements(list);
    const char *c_word;
    while ((c_word = aspell_string_enumeration_next(elements)) != NULL) {
        words += QString::fromUtf8(c_word);
    }
    delete_aspell_string_enumeration(elements);

    return words;
}
/**
 * Writes every word of an aspell word list to stdout, each followed by delem.
 *
 * @param speller speller the list came from; used to fetch the error message
 *                when wl is null
 * @param wl      the word list, or null if the operation that should have
 *                produced it failed
 * @param delem   character written after each word
 */
static void print_word_list(AspellSpeller * speller,
                            const AspellWordList *wl,
                            char delem)
{
  AspellStringEnumeration * els;
  const char * word;

  if (wl == 0) {
    printf("Error: %s\n", aspell_speller_error_message(speller));
    return;
  }

  els = aspell_word_list_elements(wl);
  while ( (word = aspell_string_enumeration_next(els)) != 0) {
    fputs(word, stdout);
    putc(delem, stdout);
  }
  /* The enumeration belongs to the caller and has to be released. */
  delete_aspell_string_enumeration(els);
}
/* Remeber to free returned string */ char * spelling_document_line(struct spelling_document *sd, char *in_line) { char *newline; int diff, line_len; size_t line_size, conv_line; struct AspellToken token; char *line; size_t conv_in; line = spelling_conv(sd->spelling->conv, in_line); line_len = strlen(line); line_size = line_len + (line_len/10); if ((newline = malloc(line_size)) == NULL) return NULL; strcpy(newline, line); free(line); aspell_document_checker_process(sd->checker, newline, line_len); diff = 0; while (token = aspell_document_checker_next_misspelling(sd->checker), token.len != 0) { char *word_begin; const char *word; int word_len; const AspellWordList *wl; AspellStringEnumeration *els; word_begin = newline + token.offset + diff; wl = aspell_speller_suggest(sd->spelling->speller, word_begin, token.len); els = aspell_word_list_elements(wl); if ((word = aspell_string_enumeration_next(els)) == NULL) continue; word_len = strlen(word); diff += word_len - token.len; memmove(word_begin + word_len, word_begin + token.len, strlen(word_begin + token.len) + 1); memcpy(word_begin, word, word_len); line_len += diff; } line = spelling_conv(sd->spelling->conv_out, newline); free(newline); return line; }
/**
 * Returns the suggestions of every configured speller for a word.
 *
 * The results of all entries of spellers_ are concatenated in order.
 * Suggestions of two characters or fewer are dropped. Spellers that are
 * null or do not return a word list are skipped.
 *
 * @param word the word to look up; it is passed to aspell as UTF-8
 * @return the collected suggestions, decoded as UTF-8
 */
QList<QString> ASpellChecker::suggestions(const QString& word)
{
    QList<QString> words;
    // The encoded word is the same for every speller; convert it once.
    const QByteArray utf8 = word.toUtf8();

    foreach(AspellSpeller* speller, spellers_) {
        if (!speller)
            continue;

        const AspellWordList* list =
            aspell_speller_suggest(speller, utf8.constData(), -1);
        // A null list signals a failed lookup and must not be enumerated.
        if (!list)
            continue;

        AspellStringEnumeration* elements = aspell_word_list_elements(list);
        const char *c_word;
        while ((c_word = aspell_string_enumeration_next(elements)) != NULL) {
            QString suggestion = QString::fromUtf8(c_word);
            if (suggestion.size() > 2)
                words.append(suggestion);
        }
        delete_aspell_string_enumeration(elements);
    }

    return words;
}
void checkTheWord(char* word,int checkDoc) { #if 1 int correct; AspellWordList* suggestions; AspellStringEnumeration* elements; const char* suggestedword; int wordcnt=0; char* wordlist[100]; char* labeltext[512]; correct=aspell_speller_check(spellChecker,word,-1); if(!correct) { badWord=word; cancelCheck=false; if(spellCheckWord==NULL) buildWordCheck(checkDoc); else { for(int j=0; j<numWords; j++) gtk_combo_box_text_remove((GtkComboBoxText*)wordListDropbox,0); sprintf((char*)&labeltext,"Change <i><b>%s</b></i> to: ",badWord); gtk_label_set_text((GtkLabel*)badWordLabel,(char*)&labeltext); gtk_label_set_use_markup((GtkLabel*)badWordLabel,true); } suggestions=(AspellWordList*)aspell_speller_suggest(spellChecker,word,-1); elements=aspell_word_list_elements(suggestions); while((suggestedword=aspell_string_enumeration_next(elements))!=NULL) { wordlist[wordcnt]=strdup(suggestedword); gtk_combo_box_text_append_text((GtkComboBoxText*)wordListDropbox,wordlist[wordcnt]); wordcnt++; } numWords=wordcnt; delete_aspell_string_enumeration(elements); gtk_combo_box_set_active((GtkComboBox*)wordListDropbox,0); gtk_widget_show_all(spellCheckWord); gtk_dialog_run((GtkDialog *)spellCheckWord); } #endif }
/**
 * Returns aspell's suggestions for a word.
 *
 * The word is passed to aspell in the local 8-bit encoding and the
 * suggestions are decoded as Latin-1.
 * NOTE(review): these two encodings only agree on Latin-1 systems - confirm
 * which encoding the speller is configured with.
 *
 * @param word the word to look up
 * @return the suggestions; empty when m_speller is not set or aspell does
 *         not return a word list
 */
QStringList KAspellChecker::suggestions(const QString &word)
{
    QStringList suggs;
    if (!m_speller)
        return suggs;

    const AspellWordList *wordList =
        aspell_speller_suggest(m_speller, word.toLocal8Bit().data(), -1);
    if (wordList == 0)
        return suggs;

    AspellStringEnumeration *els = aspell_word_list_elements(wordList);
    const char *ws;
    while ((ws = aspell_string_enumeration_next(els)) != 0)
        suggs << QString::fromLatin1(ws);
    // The enumeration belongs to the caller and has to be released.
    delete_aspell_string_enumeration(els);

    return suggs;
}
//__________________________________________________________________________ void Speller::Aspell::Suggest::storeWordList(const AspellWordList* wlist, std::vector<std::string>& replacement) throw( std::invalid_argument ) { if( ! wlist ) { throw std::invalid_argument( "(Aspell.Speller.Suggest.store" "WordList): word list pointer " "is null." ); } AspellStringEnumeration* enum_list = aspell_word_list_elements( wlist ); const char* next; while( (next = aspell_string_enumeration_next( enum_list )) ) { replacement.push_back( next ); } delete_aspell_string_enumeration( enum_list ); }
//__________________________________________________________________________ void Speller::Aspell::Suggest::printWordList(const AspellWordList* wlist, char delim) throw( std::invalid_argument ) { if( ! wlist ) { throw std::invalid_argument( "(Aspell.Speller.Suggest.print" "WordList): word list pointer " "is null." ); } AspellStringEnumeration* enum_list = aspell_word_list_elements( wlist ); const char* next; while( (next = aspell_string_enumeration_next( enum_list )) ) { std::cout << next << delim; } delete_aspell_string_enumeration( enum_list ); }
/* helper function: converts an aspell word list into python list */ static PyObject* AspellWordList2PythonList(const AspellWordList* wordlist) { PyObject* list; AspellStringEnumeration* elements; const char* word; list = PyList_New(0); if (!list) { PyErr_SetString(PyExc_Exception, "can't create new list"); return NULL; } elements = aspell_word_list_elements(wordlist); while ( (word=aspell_string_enumeration_next(elements)) != 0) if (PyList_Append(list, Py_BuildValue("s", word)) == -1) { PyErr_SetString(PyExc_Exception, "It is almost impossible, but happend! Can't append element to the list."); delete_aspell_string_enumeration(elements); Py_DECREF(list); return NULL; } delete_aspell_string_enumeration(elements); return list; }
/**
 * Returns up to ten suggestions for the word under the text cursor.
 *
 * The word is selected from m_textEdit's cursor and checked with
 * spell_checker2 and then spell_checker1; if either check returns a
 * non-zero value no suggestions are produced. The suggestions themselves
 * come from spell_checker1 only.
 *
 * @return the suggestions decoded as UTF-8; empty when a speller is
 *         missing, there is no word under the cursor, the word is accepted
 *         by a speller, or aspell returns no word list
 */
QStringList SpellChecker::suggestions()
{
    QStringList sl;
    if ((spell_checker1 == 0) || (spell_checker2 == 0))
        return sl;

    QTextCursor cursor = m_textEdit->textCursor();
    cursor.select(QTextCursor::WordUnderCursor);
    const QByteArray ba = cursor.selectedText().toUtf8();
    // Nothing selected: there is no word to look up.
    if (ba.isEmpty())
        return sl;

    if ((aspell_speller_check(spell_checker2, ba.data(), ba.size()) != 0)
        || (aspell_speller_check(spell_checker1, ba.data(), ba.size()) != 0))
        return sl;

    const struct AspellWordList *awl =
        aspell_speller_suggest(spell_checker1, ba.data(), ba.size());
    // The list has to exist before its size may be queried.
    if (awl == 0 || aspell_word_list_size(awl) == 0)
        return sl;

    struct AspellStringEnumeration *ase = aspell_word_list_elements(awl);
    for (int i = 0; i < 10; ++i) {
        const char *text = aspell_string_enumeration_next(ase);
        if (text == 0)
            break;  // end of the enumeration
        sl << QString::fromUtf8(text);
    }
    delete_aspell_string_enumeration(ase);

    return sl;
}
/**
 * Builds the context menu offered for a misspelled word.
 *
 * The top-level menu starts with an "Add to Dictionary" item and a
 * separator. The suggestions of spell->speller follow, ten per menu level;
 * each further group of ten goes into a nested "More..." submenu. When
 * there are no suggestions (or aspell returns no list) an italic
 * "(no suggestions)" item is put at the top of the menu instead.
 *
 * @param spell  supplies the speller and is passed as user data to the
 *               add_to_dictionary and replace_word callbacks
 * @param buffer not used by this function
 * @param word   the misspelled word
 * @return the newly created top-level menu
 */
GtkWidget*
build_suggestion_menu(GtkSpell *spell, GtkTextBuffer *buffer,
                      const char *word) {
	const char *suggestion = NULL;
	GtkWidget *topmenu, *menu;
	GtkWidget *mi;
	int count = 0;
	const AspellWordList *suggestions;
	AspellStringEnumeration *elements = NULL;
	char *label;

	topmenu = menu = gtk_menu_new();

	/* + Add to Dictionary */
	label = g_strdup_printf("Add \"%s\" to Dictionary", word);
	mi = gtk_image_menu_item_new_with_label(label);
	g_free(label);
	gtk_image_menu_item_set_image(GTK_IMAGE_MENU_ITEM(mi),
			gtk_image_new_from_stock(GTK_STOCK_ADD, GTK_ICON_SIZE_MENU));
	g_signal_connect(G_OBJECT(mi), "activate",
			G_CALLBACK(add_to_dictionary), spell);
	gtk_widget_show_all(mi);
	gtk_menu_shell_append(GTK_MENU_SHELL(topmenu), mi);

	/* Separator */
	mi = gtk_menu_item_new();
	gtk_widget_show(mi);
	gtk_menu_shell_append(GTK_MENU_SHELL(topmenu), mi);

	/* A null list signals a failed lookup; treat it like "no suggestions"
	 * instead of enumerating it. */
	suggestions = aspell_speller_suggest(spell->speller, word, -1);
	if (suggestions != NULL) {
		elements = aspell_word_list_elements(suggestions);
		suggestion = aspell_string_enumeration_next(elements);
	}

	if (suggestion == NULL) {
		/* no suggestions.  put something in the menu anyway... */
		GtkWidget *empty_label;
		empty_label = gtk_label_new("");
		gtk_label_set_markup(GTK_LABEL(empty_label), "<i>(no suggestions)</i>");

		mi = gtk_menu_item_new();
		gtk_container_add(GTK_CONTAINER(mi), empty_label);
		gtk_widget_show_all(mi);
		gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), mi);
	} else {
		/* build a set of menus with suggestions. */
		while (suggestion != NULL) {
			if (count == 10) {
				/* This level is full: add a separator and continue in
				 * a nested "More..." submenu. */
				mi = gtk_menu_item_new();
				gtk_widget_show(mi);
				gtk_menu_shell_append(GTK_MENU_SHELL(menu), mi);

				mi = gtk_menu_item_new_with_label("More...");
				gtk_widget_show(mi);
				gtk_menu_shell_append(GTK_MENU_SHELL(menu), mi);

				menu = gtk_menu_new();
				gtk_menu_item_set_submenu(GTK_MENU_ITEM(mi), menu);
				count = 0;
			}
			mi = gtk_menu_item_new_with_label(suggestion);
			g_signal_connect(G_OBJECT(mi), "activate",
					G_CALLBACK(replace_word), spell);
			gtk_widget_show(mi);
			gtk_menu_shell_append(GTK_MENU_SHELL(menu), mi);
			count++;
			suggestion = aspell_string_enumeration_next(elements);
		}
	}

	if (elements != NULL)
		delete_aspell_string_enumeration(elements);

	return topmenu;
}
char * weechat_aspell_get_suggestions (struct t_aspell_speller_buffer *speller_buffer, const char *word) { int i, size, max_suggestions, num_suggestions; char *suggestions, *suggestions2; const char *ptr_word; #ifdef USE_ENCHANT char **elements; size_t num_elements; #else const AspellWordList *list; AspellStringEnumeration *elements; #endif max_suggestions = weechat_config_integer (weechat_aspell_config_check_suggestions); if (max_suggestions < 0) return NULL; size = 1; suggestions = malloc (size); if (!suggestions) return NULL; suggestions[0] = '\0'; if (speller_buffer->spellers) { for (i = 0; speller_buffer->spellers[i]; i++) { #ifdef USE_ENCHANT elements = enchant_dict_suggest (speller_buffer->spellers[i], word, -1, &num_elements); if (elements) { if (num_elements > 0) { num_suggestions = 0; while ((ptr_word = elements[num_suggestions]) != NULL) { size += strlen (ptr_word) + ((suggestions[0]) ? 1 : 0); suggestions2 = realloc (suggestions, size); if (!suggestions2) { free (suggestions); enchant_dict_free_string_list (speller_buffer->spellers[i], elements); return NULL; } suggestions = suggestions2; if (suggestions[0]) strcat (suggestions, (num_suggestions == 0) ? "/" : ","); strcat (suggestions, ptr_word); num_suggestions++; if ((max_suggestions >= 0) && (num_suggestions == max_suggestions)) break; } } enchant_dict_free_string_list (speller_buffer->spellers[i], elements); } #else list = aspell_speller_suggest (speller_buffer->spellers[i], word, -1); if (list) { elements = aspell_word_list_elements (list); num_suggestions = 0; while ((ptr_word = aspell_string_enumeration_next (elements)) != NULL) { size += strlen (ptr_word) + ((suggestions[0]) ? 1 : 0); suggestions2 = realloc (suggestions, size); if (!suggestions2) { free (suggestions); delete_aspell_string_enumeration (elements); return NULL; } suggestions = suggestions2; if (suggestions[0]) strcat (suggestions, (num_suggestions == 0) ? 
"/" : ","); strcat (suggestions, ptr_word); num_suggestions++; if ((max_suggestions >= 0) && (num_suggestions == max_suggestions)) break; } delete_aspell_string_enumeration (elements); } #endif } } /* no suggestions found */ if (!suggestions[0]) { free (suggestions); return NULL; } return suggestions; }
// runs in O(t^2) time where t is the number of tokens in the input corpus // We consider maxK to be fairly constant void rawr::compile(int maxK) { _maxK = maxK; std::vector<std::vector<token_id>> tokens; std::set<std::string> thashtags; std::set<std::string> fv_emoticons; std::ifstream fvefile("emoticons.txt"); if (fvefile) { std::string line; while (getline(fvefile, line)) { fv_emoticons.insert(line); emoticons.forms.add(line); } } fvefile.close(); std::map<std::string, std::string> canonical_form; AspellConfig* spell_config = new_aspell_config(); AspellCanHaveError* possible_err = new_aspell_speller(spell_config); if (aspell_error_number(possible_err) != 0) { std::cout << "aspell error: " << aspell_error_message(possible_err) << std::endl; exit(1); } AspellSpeller* spell_checker = to_aspell_speller(possible_err); std::cout << "Reading emojis..." << std::endl; prefix_search emojis; std::ifstream emoji_file("emojis.txt"); if (emoji_file) { while (!emoji_file.eof()) { std::string rawmojis; getline(emoji_file, rawmojis); if (rawmojis.back() == '\r') { rawmojis.pop_back(); } emojis.add(rawmojis); } emoji_file.close(); } std::cout << "Tokenizing corpus... 0%" << std::flush; int len = 0; for (auto c : _corpora) { len += c.length(); } int startper = 0; int per = 0; int perprime = 0; std::cout.fill(' '); for (int i = 0; i < _corpora.size(); i++) { size_t start = 0; int end = 0; std::vector<token_id> tkcor; while (end != std::string::npos) { perprime = (startper + end) * 100 / len; if (perprime != per) { per = perprime; std::cout << "\b\b\b\b" << std::right; std::cout.width(3); std::cout << per << "%" << std::flush; } end = _corpora[i].find_first_of(" \n", start); bool emoji = false; std::string te = _corpora[i].substr(start, (end == std::string::npos) ? 
std::string::npos : end - start + 1); std::string t = ""; if (te.compare("") && te.compare(".") && te.compare(" ")) { if (te.back() == ' ') { te.pop_back(); } // Extract strings of emojis into their own tokens even if they're not space delimited int m = emojis.match(te); emoji = m > 0; if (m == 0) m = 1; t = te.substr(0,m); te = te.substr(m); while (!te.empty()) { m = emojis.match(te); if (emoji == (m > 0)) { if (m == 0) m = 1; t += te.substr(0,m); te = te.substr(m); } else { end = start + t.length() - 1; break; } } std::string tc(t); std::transform(tc.begin(), tc.end(), tc.begin(), ::tolower); int pst = tc.find_first_not_of("\"([*"); int dst = tc.find_last_not_of("\")]*.,?!\n;:"); std::string canonical(""); if ((pst != std::string::npos) && (dst != std::string::npos)) { canonical = std::string(tc, pst, dst - pst + 1); } word& w = ([&] () -> word& { // Hashtag freevar if (canonical[0] == '#') { thashtags.insert(canonical); return hashtags; } // Emoticon freevar if (emoji) { emoticons.forms.add(canonical); return emoticons; } if ((pst != std::string::npos) && (dst != std::string::npos)) { std::string emoticon_canon(t, pst, t.find_last_not_of("\"]*\n.,?!") - pst + 1); if (fv_emoticons.count(emoticon_canon) == 1) { emoticons.forms.add(emoticon_canon); return emoticons; } } // Basically any other word if (canonical_form.count(canonical) == 0) { if ( // Legacy freevars should be distinct from tokens containing similar words (canonical.find("$name$") != std::string::npos) // Words with no letters will be mangled by the spell checker || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos) ) { canonical_form[canonical] = canonical; words.emplace(canonical, canonical); } else { int correct = aspell_speller_check(spell_checker, canonical.c_str(), canonical.size()); if (correct) { words.emplace(canonical, canonical); canonical_form[canonical] = canonical; } else { const AspellWordList* suggestions = 
aspell_speller_suggest(spell_checker, canonical.c_str(), canonical.size()); AspellStringEnumeration* elements = aspell_word_list_elements(suggestions); const char* replacement = aspell_string_enumeration_next(elements); if (replacement != NULL) { std::string sugrep(replacement); canonical_form[canonical] = sugrep; if (words.count(sugrep) == 0) { words.emplace(sugrep, sugrep); } } else { words.emplace(canonical, canonical); canonical_form[canonical] = canonical; } delete_aspell_string_enumeration(elements); } } } word& tw = words.at(canonical_form.at(canonical)); tw.forms.add(canonical); return tw; })(); token tk(w); tk.raw = t; for (char c : t) { if (c == '*') { tk.delimiters[{parentype::asterisk, doublestatus::opening}]++; } else if (c == '[') { tk.delimiters[{parentype::square_bracket, doublestatus::opening}]++; } else if (c == '(') { tk.delimiters[{parentype::paren, doublestatus::opening}]++; } else if (c == '"') { tk.delimiters[{parentype::quote, doublestatus::opening}]++; } else { break; } } int backtrack = t.find_last_not_of(".,?!])\"*\n;:") + 1; if (backtrack != t.length()) { std::string ending = t.substr(backtrack); std::string suffix; bool newline = false; bool terminating = false; for (char c : ending) { if ((c == '.') || (c == ',') || (c == '?') || (c == '!') || (c == ';') || (c == ':')) { suffix += c; terminating = true; continue; } else if (c == '\n') { newline = true; terminating = true; continue; } parentype pt = ([&] { switch (c) { case ']': return parentype::square_bracket; case ')': return parentype::paren; case '*': return parentype::asterisk; case '"': return parentype::quote; } })(); if (tk.delimiters[{pt, doublestatus::opening}] > 0) { tk.delimiters[{pt, doublestatus::opening}]--; tk.delimiters[{pt, doublestatus::both}]++; } else { tk.delimiters[{pt, doublestatus::closing}]++; } } if (terminating) { if ((suffix == ",") && (!newline)) { tk.suffix = suffixtype::comma; } else { tk.suffix = suffixtype::terminating; if (!newline) { 
w.terms.add({suffix, false}); } else { w.terms.add({".", false}); } } } } tkcor.push_back(_tokenstore.add(tk)); } start = ((end > (std::string::npos - 1) ) ? std::string::npos : end + 1); } tokens.push_back(tkcor); startper += _corpora[i].length(); } std::cout << "\b\b\b\b100%" << std::endl; delete_aspell_speller(spell_checker); delete_aspell_config(spell_config); std::cout << canonical_form.size() << " distinct forms" << std::endl; std::cout << words.size() << " distinct words" << std::endl; // Time to condense the distribution stuff for the words std::cout << "Compiling token histograms..." << std::endl; for (auto& it : words) { it.second.forms.compile(); it.second.terms.compile(); } // Hashtag freevar is not frequency distributed for (auto& it : thashtags) { hashtags.forms.add(it); } hashtags.forms.compile(); hashtags.terms.compile(); // Compile other freevars emoticons.forms.compile(); emoticons.terms.compile(); // Compile the interned tokens. _tokenstore.compile(); // kgram distribution std::cout << "Creating markov chain... 0%" << std::flush; std::map<kgram, std::map<token_id, token_data> > tstats; len = 0; for (auto c : tokens) { len += (maxK-1) * c.size(); } startper = 0; per = 0; perprime = 0; int corpid = 0; for (auto corpus : tokens) { for (int k=0; k<maxK && k<corpus.size(); k++) { // The zero'th token should be a terminator. 
token_id fid = corpus[k]; const token& f = _tokenstore.get(fid); kgram term_prefix(corpus.begin(), corpus.begin()+k); term_prefix.push_front(wildcardQuery); if (tstats[term_prefix].count(fid) == 0) { tstats[term_prefix].emplace(fid, fid); } token_data& td2 = tstats[term_prefix].at(fid); td2.all++; td2.corpora.insert(corpid); if (std::find_if(f.raw.begin(), f.raw.end(), ::islower) == f.raw.end()) { td2.uppercase++; } else if (isupper(f.raw[0])) { td2.titlecase++; } } for (int k=1; k<maxK && k<corpus.size(); k++) { for (int i=0; i<(corpus.size() - k); i++) { perprime = (startper+i) * 100 / len; if (perprime != per) { per = perprime; std::cout << "\b\b\b\b" << std::right; std::cout.width(3); std::cout << per << "%" << std::flush; } kgram prefix(corpus.begin()+i, corpus.begin()+i+k); token_id fid = corpus[i+k]; const token& f = _tokenstore.get(fid); if (tstats[prefix].count(fid) == 0) { tstats[prefix].emplace(fid, fid); } token_data& td = tstats[prefix].at(fid); td.all++; td.corpora.insert(corpid); if (std::find_if(f.raw.begin(), f.raw.end(), ::islower) == f.raw.end()) { td.uppercase++; } else if (isupper(f.raw[0])) { td.titlecase++; } const token& startTok = _tokenstore.get(std::begin(prefix)->tok); if (startTok.suffix == suffixtype::terminating) { kgram term_prefix(prefix); term_prefix.pop_front(); term_prefix.push_front(wildcardQuery); if (tstats[term_prefix].count(fid) == 0) { tstats[term_prefix].emplace(fid, fid); } token_data& td2 = tstats[term_prefix].at(fid); td2.all++; td2.corpora.insert(corpid); if (std::find_if(f.raw.begin(), f.raw.end(), ::islower) == f.raw.end()) { td2.uppercase++; } else if (isupper(f.raw[0])) { td2.titlecase++; } } } startper += corpus.size(); } corpid++; } std::cout << "\b\b\b\b100%" << std::endl; // Condense the kgram distribution std::cout << "Compiling kgram distributions... 
0%"; len = tstats.size(); per = 0; perprime = 0; int indicator = 0; for (auto& it : tstats) { indicator++; perprime = indicator * 100 / len; if (per != perprime) { per = perprime; std::cout << "\b\b\b\b" << std::right; std::cout.width(3); std::cout << per << "%" << std::flush; } kgram klist = it.first; auto& probtable = it.second; auto& distribution = _stats[klist]; int max = 0; for (auto& kt : probtable) { max += kt.second.all; distribution.emplace(max, kt.second); } } std::cout << "\b\b\b\b100%" << std::endl; _compiled = true; }