/**
 * Creates one aspell speller for Russian and one for English.
 *
 * Both spellers read their dictionaries from "./dict" and work on UTF-8 text.
 * When a speller cannot be created, the aspell error message is written to
 * stdout and the matching m_spell_checker_* member is left null.
 *
 * @param parent QObject parent, forwarded to the QObject constructor
 */
SpellChecker::SpellChecker( QObject * parent )
    : QObject(parent)
{
    // --- Russian speller ---
    m_spell_config_ru = new_aspell_config();
    aspell_config_replace(m_spell_config_ru, "dict-dir", "./dict");
    aspell_config_replace(m_spell_config_ru, "encoding", "utf-8");
    aspell_config_replace(m_spell_config_ru, "lang", "ru");

    AspellCanHaveError * possible_err = new_aspell_speller(m_spell_config_ru);
    m_spell_checker_ru = 0;
    if (aspell_error_number(possible_err) != 0) {
        puts(aspell_error_message(possible_err));
        // A failed result is never turned into a speller, so release it here.
        delete_aspell_can_have_error(possible_err);
    } else {
        m_spell_checker_ru = to_aspell_speller(possible_err);
    }

    // --- English speller ---
    m_spell_config_en = new_aspell_config();
    aspell_config_replace(m_spell_config_en, "dict-dir", "./dict");
    aspell_config_replace(m_spell_config_en, "encoding", "utf-8");
    aspell_config_replace(m_spell_config_en, "lang", "en");

    possible_err = new_aspell_speller(m_spell_config_en);
    m_spell_checker_en = 0;
    if (aspell_error_number(possible_err) != 0) {
        puts(aspell_error_message(possible_err));
        // Same as above: the error object would otherwise be leaked.
        delete_aspell_can_have_error(possible_err);
    } else {
        m_spell_checker_en = to_aspell_speller(possible_err);
    }

    m_codec = QTextCodec::codecForName("UTF-8");
}
/**
 * Ctor for aspell objects.
 * This is a custom constructor and takes a hash of config options: key, value pairs.
 * Common use:
 *
 * a = Aspell.new({"lang"=>"de", "jargon"=>"berlin"})
 *
 * For a list of config options, see aspell manual.
 * @param options hash of options
 * @exception cAspellError if the speller cannot be created; the message is the
 *            aspell error text (truncated to 511 bytes).
 */
static VALUE aspell_s_new1(VALUE klass, VALUE options) {
    //aspell values
    AspellCanHaveError * ret;
    AspellSpeller * speller;
    AspellConfig * config;
    // rb_raise() never returns, so the message is copied to the stack instead
    // of the heap: a strdup()'d copy could never be freed.
    char errmsg[512];

    //create new config
    config = new_aspell_config();
    //set options
    set_options(config, options);
    //create speller:
    ret = new_aspell_speller(config);
    delete_aspell_config(config);
    if (aspell_error(ret) != 0) {
        strncpy(errmsg, aspell_error_message(ret), sizeof(errmsg) - 1);
        errmsg[sizeof(errmsg) - 1] = '\0';
        // The message lives inside ret, so ret is released only after the copy.
        delete_aspell_can_have_error(ret);
        rb_raise(cAspellError, "%s", errmsg);
    }
    speller = to_aspell_speller(ret);
    //wrap pointer
    return Data_Wrap_Struct(klass, 0, aspell_free, speller);
}
/**
 * Sets up the aspell configuration (UTF-8, personal dictionary in the user
 * config directory) and tries to create the speller.
 *
 * On failure the aspell message is printed to stdout, the configuration is
 * destroyed and both `config` and `spell_checker` end up NULL.
 *
 * @param parent QObject parent, forwarded to the QObject constructor
 */
SpellCheck::SpellCheck(QObject *parent)
    : QObject(parent), config(NULL), spell_checker(NULL)
{
    config = new_aspell_config();
    // Check before the first use; the original only tested config after
    // already passing it to aspell_config_replace().
    if (!config)
        return;

    aspell_config_replace(config, "encoding", "utf-8");
    aspell_config_replace(config, "personal", (dcpp::Util::getPath(dcpp::Util::PATH_USER_CONFIG)+"dict").c_str());

#if defined(Q_WS_WIN)
    aspell_config_replace(config, "data-dir", "./aspell/data");
    aspell_config_replace(config, "dict-dir", "./aspell/dict");
#endif

    AspellCanHaveError *error = new_aspell_speller(config);

    if (aspell_error(error)) {
        delete_aspell_config(config);

        printf("%s\n", aspell_error_message(error));

        // The failed result is never converted into a speller; free it.
        delete_aspell_can_have_error(error);

        config = NULL;
    }
    else
        spell_checker = to_aspell_speller(error);
}
bool KAspellChecker::init() { QString locale = QString(QLocale::system().name()).left(2); if (locale.length() < 2) locale = "en"; AspellConfig * config = new_aspell_config(); aspell_config_replace(config, "lang", locale.toLocal8Bit().data()); AspellCanHaveError * ret = new_aspell_speller(config); delete_aspell_config(config); if (aspell_error(ret) != 0) { qDebug("Error: %s\n",aspell_error_message(ret)); delete_aspell_can_have_error(ret); return false; } m_speller = to_aspell_speller(ret); config = aspell_speller_config(m_speller); qDebug() << "USING LANG= " << aspell_config_retrieve(config, "lang"); return true; }
/**
 * Adds a speller for the given language to `checkers`.
 *
 * Does nothing when a speller for `name` is already registered. When the very
 * first speller is added, chatCreated() is invoked for every existing chat.
 *
 * @param name language name, written into the "lang" option of spellConfig
 * @return true when a speller for `name` is available afterwards, false when
 *         aspell could not create one (the error is shown in a message box)
 */
bool SpellChecker::addCheckedLang(QString &name)
{
    if (checkers.find(name) != checkers.end())
        return true;

    aspell_config_replace(spellConfig, "lang", name.toAscii());

    // create spell checker using prepared configuration
    AspellCanHaveError* possibleErr = new_aspell_speller(spellConfig);
    if (aspell_error_number(possibleErr) != 0)
    {
        MessageBox::msg(aspell_error_message(possibleErr));
        // The failed result is not turned into a speller; free it.
        delete_aspell_can_have_error(possibleErr);
        return false;
    }

    checkers[name] = to_aspell_speller(possibleErr);

    if (checkers.size() == 1)
    {
        foreach(ChatWidget *chat, ChatWidgetManager::instance()->chats())
            chatCreated(chat);
    }

    return true;
}
int main(int argc,char **argv) { #ifdef _USEQT5_ QApplication app(argc,argv); #endif AspellCanHaveError* possible_err; aspellConfig=new_aspell_config(); possible_err=new_aspell_speller(aspellConfig); if(aspell_error_number(possible_err)!= 0) puts(aspell_error_message(possible_err)); else spellChecker=to_aspell_speller(possible_err); #ifndef _USEQT5_ gtk_init(&argc,&argv); buildMainGuiGtk(); gtk_window_stick(GTK_WINDOW(window)); gtk_window_set_keep_above((GtkWindow*)window,true); gtk_widget_show_all(window); gtk_main(); #else holdapp=&app; buildMainGuiQt(); window->show(); app.exec(); #endif }
/**
 * Generate a document checker object from a given speller.
 * @param speller the speller that shall check a document.
 * @return a fresh document checker.
 * @exception cAspellError if aspell cannot create the checker; the message is
 *            the aspell error text (truncated to 511 bytes).
 */
static AspellDocumentChecker* get_checker(AspellSpeller *speller) {
    AspellCanHaveError * ret;
    AspellDocumentChecker * checker;
    // rb_raise() never returns, so the message is copied to the stack and the
    // aspell error object is freed before raising.
    char errmsg[512];

    ret = new_aspell_document_checker(speller);
    if (aspell_error(ret) != 0) {
        strncpy(errmsg, aspell_error_message(ret), sizeof(errmsg) - 1);
        errmsg[sizeof(errmsg) - 1] = '\0';
        delete_aspell_can_have_error(ret);
        rb_raise(cAspellError, "%s", errmsg);
    }
    checker = to_aspell_document_checker(ret);
    return checker;
}
AspellSpeller * weechat_aspell_speller_new (const char *lang) { AspellConfig *config; AspellCanHaveError *ret; AspellSpeller *new_speller; struct t_infolist *infolist; if (!lang) return NULL; if (weechat_aspell_plugin->debug) { weechat_printf (NULL, "%s: creating new speller for lang \"%s\"", ASPELL_PLUGIN_NAME, lang); } /* create a speller instance for the newly created cell */ config = new_aspell_config(); aspell_config_replace (config, "lang", lang); /* apply all options on speller */ infolist = weechat_infolist_get ("option", NULL, "aspell.option.*"); if (infolist) { while (weechat_infolist_next (infolist)) { aspell_config_replace (config, weechat_infolist_string (infolist, "option_name"), weechat_infolist_string (infolist, "value")); } weechat_infolist_free (infolist); } ret = new_aspell_speller (config); if (aspell_error (ret) != 0) { weechat_printf (NULL, "%s%s: error: %s", weechat_prefix ("error"), ASPELL_PLUGIN_NAME, aspell_error_message (ret)); delete_aspell_config (config); delete_aspell_can_have_error (ret); return NULL; } new_speller = to_aspell_speller (ret); weechat_hashtable_set (weechat_aspell_spellers, lang, new_speller); /* free configuration */ delete_aspell_config (config); return new_speller; }
void AspellAdapterImpl::createAspellInstance() { AspellCanHaveError * possibleError = new_aspell_speller(aspellConfig_); if (aspell_error_number(possibleError) != 0) { ERROR("ASPELL CREATION ERROR: " << aspell_error_message(possibleError)); // @todo // throw PsiException(aspell_error_message(possibleError)); } else { aspellSpeller_ = to_aspell_speller(possibleError); } }
static gboolean gtkspell_set_language_internal(GtkSpell *spell, const gchar *lang, GError **error) { AspellConfig *config; AspellCanHaveError *err; if (lang == NULL) { lang = g_getenv("LANG"); if (lang) { if (g_strncasecmp(lang, "C", 1) == 0) lang = NULL; else if (lang[0] == 0) lang = NULL; } } config = new_aspell_config(); if (lang) aspell_config_replace(config, "language-tag", lang); aspell_config_replace(config, "encoding", "utf-8"); err = new_aspell_speller(config); delete_aspell_config(config); if (aspell_error_number(err) != 0) { #ifdef USING_ASPELL g_set_error(error, GTKSPELL_ERROR, GTKSPELL_ERROR_BACKEND, "aspell: %s", aspell_error_message(err)); #elif defined USING_PSPELL g_set_error(error, GTKSPELL_ERROR, GTKSPELL_ERROR_BACKEND, "pspell: %s", aspell_error_message(err)); #endif return FALSE; } if (spell->speller) delete_aspell_speller(spell->speller); spell->speller = to_aspell_speller(err); return TRUE; }
/**
 * Creates the aspell speller described by a SpellerConfig.
 *
 * `speller` stays NULL when the configuration holds no aspell config or when
 * aspell fails; in the latter case a warning is logged.
 *
 * @param cfg configuration wrapper; its m_base is copied into this object
 */
Speller::Speller(SpellerConfig *cfg)
        : m_base(cfg->m_base)
{
    speller = NULL;
    if (!cfg->cfg)
        return;

    AspellCanHaveError *result = new_aspell_speller(cfg->cfg);
    if (aspell_error(result) == 0) {
        speller = to_aspell_speller(result);
        return;
    }

    // Creation failed: report it and release the error object.
    log(L_WARN, "Spell: %s", aspell_error_message(result));
    delete_aspell_can_have_error(result);
}
void initAspell() { spell_config = new_aspell_config(); aspell_config_replace(spell_config, "lang", "en_US"); //set language possible_err = new_aspell_speller(spell_config); spell_checker = 0; if (aspell_error_number(possible_err) != 0) { printf("%s ", aspell_error_message(possible_err)); } else { printf("Unscrambled words:\n"); spell_checker = to_aspell_speller(possible_err); } }
void init(void) { char* filename; #ifdef _ASPELL_ AspellCanHaveError* possible_err; #endif lineWrap=true; highLight=true; useUnderline=true; tabWidth=4; fontAndSize=strdup("mono 10"); terminalCommand=strdup("xterm -e"); windowWidth=800; windowHeight=400; windowX=-1; windowY=-1; wrapSearch=true; insensitiveSearch=true; replaceAll=false; showLiveSearch=true; gzipPages=false; asprintf(&filename,"%s/.ManPageEditor",getenv("HOME")); g_mkdir_with_parents(filename,493); g_free(filename); readConfig(); tmpGzipPages=gzipPages; tmpHighLight=highLight; tmpLineWrap=lineWrap; tmpTabWidth=tabWidth; tmpUseUnderline=useUnderline; tmpShowLiveSearch=showLiveSearch; #ifdef _ASPELL_ aspellConfig=new_aspell_config(); possible_err=new_aspell_speller(aspellConfig); if(aspell_error_number(possible_err)!= 0) puts(aspell_error_message(possible_err)); else spellChecker=to_aspell_speller(possible_err); #endif }
/**
 * Replaces the active spellers with one speller per requested language.
 *
 * The existing spellers are cleared first. For every language a clone of
 * config_ gets its "lang" option set and a speller is created from it;
 * spellers that could be created are appended to spellers_, failures are
 * written to the debug log.
 *
 * NOTE(review): the closing brace of this function is not visible in this
 * excerpt.
 *
 * @param langs language names, passed to aspell as UTF-8
 */
void ASpellChecker::setActiveLanguages(const QList<QString>& langs) {
    clearSpellers();

    foreach(const QString& lang, langs) {
        // Work on a per-language copy so config_ itself is not modified.
        AspellConfig* conf = aspell_config_clone(config_);
        aspell_config_replace(conf, "lang", lang.toUtf8().constData());
        AspellCanHaveError* ret = new_aspell_speller(conf);
        if (aspell_error_number(ret) == 0) {
            spellers_.append(to_aspell_speller(ret));
        } else {
            // NOTE(review): ret is not released on this path - presumably it
            // should be passed to delete_aspell_can_have_error(); confirm.
            qDebug() << QString("Aspell error: %1").arg(aspell_error_message(ret));
        }
        // The cloned configuration is released on both paths.
        delete_aspell_config(conf);
    }
/**
 * Sets up the aspell configuration (UTF-8, personal dictionary at
 * "<home>/.eiskaltdc++/dict") and tries to create the speller.
 *
 * On failure the aspell message is printed to stdout, the configuration is
 * destroyed and both `config` and `spell_checker` end up NULL.
 *
 * @param parent QObject parent, forwarded to the QObject constructor
 */
SpellCheck::SpellCheck(QObject *parent)
    : QObject(parent), config(NULL), spell_checker(NULL)
{
    config = new_aspell_config();
    // Check before the first use; the original only tested config after
    // already passing it to aspell_config_replace().
    if (!config)
        return;

    aspell_config_replace(config, "encoding", "utf-8");
    aspell_config_replace(config, "personal", (QDir::homePath()+QDir::separator()+".eiskaltdc++"+QDir::separator()+"dict").toAscii().constData());

    /*const AspellDictInfoList *dicts = get_aspell_dict_info_list(config);
    AspellDictInfoEnumeration *enumer = aspell_dict_info_list_elements(dicts);
    const AspellDictInfo *info = NULL;

    QStringList all;

    while ((info = aspell_dict_info_enumeration_next(enumer)) != NULL)
        all.append(QString::fromUtf8(info->code, strlen(info->code)));

    if (WSGET(WS_APP_ASPELL_LANG).isEmpty()){
        QString lc_prefix = QLocale::system().name();

        if (all.contains(lc_prefix))//Loading dictionary from system locale
            aspell_config_replace(config, "lang", lc_prefix.toAscii().constData());
        else if (all.contains(lc_prefix.left(lc_prefix.indexOf("_")))) {
            aspell_config_replace(config, "lang", lc_prefix.left(lc_prefix.indexOf("_")).toAscii().constData());
        }
    }
    else
        aspell_config_replace(config, "lang", WSGET(WS_APP_ASPELL_LANG).toAscii().constData());*/

    AspellCanHaveError *error = new_aspell_speller(config);

    if (aspell_error(error) != 0){
        delete_aspell_config(config);

        printf("%s\n", aspell_error_message(error));

        // The failed result is never converted into a speller; free it.
        delete_aspell_can_have_error(error);

        config = NULL;
    }
    else
        spell_checker = to_aspell_speller(error);
}
void ASpellChecker::setActiveLanguages(const QSet<LanguageManager::LangId>& langs) { clearSpellers(); for(auto const &lang: langs) { AspellConfig* conf = aspell_config_clone(config_); aspell_config_replace(conf, "lang", LanguageManager::toString(lang) .replace(QLatin1Char('-'),QLatin1Char('_')).toUtf8().constData()); AspellCanHaveError* ret = new_aspell_speller(conf); if (aspell_error_number(ret) == 0) { spellers_.append(to_aspell_speller(ret)); } else { qDebug() << QString("Aspell error: %1").arg(aspell_error_message(ret)); } delete_aspell_config(conf); } }
/**
 * Creates the aspell configuration (UTF-8; on Windows also the conf, data and
 * dict directories) and the default speller.
 *
 * When the speller cannot be created a warning with the aspell message is
 * emitted and speller_ stays NULL.
 */
ASpellChecker::ASpellChecker()
{
    config_ = NULL;
    speller_ = NULL;
    config_ = new_aspell_config();
    aspell_config_replace(config_, "encoding", "utf-8");
#ifdef Q_WS_WIN
    aspell_config_replace(config_, "conf-dir", QDir::homeDirPath());
    aspell_config_replace(config_, "data-dir", QString("%1/aspell/data").arg(QCoreApplication::applicationDirPath()));
    aspell_config_replace(config_, "dict-dir", QString("%1/aspell/dict").arg(QCoreApplication::applicationDirPath()));
#endif
    AspellCanHaveError* ret = new_aspell_speller(config_);
    if (aspell_error_number(ret) == 0) {
        speller_ = to_aspell_speller(ret);
    }
    else {
        // The message goes in as an argument, never as the format string: a
        // '%' inside the aspell text would otherwise be read as a conversion.
        qWarning("Aspell error: %s", aspell_error_message(ret));
        // The failed result is not turned into a speller; free it.
        delete_aspell_can_have_error(ret);
    }
}
/** * Create a spell checker for a language * @param language the language code e.g. en_GB or it * @return a checker or NULL */ static checker *checker_create( const char *language ) { int err = 0; checker *c = calloc( 1, sizeof(checker) ); if ( c != NULL ) { strncpy(c->lang,language,24); c->spell_config = new_aspell_config(); if ( c->spell_config != NULL ) { aspell_config_replace( c->spell_config, "lang", language ); AspellCanHaveError *possible_err = new_aspell_speller(c->spell_config); c->spell_checker = 0; if (aspell_error_number(possible_err) != 0) { fprintf(stderr,"%s\n",aspell_error_message(possible_err)); err = 1; } else { c->spell_checker = to_aspell_speller(possible_err); if ( c->spell_checker == NULL ) { fprintf(stderr,"checker: failed to initialise speller\n"); err = 1; } } if ( err ) { checker_dispose( c ); c = NULL; } } else fprintf(stderr,"checker: failed to create speller\n"); } else fprintf(stderr,"checker: failed to create object\n"); return c; }
/** * create a neew spell-checker for the language 'lang' */ void * spellcheck_create(const char * lang) { struct linkgrammar_aspell *aspell = NULL; size_t i = 0; AspellCanHaveError *spell_err = NULL; for (i = 0; i < sizeof(spellcheck_lang_mapping)/sizeof(char *); i += 2) { if (0 != strcmp(lang, spellcheck_lang_mapping[i])) continue; aspell = (struct linkgrammar_aspell *)malloc(sizeof(struct linkgrammar_aspell)); if (!aspell) { prt_error("Error: out of memory. Aspell not used.\n"); aspell = NULL; break; } aspell->config = NULL; aspell->speller = NULL; aspell->config = new_aspell_config(); if (aspell_config_replace(aspell->config, ASPELL_LANG_KEY, spellcheck_lang_mapping[i]) == 0) { prt_error("Error: failed to set language in aspell: %s\n", lang); delete_aspell_config(aspell->config); free(aspell); aspell = NULL; break; } spell_err = new_aspell_speller(aspell->config); if (aspell_error_number(spell_err) != 0) { prt_error("Error: Aspell: %s\n", aspell_error_message(spell_err)); delete_aspell_can_have_error(spell_err); delete_aspell_config(aspell->config); free(aspell); aspell = NULL; break; } aspell->speller = to_aspell_speller(spell_err); break; } return aspell; }
/**
 * Ctor for aspell objects:
 * Aspell.new(language, jargon, size, encoding)
 * Please note: All parameters are optional. If a parameter is omitted, a default value is assumed from
 *              the environment (eg lang from $LANG). To retain default values, you can use nil
 *              as value: to set only size: Aspell.new(nil, nil, "80")
 * @param language ISO639 language code plus optional ISO 3166 country code as string (eg: "de" or "us_US")
 * @param jargon a special jargon of the selected language
 * @param size the size of the dictionary to choose (if there are options)
 * @param encoding the encoding to use
 * @exception Exception if the specified dictionary is not found; the message is
 *            the aspell error text (truncated to 511 bytes).
 */
static VALUE aspell_s_new(int argc, VALUE *argv, VALUE klass) {
    VALUE vlang, vjargon, vsize, vencoding;
    // rb_raise() never returns, so the message is copied to the stack instead
    // of the heap: a strdup()'d copy could never be freed.
    char errmsg[512];
    //aspell values
    AspellCanHaveError * ret;
    AspellSpeller * speller;
    AspellConfig * config;

    //create new config
    config = new_aspell_config();

    //extract values
    rb_scan_args(argc, argv, "04", &vlang, &vjargon, &vsize, &vencoding);

    //language:
    if (RTEST(vlang)) set_option(config, "lang", STR2CSTR(vlang));
    //jargon:
    if (RTEST(vjargon)) set_option(config, "jargon", STR2CSTR(vjargon));
    //size:
    if (RTEST(vsize)) set_option(config, "size", STR2CSTR(vsize));
    //encoding:
    if (RTEST(vencoding)) set_option(config, "encoding", STR2CSTR(vencoding));

    //create speller:
    ret = new_aspell_speller(config);
    delete_aspell_config(config);
    if (aspell_error(ret) != 0) {
        strncpy(errmsg, aspell_error_message(ret), sizeof(errmsg) - 1);
        errmsg[sizeof(errmsg) - 1] = '\0';
        // The message lives inside ret, so ret is released only after the copy.
        delete_aspell_can_have_error(ret);
        rb_raise(cAspellError, "%s", errmsg);
    }

    speller = to_aspell_speller(ret);

    //wrap pointer
    return Data_Wrap_Struct(klass, 0, aspell_free, speller);
}
// Builds the model from _corpora: every corpus is split into tokens, each
// token is mapped to a word (hashtag, emoticon, or a spell-checked canonical
// form), and the k-gram statistics over the token sequences are accumulated
// and condensed into _stats. Progress is written to std::cout. Sets _compiled
// when done.
//
// maxK is stored in _maxK and bounds the prefix length of the k-grams.
//
// runs in O(t^2) time where t is the number of tokens in the input corpus
// We consider maxK to be fairly constant
void rawr::compile(int maxK)
{
  _maxK = maxK;

  // One vector of interned token ids per corpus.
  std::vector<std::vector<token_id>> tokens;
  // Hashtags seen while tokenizing; added to hashtags.forms afterwards.
  std::set<std::string> thashtags;
  // Emoticons listed in emoticons.txt, one per line.
  std::set<std::string> fv_emoticons;

  std::ifstream fvefile("emoticons.txt");
  if (fvefile)
  {
    std::string line;
    while (getline(fvefile, line))
    {
      fv_emoticons.insert(line);
      emoticons.forms.add(line);
    }
  }

  fvefile.close();

  // Maps a canonical token text to the key of its entry in `words`.
  std::map<std::string, std::string> canonical_form;

  // Speller with the default aspell configuration; the program exits if it
  // cannot be created.
  AspellConfig* spell_config = new_aspell_config();
  AspellCanHaveError* possible_err = new_aspell_speller(spell_config);
  if (aspell_error_number(possible_err) != 0)
  {
    std::cout << "aspell error: " << aspell_error_message(possible_err) << std::endl;
    exit(1);
  }

  AspellSpeller* spell_checker = to_aspell_speller(possible_err);

  std::cout << "Reading emojis..." << std::endl;
  prefix_search emojis;
  std::ifstream emoji_file("emojis.txt");
  if (emoji_file)
  {
    while (!emoji_file.eof())
    {
      std::string rawmojis;
      getline(emoji_file, rawmojis);
      // Strip a trailing carriage return.
      // NOTE(review): back() is called without an emptiness check; an empty
      // line (or the failed read at end of file) looks unsafe here - confirm.
      if (rawmojis.back() == '\r')
      {
        rawmojis.pop_back();
      }

      emojis.add(rawmojis);
    }

    emoji_file.close();
  }

  std::cout << "Tokenizing corpus... 0%" << std::flush;
  // Total character count of all corpora, the denominator of the progress
  // percentage below.
  // NOTE(review): len is 0 if every corpus is empty, which would make the
  // percentage computation divide by zero - confirm callers prevent this.
  int len = 0;
  for (auto c : _corpora)
  {
    len += c.length();
  }

  int startper = 0;
  int per = 0;
  int perprime = 0;
  std::cout.fill(' ');
  for (int i = 0; i < _corpora.size(); i++)
  {
    size_t start = 0;
    // NOTE(review): `end` is an int that receives find_first_of() results and
    // is compared against std::string::npos; this relies on the int/size_t
    // conversions round-tripping - confirm before changing either type.
    int end = 0;
    std::vector<token_id> tkcor;

    while (end != std::string::npos)
    {
      // Redraw the percentage only when it changes.
      perprime = (startper + end) * 100 / len;
      if (perprime != per)
      {
        per = perprime;

        std::cout << "\b\b\b\b" << std::right;
        std::cout.width(3);
        std::cout << per << "%" << std::flush;
      }

      // The raw chunk runs up to and including the next space or newline.
      end = _corpora[i].find_first_of(" \n", start);

      bool emoji = false;
      std::string te = _corpora[i].substr(start, (end == std::string::npos) ? std::string::npos : end - start + 1);
      std::string t = "";

      // Skip chunks that are empty, a lone "." or a lone space.
      if (te.compare("") && te.compare(".") && te.compare(" "))
      {
        if (te.back() == ' ')
        {
          te.pop_back();
        }

        // Extract strings of emojis into their own tokens even if they're not space delimited
        int m = emojis.match(te);
        emoji = m > 0;
        if (m == 0) m = 1;
        t = te.substr(0,m);
        te = te.substr(m);

        // Keep consuming while the remainder is of the same kind (emoji or
        // not) as the start of the token; otherwise cut the token here and
        // move `end` back so the rest is read as the next token.
        while (!te.empty())
        {
          m = emojis.match(te);
          if (emoji == (m > 0))
          {
            if (m == 0) m = 1;
            t += te.substr(0,m);
            te = te.substr(m);
          } else {
            end = start + t.length() - 1;
            break;
          }
        }

        // Canonical form: lowercase, without leading opening delimiters and
        // without trailing closing delimiters / punctuation.
        std::string tc(t);
        std::transform(tc.begin(), tc.end(), tc.begin(), ::tolower);

        int pst = tc.find_first_not_of("\"([*");
        int dst = tc.find_last_not_of("\")]*.,?!\n;:");
        std::string canonical("");
        if ((pst != std::string::npos) && (dst != std::string::npos))
        {
          canonical = std::string(tc, pst, dst - pst + 1);
        }

        // Pick the word this token belongs to.
        word& w = ([&] () -> word& {
          // Hashtag freevar
          if (canonical[0] == '#')
          {
            thashtags.insert(canonical);

            return hashtags;
          }

          // Emoticon freevar
          if (emoji)
          {
            emoticons.forms.add(canonical);

            return emoticons;
          }

          // Emoticons from emoticons.txt are matched against the token in its
          // original case.
          if ((pst != std::string::npos) && (dst != std::string::npos))
          {
            std::string emoticon_canon(t, pst, t.find_last_not_of("\"]*\n.,?!") - pst + 1);
            if (fv_emoticons.count(emoticon_canon) == 1)
            {
              emoticons.forms.add(emoticon_canon);

              return emoticons;
            }
          }

          // Basically any other word
          if (canonical_form.count(canonical) == 0)
          {
            if (
              // Legacy freevars should be distinct from tokens containing similar words
              (canonical.find("$name$") != std::string::npos)
              // Words with no letters will be mangled by the spell checker
              || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos)
              )
            {
              canonical_form[canonical] = canonical;
              words.emplace(canonical, canonical);
            } else {
              int correct = aspell_speller_check(spell_checker, canonical.c_str(), canonical.size());
              if (correct)
              {
                words.emplace(canonical, canonical);
                canonical_form[canonical] = canonical;
              } else {
                // Misspelled: file the token under the first suggestion, or
                // under itself when aspell offers none.
                const AspellWordList* suggestions = aspell_speller_suggest(spell_checker, canonical.c_str(), canonical.size());
                AspellStringEnumeration* elements = aspell_word_list_elements(suggestions);
                const char* replacement = aspell_string_enumeration_next(elements);
                if (replacement != NULL)
                {
                  std::string sugrep(replacement);
                  canonical_form[canonical] = sugrep;

                  if (words.count(sugrep) == 0)
                  {
                    words.emplace(sugrep, sugrep);
                  }
                } else {
                  words.emplace(canonical, canonical);
                  canonical_form[canonical] = canonical;
                }

                delete_aspell_string_enumeration(elements);
              }
            }
          }

          word& tw = words.at(canonical_form.at(canonical));
          tw.forms.add(canonical);

          return tw;
        })();

        token tk(w);
        tk.raw = t;

        // Count the opening delimiters at the very start of the token.
        for (char c : t)
        {
          if (c == '*')
          {
            tk.delimiters[{parentype::asterisk, doublestatus::opening}]++;
          } else if (c == '[')
          {
            tk.delimiters[{parentype::square_bracket, doublestatus::opening}]++;
          } else if (c == '(')
          {
            tk.delimiters[{parentype::paren, doublestatus::opening}]++;
          } else if (c == '"')
          {
            tk.delimiters[{parentype::quote, doublestatus::opening}]++;
          } else {
            break;
          }
        }

        // Everything from `backtrack` on is trailing punctuation, closing
        // delimiters or a newline.
        int backtrack = t.find_last_not_of(".,?!])\"*\n;:") + 1;
        if (backtrack != t.length())
        {
          std::string ending = t.substr(backtrack);
          std::string suffix;
          bool newline = false;
          bool terminating = false;

          for (char c : ending)
          {
            if ((c == '.') || (c == ',') || (c == '?') || (c == '!') || (c == ';') || (c == ':'))
            {
              suffix += c;
              terminating = true;

              continue;
            } else if (c == '\n')
            {
              newline = true;
              terminating = true;

              continue;
            }

            // Only the four closing delimiters can reach this point: `ending`
            // consists solely of characters from the set passed to
            // find_last_not_of() above, and the others were handled already.
            parentype pt = ([&] {
              switch (c)
              {
                case ']': return parentype::square_bracket;
                case ')': return parentype::paren;
                case '*': return parentype::asterisk;
                case '"': return parentype::quote;
              }
            })();

            // A closing delimiter that matches an opening one on the same
            // token is recorded as "both"; otherwise as "closing".
            if (tk.delimiters[{pt, doublestatus::opening}] > 0)
            {
              tk.delimiters[{pt, doublestatus::opening}]--;
              tk.delimiters[{pt, doublestatus::both}]++;
            } else {
              tk.delimiters[{pt, doublestatus::closing}]++;
            }
          }

          if (terminating)
          {
            if ((suffix == ",") && (!newline))
            {
              tk.suffix = suffixtype::comma;
            } else {
              tk.suffix = suffixtype::terminating;

              // A token ended by a newline is recorded with a "." terminator
              // regardless of its punctuation.
              if (!newline)
              {
                w.terms.add({suffix, false});
              } else {
                w.terms.add({".", false});
              }
            }
          }
        }

        tkcor.push_back(_tokenstore.add(tk));
      }

      start = ((end > (std::string::npos - 1) ) ? std::string::npos : end + 1);
    }

    tokens.push_back(tkcor);

    startper += _corpora[i].length();
  }

  std::cout << "\b\b\b\b100%" << std::endl;

  // The speller is only needed while tokenizing.
  delete_aspell_speller(spell_checker);
  delete_aspell_config(spell_config);

  std::cout << canonical_form.size() << " distinct forms" << std::endl;
  std::cout << words.size() << " distinct words" << std::endl;

  // Time to condense the distribution stuff for the words
  std::cout << "Compiling token histograms..." << std::endl;
  for (auto& it : words)
  {
    it.second.forms.compile();
    it.second.terms.compile();
  }

  // Hashtag freevar is not frequency distributed
  for (auto& it : thashtags)
  {
    hashtags.forms.add(it);
  }

  hashtags.forms.compile();
  hashtags.terms.compile();

  // Compile other freevars
  emoticons.forms.compile();
  emoticons.terms.compile();

  // Compile the interned tokens.
  _tokenstore.compile();

  // kgram distribution
  std::cout << "Creating markov chain... 0%" << std::flush;
  // For every prefix: the tokens seen after it, with their counters.
  std::map<kgram, std::map<token_id, token_data> > tstats;

  // Progress denominator for this phase.
  len = 0;
  for (auto c : tokens)
  {
    len += (maxK-1) * c.size();
  }

  startper = 0;
  per = 0;
  perprime = 0;
  int corpid = 0;
  for (auto corpus : tokens)
  {
    // The first tokens of a corpus are recorded under a prefix that starts
    // with wildcardQuery followed by the tokens before them.
    for (int k=0; k<maxK && k<corpus.size(); k++)
    {
      // The zero'th token should be a terminator.
      token_id fid = corpus[k];
      const token& f = _tokenstore.get(fid);

      kgram term_prefix(corpus.begin(), corpus.begin()+k);
      term_prefix.push_front(wildcardQuery);

      if (tstats[term_prefix].count(fid) == 0)
      {
        tstats[term_prefix].emplace(fid, fid);
      }

      token_data& td2 = tstats[term_prefix].at(fid);
      td2.all++;
      td2.corpora.insert(corpid);

      // No lowercase letter at all counts as uppercase; otherwise a leading
      // capital counts as titlecase.
      if (std::find_if(f.raw.begin(), f.raw.end(), ::islower) == f.raw.end())
      {
        td2.uppercase++;
      } else if (isupper(f.raw[0]))
      {
        td2.titlecase++;
      }
    }

    // Every window of k tokens is a prefix for the token that follows it.
    for (int k=1; k<maxK && k<corpus.size(); k++)
    {
      for (int i=0; i<(corpus.size() - k); i++)
      {
        perprime = (startper+i) * 100 / len;
        if (perprime != per)
        {
          per = perprime;

          std::cout << "\b\b\b\b" << std::right;
          std::cout.width(3);
          std::cout << per << "%" << std::flush;
        }

        kgram prefix(corpus.begin()+i, corpus.begin()+i+k);
        token_id fid = corpus[i+k];
        const token& f = _tokenstore.get(fid);

        if (tstats[prefix].count(fid) == 0)
        {
          tstats[prefix].emplace(fid, fid);
        }

        token_data& td = tstats[prefix].at(fid);
        td.all++;
        td.corpora.insert(corpid);

        if (std::find_if(f.raw.begin(), f.raw.end(), ::islower) == f.raw.end())
        {
          td.uppercase++;
        } else if (isupper(f.raw[0]))
        {
          td.titlecase++;
        }

        // When the prefix starts with a terminating token, the same
        // observation is also recorded with that first token replaced by
        // wildcardQuery.
        const token& startTok = _tokenstore.get(std::begin(prefix)->tok);
        if (startTok.suffix == suffixtype::terminating)
        {
          kgram term_prefix(prefix);
          term_prefix.pop_front();
          term_prefix.push_front(wildcardQuery);

          if (tstats[term_prefix].count(fid) == 0)
          {
            tstats[term_prefix].emplace(fid, fid);
          }

          token_data& td2 = tstats[term_prefix].at(fid);
          td2.all++;
          td2.corpora.insert(corpid);

          if (std::find_if(f.raw.begin(), f.raw.end(), ::islower) == f.raw.end())
          {
            td2.uppercase++;
          } else if (isupper(f.raw[0]))
          {
            td2.titlecase++;
          }
        }
      }

      startper += corpus.size();
    }

    corpid++;
  }

  std::cout << "\b\b\b\b100%" << std::endl;

  // Condense the kgram distribution
  std::cout << "Compiling kgram distributions... 0%";
  len = tstats.size();
  per = 0;
  perprime = 0;
  int indicator = 0;
  for (auto& it : tstats)
  {
    indicator++;
    perprime = indicator * 100 / len;
    if (per != perprime)
    {
      per = perprime;

      std::cout << "\b\b\b\b" << std::right;
      std::cout.width(3);
      std::cout << per << "%" << std::flush;
    }

    kgram klist = it.first;
    auto& probtable = it.second;
    auto& distribution = _stats[klist];
    // Each entry is keyed by the running total of the `all` counters, so the
    // keys form a cumulative distribution over the following tokens.
    int max = 0;

    for (auto& kt : probtable)
    {
      max += kt.second.all;

      distribution.emplace(max, kt.second);
    }
  }

  std::cout << "\b\b\b\b100%" << std::endl;

  _compiled = true;
}
/* Create a new speller *******************************************************/
/*
 * Accepted call forms:
 *   - no arguments: default aspell configuration;
 *   - two strings: a single (key, value) option;
 *   - any number of (key, value) tuples.
 *
 * Returns a new aspell_AspellObject, or NULL with a Python exception set when
 * an option is rejected, the speller cannot be created or memory runs out.
 */
static PyObject* new_speller(PyObject* self, PyObject* args) {
	aspell_AspellObject* newobj;

	AspellSpeller* speller = 0;
	AspellConfig*  config;
	AspellCanHaveError* possible_error;

	int i;
	int n; /* arg count */
	char *key, *value;

	config = new_aspell_config();
	if (config == NULL) {
		PyErr_SetString(_AspellModuleException, "can't create config");
		return NULL;
	}

	/* check constructor arguments */
	n = PyTuple_Size(args);
	switch (n) {
		case 0: /* no arguments passed */
			break;

		case 2: /* constructor is called with single pair: key & value */
			if (PyArg_ParseTuple(args, "ss", &key, &value)) {
				if (!aspell_config_replace(config, key, value)) {
					PyErr_SetString(_AspellConfigException, aspell_config_error_message(config));
					goto arg_error;
				}
				break;
			}
			/* not two strings: retry below as a list of two tuples */
			PyErr_Clear();
			/* fall through */
		default: /* list of tuples key&value */
			for (i=0; i<n; i++) {
				if (!PyArg_ParseTuple(PyTuple_GetItem(args, i), "ss", &key, &value)) {
					PyErr_Format(PyExc_TypeError, "argument %d: tuple of two strings (key, value) expeced", i);
					goto arg_error;
				}
				if (!aspell_config_replace(config, key, value)) {
					PyErr_SetString(_AspellConfigException, aspell_config_error_message(config));
					goto arg_error;
				}
			}
			/* args is a borrowed reference owned by the caller, so it must
			   not be Py_DECREF'ed here. */
			break;
	}

	/* try to create a new speller */
	possible_error = new_aspell_speller(config);
	delete_aspell_config(config);

	if (aspell_error_number(possible_error) == 0)
		/* save a speller */
		speller = to_aspell_speller(possible_error);
	else {
		/* or raise an exception */
		PyErr_SetString(_AspellSpellerException, aspell_error_message(possible_error));
		delete_aspell_can_have_error(possible_error);
		return NULL;
	}

	/* create a new py-object */
	newobj = (aspell_AspellObject*)PyObject_New(aspell_AspellObject, &aspell_AspellType);
	if (newobj == NULL) {
		/* allocation failed (exception already set): don't leak the speller */
		delete_aspell_speller(speller);
		return NULL;
	}
	newobj->speller = speller;

	return (PyObject*)newobj;

/* argument error: before return NULL we need to
   delete speller's config we've created */
arg_error:
	delete_aspell_config(config);
	return NULL;
}
/**
 * Create a userdata object
 * @param language the language e.g. "en_GB", used for the aspell speller
 * @param barefile passed to open_dest_files together with fmt
 * @param rules the recipe stored in the object
 * @param fmt the format object containing function pointers
 * @param hhe stored in the object when not NULL
 * @return a complete userdata object or NULL
 */
userdata *userdata_create( const char *language, char *barefile,
    recipe *rules, format *fmt, hh_exceptions *hhe )
{
    int err = 0;
    userdata *u = calloc( 1, sizeof(userdata) );
    if ( u != NULL )
    {
        u->rules = rules;
        if ( hhe != NULL )
            u->hhe = hhe;
        u->spell_config = new_aspell_config();
        if ( u->spell_config != NULL )
        {
            aspell_config_replace( u->spell_config, "lang", language );
            AspellCanHaveError *possible_err
                = new_aspell_speller(u->spell_config);
            u->spell_checker = 0;
            if (aspell_error_number(possible_err) != 0)
            {
                fprintf(stderr,"%s\n",aspell_error_message(possible_err));
                /* the failed result is not turned into a speller: free it */
                delete_aspell_can_have_error(possible_err);
                err = 1;
            }
            else
            {
                u->spell_checker = to_aspell_speller(possible_err);
                if ( u->spell_checker == NULL )
                {
                    fprintf(stderr,"userdata: failed to initialise speller\n");
                    err = 1;
                }
            }
            /* The remaining members are set up even after a speller failure;
               userdata_dispose() below cleans up whatever was created. */
            u->range_stack = stack_create();
            if ( u->range_stack == NULL )
            {
                err = 1;
                fprintf(stderr,
                    "stripper: failed to allocate store for range stack" );
            }
            u->ignoring = stack_create();
            if ( u->ignoring == NULL )
            {
                err = 1;
                fprintf(stderr,
                    "stripper: failed to allocate store for ignore stack" );
            }
            if ( !open_dest_files(u,barefile,fmt) )
            {
                err = 1;
                fprintf(stderr,"stripper: couldn't open dest files\n");
            }
        }
        else
        {
            fprintf(stderr, "userdata: failed to initialise speller\n");
            err = 1;
        }
    }
    else
        fprintf(stderr, "userdata:failed to allocate object\n");
    if ( err )
    {
        userdata_dispose( u );
        u = NULL;
    }
    return u;
}
int main(int argc, const char *argv[]) { AspellCanHaveError * ret; AspellSpeller * speller; int have; char word[81]; char * p; char * word_end; AspellConfig * config; if (argc < 2) { printf("Usage: %s <language> [<size>|- [[<jargon>|- [<encoding>]]]\n", argv[0]); return 1; } config = new_aspell_config(); aspell_config_replace(config, "lang", argv[1]); if (argc >= 3 && argv[2][0] != '-' && argv[2][1] != '\0') aspell_config_replace(config, "size", argv[2]); if (argc >= 4 && argv[3][0] != '-') aspell_config_replace(config, "jargon", argv[3]); if (argc >= 5 && argv[4][0] != '-') aspell_config_replace(config, "encoding", argv[4]); ret = new_aspell_speller(config); delete_aspell_config(config); if (aspell_error(ret) != 0) { printf("Error: %s\n",aspell_error_message(ret)); delete_aspell_can_have_error(ret); return 2; } speller = to_aspell_speller(ret); config = aspell_speller_config(speller); fputs("Using: ", stdout); fputs(aspell_config_retrieve(config, "lang"), stdout); fputs("-", stdout); fputs(aspell_config_retrieve(config, "jargon"), stdout); fputs("-", stdout); fputs(aspell_config_retrieve(config, "size"), stdout); fputs("-", stdout); fputs(aspell_config_retrieve(config, "module"), stdout); fputs("\n\n", stdout); puts("Type \"h\" for help.\n"); while (fgets(word, 80, stdin) != 0) { /* remove trailing spaces */ word_end = strchr(word, '\0') - 1; while (word_end != word && (*word_end == '\n' || *word_end == ' ')) --word_end; ++word_end; *word_end = '\0'; putchar('\n'); switch (word[0]) { case '\0': break; case 'h': puts( "Usage: \n" " h(elp) help\n" " c <word> check if a word is the correct spelling\n" " s <word> print out a list of suggestions for a word\n" " a <word> add a word to the personal word list\n" " i <word> ignore a word for the rest of the session\n" " d <file> spell checks a document\n" " p dumps the personal word list\n" " P dumps the session word list\n" " m dumps the main word list\n" " o <option> <value> sets a config option\n" " r <option> retrieves 
a config option\n" " l <option> retrieves a config option as a list\n" " S saves all word lists\n" " C clear the curent sesstion word list\n" " x quite\n" ); break; case 'p': print_word_list(speller, aspell_speller_personal_word_list(speller), '\n'); break; case 'P': print_word_list(speller, aspell_speller_session_word_list(speller), '\n'); break; case 'm': print_word_list(speller, aspell_speller_main_word_list(speller), '\n'); break; case 'S': aspell_speller_save_all_word_lists(speller); check_for_error(speller); break; case 'C': aspell_speller_clear_session(speller); check_for_error(speller); break; case 'x': goto END; case 'c': if (strlen(word) < 3) { printf("Usage: %c <word>\n", word[0]); } else { have = aspell_speller_check(speller, word + 2, -1); if (have == 1) puts("correct"); else if (have == 0) puts("incorrect"); else printf("Error: %s\n", aspell_speller_error_message(speller)); } break; case 's': if (strlen(word) < 3) { printf("Usage: %c <word>\n", word[0]); } else { print_word_list(speller, aspell_speller_suggest(speller, word + 2, -1), '\n'); } break; case 'a': if (strlen(word) < 3) { printf("Usage: %c <word>\n", word[0]); } else { aspell_speller_add_to_personal(speller, word + 2, -1); check_for_error(speller); } break; case 'i': if (strlen(word) < 3) { printf("Usage: %c <word>\n", word[0]); } else { aspell_speller_add_to_session(speller, word + 2, -1); check_for_error(speller); } break; case 'o': word[80] = '\0'; /* to make sure strchr doesn't run off end of string */ p = strchr(word + 3, ' '); if (strlen(word) < 3 || p == 0) { printf("Usage: %c <option> <value>\n", word[0]); } else { *p = '\0'; ++p; aspell_config_replace(config, word + 2, p); check_for_config_error(config); } break; case 'r': if (strlen(word) < 3) { printf("Usage: %c <option>\n", word[0]); } else { const char * val = aspell_config_retrieve(config, word + 2); check_for_config_error(config); if (val) printf("%s = \"%s\"\n", word + 2, val); } break; case 'l': if (strlen(word) < 3) { 
printf("Usage: %c <option>\n", word[0]); } else { AspellStringList * lst = new_aspell_string_list(); AspellMutableContainer * lst0 = aspell_string_list_to_mutable_container(lst); AspellStringEnumeration * els; const char * val; aspell_config_retrieve_list(config, word + 2, lst0); check_for_config_error(config); els = aspell_string_list_elements(lst); printf("%s:\n", word + 2); while ( (val = aspell_string_enumeration_next(els)) != 0) printf(" %s\n", val); delete_aspell_string_enumeration(els); delete_aspell_string_list(lst); } break; case 'd': if (strlen(word) < 3) { printf("Usage: %c <file>\n", word[0]); } else { check_document(speller, word + 2); printf("\n"); } break; default: printf("Unknown Command: %s\n", word); } putchar('\n'); } END: delete_aspell_speller(speller); return 0; }
AspellSpeller * #endif /* USE_ENCHANT */ weechat_aspell_speller_new (const char *lang) { #ifdef USE_ENCHANT EnchantDict *new_speller; #else AspellConfig *config; AspellCanHaveError *ret; AspellSpeller *new_speller; #endif /* USE_ENCHANT */ struct t_infolist *infolist; if (!lang) return NULL; if (weechat_aspell_plugin->debug) { weechat_printf (NULL, "%s: creating new speller for lang \"%s\"", ASPELL_PLUGIN_NAME, lang); } #ifdef USE_ENCHANT new_speller = enchant_broker_request_dict (broker, lang); if (!new_speller) { weechat_printf (NULL, _("%s%s: error: unable to create speller for lang \"%s\""), weechat_prefix ("error"), ASPELL_PLUGIN_NAME, lang); return NULL; } #else /* create a speller instance for the newly created cell */ config = new_aspell_config (); aspell_config_replace (config, "lang", lang); #endif /* USE_ENCHANT */ /* apply all options */ infolist = weechat_infolist_get ("option", NULL, "aspell.option.*"); if (infolist) { while (weechat_infolist_next (infolist)) { #ifdef USE_ENCHANT /* TODO: set option with enchant */ #else aspell_config_replace (config, weechat_infolist_string (infolist, "option_name"), weechat_infolist_string (infolist, "value")); #endif /* USE_ENCHANT */ } weechat_infolist_free (infolist); } #ifndef USE_ENCHANT ret = new_aspell_speller (config); if (aspell_error (ret) != 0) { weechat_printf (NULL, "%s%s: error: %s", weechat_prefix ("error"), ASPELL_PLUGIN_NAME, aspell_error_message (ret)); delete_aspell_config (config); delete_aspell_can_have_error (ret); return NULL; } new_speller = to_aspell_speller (ret); #endif /* USE_ENCHANT */ weechat_hashtable_set (weechat_aspell_spellers, lang, new_speller); #ifndef USE_ENCHANT /* free configuration */ delete_aspell_config (config); #endif /* USE_ENCHANT */ return new_speller; }
/**
 * Spell check the whole text of the page returned by getPageStructPtr(-1).
 *
 * Every misspelling reported by aspell is handed to checkTheWord(); when
 * that leaves a replacement in goodWord the word is replaced in a private
 * copy of the text. Unless the check is cancelled (cancelCheck), the buffer
 * contents are replaced by the corrected copy at the end.
 *
 * @param widget unused
 * @param data unused
 */
void doSpellCheckDoc(GtkWidget* widget,gpointer data)
{
	GtkTextIter				start;
	GtkTextIter				end;
	AspellCanHaveError*		ret;
	AspellDocumentChecker*	checker;
	AspellToken				token;
	int						diff;
	size_t					goodwordlen;
	size_t					linelen;
	size_t					wordoffset;
	char*					word_begin;
	char*					badword;
	char*					line;
	pageStruct*				page=getPageStructPtr(-1);

	(void)widget;
	(void)data;

	/* guard against a missing page structure */
	if(page==NULL)
		return;

	gtk_text_buffer_get_start_iter((GtkTextBuffer*)page->buffer,&start);
	gtk_text_buffer_get_end_iter((GtkTextBuffer*)page->buffer,&end);
	/* newly allocated copy of the text; released with g_free below */
	line=gtk_text_buffer_get_text((GtkTextBuffer*)page->buffer,&start,&end,false);

	/* Set up the document checker */
	ret=new_aspell_document_checker(spellChecker);
	if (aspell_error(ret)!=0)
		{
			printf("Error: %s\n",aspell_error_message(ret));
			delete_aspell_can_have_error(ret);
			g_free(line);
			return;
		}

	checker=to_aspell_document_checker(ret);
	/* First process the line */
	aspell_document_checker_process(checker,line,-1);
	linelen=strlen(line);
	diff=0;
	/* Now find the misspellings in the line */
	while(token=aspell_document_checker_next_misspelling(checker),token.len!=0)
		{
			/* token.offset refers to the text as it was processed; diff is
			   the accumulated length change of the replacements so far */
			word_begin=line+token.offset+diff;

			/* NOTE(review): badword is not freed here because checkTheWord()
			   may keep the pointer - confirm ownership */
			if(asprintf(&badword,"%.*s",(int)token.len,word_begin)<0)
				{
					delete_aspell_document_checker(checker);
					g_free(line);
					return;
				}

			goodWord=NULL;
			checkTheWord(badword,1);
			if(cancelCheck==true)
				{
					delete_aspell_document_checker(checker);
					g_free(line);
					return;
				}

			if(goodWord!=NULL)
				{
					goodwordlen=strlen(goodWord);
					if(goodwordlen>token.len)
						{
							/* the copy is exactly as large as the text, so
							   grow it before shifting the tail to the right */
							wordoffset=(size_t)(word_begin-line);
							line=(char*)g_realloc(line,linelen+(goodwordlen-token.len)+1);
							word_begin=line+wordoffset;
						}
					/* Replace the misspelled word with the replacement */
					memmove(word_begin+goodwordlen,word_begin+token.len,strlen(word_begin+token.len)+1);
					memcpy(word_begin,goodWord,goodwordlen);
					diff+=(int)goodwordlen-(int)token.len;
					linelen=linelen+goodwordlen-token.len;
				}
		}

	delete_aspell_document_checker(checker);

	/* swap the buffer contents for the corrected text */
	gtk_text_buffer_get_bounds((GtkTextBuffer*)page->buffer,&start,&end);
	gtk_text_buffer_select_range((GtkTextBuffer*)page->buffer,&start,&end);
	gtk_text_buffer_delete_selection((GtkTextBuffer*)page->buffer,true,true);
	gtk_text_buffer_get_start_iter((GtkTextBuffer*)page->buffer,&start);
	gtk_text_buffer_insert((GtkTextBuffer*)page->buffer,&start,line,-1);
	g_free(line);

	if(spellCheckWord!=NULL)
		{
			gtk_widget_destroy(spellCheckWord);
			spellCheckWord=NULL;
		}
}
/*
 * Interactively spell check the text file "filename" line by line and
 * write the result to "<filename>.checked".
 *
 * For every misspelling the line is shown with the word marked by '*',
 * suggestions are printed and a replacement is read from stdin; an empty
 * answer leaves the word unchanged.
 *
 * This is meant as an illustrative example only: lines are handled in
 * fixed 256 byte buffers, and a replacement that would not fit into the
 * line buffer is rejected.
 */
static void check_document(AspellSpeller * speller, const char * filename)
{
  AspellCanHaveError * ret;
  AspellDocumentChecker * checker;
  AspellToken token;
  FILE * doc, * out;
  char line[256], repl[256], checked_filename[256];
  int diff;
  int name_len;
  int input_open = 1;  /* cleared once stdin reaches end of input */
  size_t repl_len;
  char * word_begin;

  /* Open the file */
  doc = fopen(filename, "r");
  if (doc == 0) {
    printf("Error: Unable to open the file \"%s\" for reading.", filename);
    return;
  }

  /* Open filename.checked for writing the results */
  name_len = snprintf(checked_filename, sizeof checked_filename,
                      "%s.checked", filename);
  if (name_len < 0 || (size_t)name_len >= sizeof checked_filename) {
    printf("Error: The file name \"%s\" is too long.", filename);
    fclose(doc);
    return;
  }
  out = fopen(checked_filename, "w");
  if (out == 0) {
    printf("Error: Unable to open the file \"%s\" for writing.",
           checked_filename);
    fclose(doc);
    return;
  }

  /* Set up the document checker */
  ret = new_aspell_document_checker(speller);
  if (aspell_error(ret) != 0) {
    printf("Error: %s\n",aspell_error_message(ret));
    delete_aspell_can_have_error(ret);
    fclose(out);
    fclose(doc);
    return;
  }
  checker = to_aspell_document_checker(ret);

  while (fgets(line, sizeof line, doc)) {

    /* without any more user input the remaining lines are copied as is */
    if (input_open) {

      /* First process the line */
      aspell_document_checker_process(checker, line, -1);

      diff = 0;

      /* Now find the misspellings in the line */
      while (token = aspell_document_checker_next_misspelling(checker),
             token.len != 0)
      {
        /* Print out the misspelling and get a replacement from the user */

        /* Pay particular attention to how token.offset and diff is used:
           token.offset refers to the line as it was processed, diff is the
           accumulated length change of the replacements made so far */
        word_begin = line + token.offset + diff;
        printf("%.*s*%.*s*%s",
               (int)(token.offset + diff), line,
               (int)token.len, word_begin,
               word_begin + token.len);

        printf("Suggestions: ");
        print_word_list(speller,
                        aspell_speller_suggest(speller, word_begin, token.len),
                        ' ');
        printf("\n");

        printf("Replacement? ");
        if (fgets(repl, sizeof repl, stdin) == 0) {
          /* end of input: stop asking */
          input_open = 0;
          break;
        }
        printf("\n");

        if (repl[0] == '\n') continue; /* ignore the current misspelling */

        /* strip the newline, if there is one */
        repl_len = strlen(repl);
        if (repl_len > 0 && repl[repl_len - 1] == '\n')
          repl[--repl_len] = '\0';
        if (repl_len == 0) continue;

        /* a longer replacement must still fit into the line buffer */
        if (repl_len > token.len
            && strlen(line) + (repl_len - token.len) >= sizeof line)
        {
          printf("Error: The replacement does not fit into the line.\n");
          continue;
        }

        /* Replace the misspelled word with the replacement */
        diff += (int)repl_len - (int)token.len;
        memmove(word_begin + repl_len, word_begin + token.len,
                strlen(word_begin + token.len) + 1);
        memcpy(word_begin, repl, repl_len);
      }
    }

    /* print the line to filename.checked */
    fputs(line, out);
  }

  delete_aspell_document_checker(checker);
  fclose(out);
  fclose(doc);

  printf("Done. Results saved to \"%s\".", checked_filename);
}