void ContextTracker::update() { std::stringstream change; // prepend partially entered token to change if exists, need to // look into sliding_window to get previously partially entered // token if it exists std::stringstream sliding_window_stream; sliding_window_stream << contextChangeDetector->get_sliding_window(); ReverseTokenizer rTok(sliding_window_stream, blankspaceChars, separatorChars); std::string first_token = rTok.nextToken(); if (!first_token.empty()) { change << first_token; } logger << DEBUG << "update(): getPastStream(): " << getPastStream() << endl; // append change detected by context change detector change << contextChangeDetector->change(getPastStream()); logger << INFO << "update(): change: " << change.str() << endl; // split change up into tokens std::vector<std::string> change_tokens; ForwardTokenizer tok(change, blankspaceChars, separatorChars); logger << INFO << "update(): tokenized change: "; while (tok.hasMoreTokens()) { std::string token = tok.nextToken(); change_tokens.push_back(token); logger << INFO << token << ':'; } logger << INFO << endl; if (! change_tokens.empty()) { // remove prefix (partially entered token or empty token) change_tokens.pop_back(); } logger << INFO << "update(): change tokens: "; for (std::vector<std::string>::const_iterator it = change_tokens.begin(); it != change_tokens.end(); it++) { logger << INFO << *it << ':'; } logger << INFO << endl; // time to learn PluginRegistry::Iterator it = pluginRegistry->iterator(); Plugin* plugin = 0; while (it.hasNext()) { plugin = it.next(); plugin->learn(change_tokens); } // update sliding window contextChangeDetector->update_sliding_window(getPastStream()); }
std::string ContextChangeDetector::change(const std::string& past_stream) const { const std::string& prev_context = sliding_window; // let's rename these const std::string& curr_context = past_stream; // for clarity's sake std::string result; if (sliding_window.empty()) { result = past_stream; } else { // find position of previous context in current context // i.e. find index pointing to last char of last occurence of // prev_context in curr_context std::string::size_type ctx_idx = curr_context.rfind(prev_context); if (ctx_idx == std::string::npos) { // prev_context could not be found in curr_context, a lot // changed result = past_stream; } else { // found prev_context, examine remainder string. // remainder string is substr(ctx_idx + // prev_context.size()); i.e. substring given by index // returned by rfind (which points at beginning of // prev_context string found in curr_context) plus size of // prev_context: this index points at end of prev_context // substring found in curr_context result = curr_context.substr(ctx_idx + prev_context.size()); // handle case where a context change has occured and // remainder string only contains part of the last token, // i.e.: // // sliding_window = "The quick bro"; // past_stream = "The quick brown "; // // In this case, the remainder will only contain "wn", and // the last token in the sliding window must be prepended // to the change to be learnt // if (context_change(past_stream)) { // prepend partially entered token to change if it // exists, need to look into sliding_window to get // previously partially entered token if it exists std::stringstream sliding_window_stream; sliding_window_stream << get_sliding_window(); ReverseTokenizer rTok(sliding_window_stream, blankspaceChars, separatorChars); rTok.lowercaseMode(lowercase_mode); std::string first_token = rTok.nextToken(); if (!first_token.empty()) { result = first_token + result; } } } } return result; }