// Builds a filter from a regular expression and a rewrite string.
//
// Throws converter_exception when the pattern is not usable (re_.ok() is
// false) or when re_.CheckRewriteString() rejects the rewrite string.
regexp_filter::regexp_filter(
    const std::string& regexp,
    const std::string& replace)
    : re_(regexp),
      replace_(replace) {
  const bool pattern_usable = re_.ok();
  if (!pattern_usable) {
    throw JUBATUS_EXCEPTION(
        converter_exception("invalid regular expression: " + regexp));
  }

  // Whatever CheckRewriteString writes into rewrite_error becomes the
  // leading part of the exception message.
  std::string rewrite_error;
  const bool rewrite_usable = re_.CheckRewriteString(replace, &rewrite_error);
  if (!rewrite_usable) {
    throw JUBATUS_EXCEPTION(
        converter_exception(rewrite_error + " : " + replace));
  }
}
// Stores `max` in max_size_.
// Throws converter_exception when `max` is zero.
feature_hasher::feature_hasher(uint64_t max)
    : max_size_(max) {
  if (max != 0) {
    return;
  }
  throw JUBATUS_EXCEPTION(
      converter_exception("feature max size must be positive"));
}
// Builds a filter from a regular expression and its replacement text.
// Throws converter_exception when re_.ok() reports a bad pattern.
re2_filter::re2_filter(const string& regexp, const string& replace)
    : re_(regexp),
      replace_(replace) {
  if (re_.ok()) {
    return;
  }
  throw JUBATUS_EXCEPTION(
      converter_exception("invalid regular expression: " + regexp));
}
// Returns a reference to the value stored under `key` in `params`.
// Throws converter_exception when `key` is not present.
const string& get_or_die(
    const map<string, string>& params,
    const string& key) {
  typedef map<string, string>::const_iterator param_iterator;

  const param_iterator found = params.find(key);
  if (found != params.end()) {
    return found->second;
  }

  throw JUBATUS_EXCEPTION(converter_exception(
      string("\"" + key + "\" is not specified in parameters")));
}
// Compiles `regexp` with Oniguruma (UTF-8 encoding, Perl syntax, default
// options) and stores the compiled pattern in reg_.
//
// Throws converter_exception when onig_new does not return ONIG_NORMAL.
regexp_match::regexp_match(const std::string& regexp)
    : reg_(NULL) {
  const UChar* pattern = reinterpret_cast<const UChar*>(regexp.c_str());
  const UChar* pattern_end = pattern + regexp.size();

  // onig_new writes the compiled pattern through the address of reg_.
  const int status = onig_new(
      &reg_,
      pattern,
      pattern_end,
      ONIG_OPTION_DEFAULT,
      ONIG_ENCODING_UTF8,
      ONIG_SYNTAX_PERL,
      NULL);
  if (status != ONIG_NORMAL) {
    throw JUBATUS_EXCEPTION(converter_exception("invalid regular expression"));
  }
}
// Looks up `name` in the library referred to by handle_ and returns the
// address reported by dlsym.
//
// Throws converter_exception carrying the dlerror() text when dlerror()
// reports a failure after the lookup.
void* dynamic_loader::load_symbol(const std::string& name) const {
  // Discard any stale error so the dlerror() call below reflects only this
  // lookup.
  dlerror();

  void* symbol = dlsym(handle_, name.c_str());
  char* lookup_error = dlerror();
  if (lookup_error == NULL) {
    return symbol;
  }
  throw converter_exception(lookup_error);
}
// Builds a splitter from a regular expression and a capture-group index.
//
// Throws converter_exception, in this order of checks, when:
//   - `group` is negative,
//   - re_.ok() reports a bad pattern,
//   - `group` exceeds re_.NumberOfCapturingGroups().
regexp_splitter::regexp_splitter(const std::string& regexp, int group)
    : re_(regexp),
      group_(group) {
  if (group < 0) {
    std::string negative_msg("'group' must be positive: ");
    negative_msg += lexical_cast<std::string>(group);
    throw JUBATUS_EXCEPTION(converter_exception(negative_msg));
  }

  if (!re_.ok()) {
    throw JUBATUS_EXCEPTION(
        converter_exception("invalid regular expression: " + regexp));
  }

  if (group > re_.NumberOfCapturingGroups()) {
    std::string msg("regexp '");
    msg += regexp;
    msg += "' only contains ";
    msg += lexical_cast<std::string>(re_.NumberOfCapturingGroups());
    msg += " groups, but 'group' is ";
    msg += lexical_cast<std::string>(group);
    throw JUBATUS_EXCEPTION(converter_exception(msg));
  }
}
// Creates the num_feature registered under `name`.
//
// Only "dynamic" is handled here; it is delegated to
// create_dynamic_num_feature(params). Any other name raises
// converter_exception.
num_feature* num_feature_factory::create(
    const string& name,
    const num_feature_factory::param_t& params) const {
  if (name != "dynamic") {
    throw converter_exception(string("unknonwn num feature name: ") + name);
  }
  return create_dynamic_num_feature(params);
}
const int get_int_or_die(const map<string, string>& params, const string& key) { const string& s = get_or_die(params, key); try { return pfi::lang::lexical_cast<int>(s); } catch (const bad_cast& e) { throw JUBATUS_EXCEPTION(converter_exception(string("\"" + key + "\" must be an integer value: " + s))); } }
// Opens the shared library at `path` with dlopen(RTLD_LAZY) and stores the
// resulting handle in handle_.
//
// Throws converter_exception containing the path and the dlerror() text when
// dlopen returns no handle.
dynamic_loader::dynamic_loader(const std::string& path)
    : handle_(0) {
  void* library = dlopen(path.c_str(), RTLD_LAZY);
  if (library) {
    handle_ = library;
    return;
  }

  char* open_error = dlerror();
  throw converter_exception(
      "cannot load dynamic library: " + path + ": " + open_error);
}
// Creates a MeCab tagger from the argument string `arg`.
//
// Throws converter_exception, with MeCab::getTaggerError() appended to the
// message, when MeCab::createTagger returns a null pointer.
static MeCab::Tagger* create_mecab_tagger(const char* arg) {
  MeCab::Tagger* const tagger = MeCab::createTagger(arg);
  if (tagger) {
    return tagger;
  }

  const string reason =
      string("cannot make mecab tagger: ") + MeCab::getTaggerError();
  throw converter_exception(reason);
}
// Creates the num_filter registered under `name`.
//
// "add" and "dynamic" are dispatched to their respective creation helpers;
// any other name raises converter_exception.
num_filter* num_filter_factory::create(
    const string& name,
    const map<string, string>& params) const {
  if (name == "add") {
    return create_add_filter(params);
  }
  if (name == "dynamic") {
    return create_dynamic_filter(params);
  }
  throw JUBATUS_EXCEPTION(
      converter_exception("unknonw num filter name: " + name));
}
// Creates the word_splitter registered under `name`.
//
// "ngram" and "dynamic" are dispatched to their respective creation helpers;
// any other name raises converter_exception.
word_splitter* splitter_factory::create(
    const std::string& name,
    const param_t& params) const {
  if (name == "ngram") {
    return create_character_ngram(params);
  }
  if (name == "dynamic") {
    return create_dynamic_splitter(params);
  }
  throw JUBATUS_EXCEPTION(
      converter_exception(std::string("unknown splitter name: ") + name));
}
// Compiles `regexp` with Oniguruma (UTF-8 encoding, Perl syntax, default
// options), stores the compiled pattern in reg_ and remembers `group`.
//
// Throws converter_exception, in this order of checks, when:
//   - `group` is negative,
//   - onig_new does not return ONIG_NORMAL,
//   - `group` exceeds the pattern's number of capture groups.
regexp_splitter::regexp_splitter(const std::string& regexp, int group)
    : reg_(NULL),
      group_(group) {
  if (group < 0) {
    throw JUBATUS_EXCEPTION(converter_exception("'group' must be positive"));
  }

  const UChar* pattern = reinterpret_cast<const UChar*>(regexp.data());
  const UChar* pattern_end = pattern + regexp.size();

  // onig_new writes the compiled pattern through the address of reg_.
  const int status = onig_new(
      &reg_,
      pattern,
      pattern_end,
      ONIG_OPTION_DEFAULT,
      ONIG_ENCODING_UTF8,
      ONIG_SYNTAX_PERL,
      NULL);
  if (status != ONIG_NORMAL) {
    throw JUBATUS_EXCEPTION(converter_exception("invalid regular expression"));
  }

  const int num_capture = onig_number_of_captures(reg_);
  if (group > num_capture) {
    // A destructor is not run for an object whose constructor throws, so the
    // compiled pattern must be released here or it would leak.
    onig_free(reg_);
    reg_ = NULL;

    std::string msg = "regexp '" + regexp + "' only contains "
        + lexical_cast<std::string>(num_capture)
        + " groups, but 'group' is "
        + lexical_cast<std::string>(group);
    throw JUBATUS_EXCEPTION(converter_exception(msg));
  }
}
void* dynamic_loader::load_symbol(const std::string& name) const { dlerror(); void* func = dlsym(handle_, name.c_str()); char* error = dlerror(); if (error != NULL) { throw JUBATUS_EXCEPTION(converter_exception("cannot dlsym: " + name) << jubatus::core::common::exception::error_api_func("dlsym") << jubatus::core::common::exception::error_message("dlsym name: " + name) << jubatus::core::common::exception::error_message(error)); } return func; }
// Opens the shared library at `path` with dlopen(RTLD_LAZY) and stores the
// resulting handle in handle_.
//
// Throws converter_exception, annotated with the API name, the file name and
// the dlerror() text, when dlopen returns no handle.
dynamic_loader::dynamic_loader(const std::string& path)
    : handle_(0) {
  namespace exc = jubatus::exception;

  void* library = dlopen(path.c_str(), RTLD_LAZY);
  if (library) {
    handle_ = library;
    return;
  }

  char* open_error = dlerror();
  throw JUBATUS_EXCEPTION(
      converter_exception(
          "cannot load dynamic library: " + path + ": " + open_error)
      << exc::error_api_func("dlopen")
      << exc::error_file_name(path)
      << exc::error_message(open_error));
}
// Creates the string_filter registered under `name`.
//
// "regexp" is only recognized when HAVE_RE2 is defined; "dynamic" is always
// recognized. Any other name raises converter_exception.
string_filter* string_filter_factory::create(
    const string& name,
    const map<string, string>& params) const {
#ifdef HAVE_RE2
  if (name == "regexp") {
    return create_re2_filter(params);
  }
#endif
  if (name == "dynamic") {
    return create_dynamic_filter(params);
  }
  throw converter_exception("unknown filter name: " + name);
}
// Opens the plugin shared library named by `path` and keeps the handle in
// handle_.
//
// Lookup order, as implemented below:
//   1. When is_absolute_or_relative_path(path) holds, `path` is passed to
//      dlopen unchanged.
//   2. Otherwise the directory returned by get_plugin_path() is tried (when
//      it is non-null), then JUBATUS_PLUGIN_DIR if no handle was obtained.
//
// Throws converter_exception, annotated with the API name, the file name and
// the dlerror() text, when no attempt produced a handle.
//
// On success the plugin's "version" symbol is looked up and logged; if
// load_symbol throws converter_exception, a warning is logged instead and
// construction still succeeds.
dynamic_loader::dynamic_loader(const std::string& path)
    : handle_(0) {
  void* handle = NULL;
  std::string loaded_path;

  if (is_absolute_or_relative_path(path)) {
    // Load the plugin with the given path
    handle = ::dlopen(path.c_str(), RTLD_LAZY);
    loaded_path = path;
  } else {
    // Try to load the plugin from the plugin path environment
    const char* plugin_dir = get_plugin_path();
    if (plugin_dir) {
      const std::string plugin_path = std::string(plugin_dir) + "/" + path;
      handle = ::dlopen(plugin_path.c_str(), RTLD_LAZY);
      loaded_path = plugin_path;
    }

    // If failed, try to load it from the plugin directory specified on
    // configure.
    if (!handle) {
      const std::string plugin_path =
          std::string(JUBATUS_PLUGIN_DIR) + "/" + path;
      handle = ::dlopen(plugin_path.c_str(), RTLD_LAZY);
      loaded_path = plugin_path;
    }
  }

  // handle_ must be set before load_symbol is called below.
  handle_ = handle;

  if (!handle_) {
    char* error = dlerror();
    throw JUBATUS_EXCEPTION(
        converter_exception(
            "cannot load dynamic library: " + path + ": " + error)
        << jubatus::core::common::exception::error_api_func("dlopen")
        << jubatus::core::common::exception::error_file_name(path)
        << jubatus::core::common::exception::error_message(error));
  }

  try {
    typedef std::string (*func_t)(void);
    func_t version = reinterpret_cast<func_t>(load_symbol("version"));
    LOG(INFO) << "plugin loaded: " << common::real_path(loaded_path)
              << " version: " << version();
  } catch (const converter_exception&) {
    // Caught by const reference so the exception object is not copied.
    LOG(WARN) << "plugin loaded: " << common::real_path(loaded_path)
              << " but version information is unavailable";
  }
}
// Creates the string_feature registered under `name`.
//
// "ngram" and "regexp" are handled by the built-in helpers. Otherwise, when
// ext_ is set, it is asked to create the feature and a non-null result is
// wrapped in a shared_ptr. If nothing matches, converter_exception is thrown.
shared_ptr<string_feature> string_feature_factory::create(
    const std::string& name,
    const param_t& params) const {
  if (name == "ngram") {
    return create_character_ngram(params);
  }
  if (name == "regexp") {
    return create_regexp(params);
  }
  if (ext_) {
    string_feature* const extension_feature = ext_(name, params);
    if (extension_feature) {
      return shared_ptr<string_feature>(extension_feature);
    }
  }
  throw JUBATUS_EXCEPTION(
      converter_exception(std::string("unknown splitter name: ") + name));
}
void init_num_rules( const std::vector<num_rule>& num_rules, const std::map<std::string, num_feature_ptr>& num_features, datum_to_fv_converter& conv) { for (size_t i = 0; i < num_rules.size(); ++i) { const num_rule& rule = num_rules[i]; matcher_ptr m(create_key_matcher(rule.key, rule.except)); std::map<std::string, num_feature_ptr>::const_iterator it = num_features.find(rule.type); if (it == num_features.end()) { throw JUBATUS_EXCEPTION( converter_exception("unknown type: " + rule.type)); } conv.register_num_rule(rule.type, m, it->second); } }
void init_string_filter_rules( const std::vector<filter_rule>& filter_rules, const std::map<std::string, string_filter_ptr>& filters, datum_to_fv_converter& conv) { for (size_t i = 0; i < filter_rules.size(); ++i) { const filter_rule& rule = filter_rules[i]; std::map<std::string, string_filter_ptr>::const_iterator it = filters.find(rule.type); if (it == filters.end()) { throw JUBATUS_EXCEPTION( converter_exception("unknown type: " + rule.type)); } matcher_ptr m(create_key_matcher(rule.key, rule.except)); conv.register_string_filter(m, it->second, rule.suffix); } }
// Creates a key_matcher from the textual rule `matcher`.
//
// Rules are tested in this order:
//   ""  or "*"      -> match_all
//   "*xxx"          -> suffix_match("xxx")
//   "xxx*"          -> prefix_match("xxx")
//   "/xxx/"         -> re2_match("xxx") when HAVE_RE2 is defined, otherwise
//                      converter_exception is thrown
//   anything else   -> exact_match(matcher)
key_matcher* key_matcher_factory::create_matcher(const std::string& matcher) {
  if (matcher == "" || matcher == "*") {
    return new match_all();
  }

  // From here on `matcher` is known to be non-empty.
  const std::string::size_type last = matcher.size() - 1;

  if (matcher[0] == '*') {
    return new suffix_match(matcher.substr(1));
  }
  if (matcher[last] == '*') {
    return new prefix_match(matcher.substr(0, last));
  }
  if (matcher.size() >= 2 && matcher[0] == '/' && matcher[last] == '/') {
#ifdef HAVE_RE2
    return new re2_match(matcher.substr(1, matcher.size() - 2));
#else
    throw converter_exception("cannot use regexp rule: " + matcher);
#endif
  }
  return new exact_match(matcher);
}
void init_string_rules( const std::vector<string_rule>& string_rules, const std::map<std::string, splitter_ptr>& splitters, datum_to_fv_converter& conv) { for (size_t i = 0; i < string_rules.size(); ++i) { const string_rule& rule = string_rules[i]; matcher_ptr m(create_key_matcher(rule.key, rule.except)); std::map<std::string, splitter_ptr>::const_iterator it = splitters.find(rule.type); if (it == splitters.end()) { throw JUBATUS_EXCEPTION( converter_exception("unknown type: " + rule.type)); } std::vector<splitter_weight_type> ws; ws.push_back(make_weight_type(rule.sample_weight, rule.global_weight)); conv.register_string_rule(rule.type, m, it->second, ws); } }
// Creates the num_filter registered under `name`.
//
// The built-in names "add", "linear_normalization", "gaussian_normalization"
// and "sigmoid_normalization" are handled by their creation helpers.
// Otherwise, when ext_ is set, it is asked to create the filter and a
// non-null result is wrapped in a shared_ptr. If nothing matches,
// converter_exception is thrown.
shared_ptr<num_filter> num_filter_factory::create(
    const std::string& name,
    const param_t& params) const {
  if (name == "add") {
    return create_add_filter(params);
  }
  if (name == "linear_normalization") {
    return create_linear_normalization_filter(params);
  }
  if (name == "gaussian_normalization") {
    return create_gaussian_normalization_filter(params);
  }
  if (name == "sigmoid_normalization") {
    return create_sigmoid_normalization_filter(params);
  }
  if (ext_) {
    num_filter* const extension_filter = ext_(name, params);
    if (extension_filter) {
      return shared_ptr<num_filter>(extension_filter);
    }
  }
  throw JUBATUS_EXCEPTION(
      converter_exception("unknonw num filter name: " + name));
}
// Opens the plugin shared library named by `path` and keeps the handle in
// handle_.
//
// Lookup order, as implemented below:
//   1. When is_absolute_or_relative_path(path) holds, `path` is passed to
//      dlopen unchanged.
//   2. Otherwise the directory returned by get_plugin_path() is tried (when
//      it is non-null), then JUBATUS_PLUGIN_DIR if no handle was obtained.
//
// Throws converter_exception, annotated with the API name, the file name and
// the dlerror() text, when no attempt produced a handle.
dynamic_loader::dynamic_loader(const std::string& path)
    : handle_(0) {
  void* handle = NULL;

  if (is_absolute_or_relative_path(path)) {
    // If the path contains "/", load the plugin with the given path.
    handle = ::dlopen(path.c_str(), RTLD_LAZY);
  } else {
    // Try to load the plugin from the plugin path environment.
    const char* plugin_dir = get_plugin_path();
    if (plugin_dir) {
      const std::string plugin_path = std::string(plugin_dir) + "/" + path;
      handle = ::dlopen(plugin_path.c_str(), RTLD_LAZY);
    }

    // If failed, try to load it from the plugin directory specified on
    // configure.
    if (!handle) {
      const std::string plugin_path =
          std::string(JUBATUS_PLUGIN_DIR) + "/" + path;
      handle = ::dlopen(plugin_path.c_str(), RTLD_LAZY);
    }
  }

  if (!handle) {
    char* error = dlerror();
    throw JUBATUS_EXCEPTION(
        converter_exception(
            "cannot load dynamic library: " + path + ": " + error)
        << jubatus::core::common::exception::error_api_func("dlopen")
        << jubatus::core::common::exception::error_file_name(path)
        << jubatus::core::common::exception::error_message(error));
  }

  handle_ = handle;
}
// Opens the shared library at `path`; if that fails, retries with the base
// name of `path` placed under JUBATUS_PLUGIN_DIR. The resulting handle is
// stored in handle_.
//
// Throws converter_exception, annotated with the API name, the file name and
// the dlerror() text, when neither attempt produced a handle.
dynamic_loader::dynamic_loader(const std::string& path)
    : handle_(0) {
  namespace exc = jubatus::exception;

  void* library = dlopen(path.c_str(), RTLD_LAZY);

  if (!library) {
    // dlopen from JUBATUS_PLUGIN_DIR
    const std::string plugin_name = jubatus::util::base_name(path);
    std::string fallback_path(JUBATUS_PLUGIN_DIR);
    fallback_path += "/";
    fallback_path += plugin_name;
    library = dlopen(fallback_path.c_str(), RTLD_LAZY);
  }

  if (library) {
    handle_ = library;
    return;
  }

  char* open_error = dlerror();
  throw JUBATUS_EXCEPTION(
      converter_exception(
          "cannot load dynamic library: " + path + ": " + open_error)
      << exc::error_api_func("dlopen")
      << exc::error_file_name(path)
      << exc::error_message(open_error));
}
// Builds a matcher from the regular expression `regexp`.
// Throws converter_exception when re_.ok() reports a bad pattern.
re2_match::re2_match(const std::string& regexp)
    : re_(regexp) {
  if (re_.ok()) {
    return;
  }
  throw converter_exception("invalid regular expression");
}
// Builds a matcher from the regular expression `regexp`.
// Throws converter_exception when re_.ok() reports a bad pattern.
regexp_match::regexp_match(const std::string& regexp)
    : re_(regexp) {
  if (re_.ok()) {
    return;
  }
  throw JUBATUS_EXCEPTION(converter_exception("invalid regular expression"));
}
// Throws converter_exception when `inst` is a null pointer; otherwise does
// nothing.
void check_null_instance(void* inst) {
  if (inst) {
    return;
  }
  throw JUBATUS_EXCEPTION(converter_exception("failed to load plugin"));
}