/**
 * Reports whether the character `c` occurs in this string's buffer.
 *
 * @param c  Character to look for.
 * @return   true if `c` is found in str_szBuffer, false otherwise.
 */
bool String::Contains(s_char c) const
{
  // MUTEX_LOCK is a macro defined elsewhere.
  // NOTE(review): presumably it keeps str_mutex held until scope exit — confirm.
  MUTEX_LOCK(str_mutex);

#ifdef SCRATCH_NO_UTF8
  // Single pass up to the terminator. This avoids storing strlen()'s size_t
  // result in an int (which truncates for very long buffers) and avoids
  // scanning the buffer twice. The terminator itself is never compared, so
  // searching for 0 yields false.
  for (const char* cursor = str_szBuffer; *cursor != '\0'; ++cursor) {
    if (*cursor == c) {
      return true;
    }
  }
  return false;
#else
  // NOTE(review): utf8chr's treatment of c == 0 is not visible here; confirm it
  // agrees with the non-UTF8 branch, which returns false for it.
  return utf8chr(str_szBuffer, c) != nullptr;
#endif
}
/**
 * Builds a rule by translating a simplified pattern into an RE2 expression.
 *
 * The pattern is read one UTF-8 code point at a time:
 *   - `[` ... `]`  copied verbatim; the whole class counts as one character
 *   - `\`          copied verbatim and not counted
 *   - `?`          becomes `.`; its character position is stored in skipPositions
 *   - `@`          replaced by `letters_regex`
 *   - `#`          replaced by `numbers_regex`
 *   - `*` / `+`    an error is printed and nothing is added to the regex
 *   - anything else is copied verbatim
 *
 * `numchars` ends up holding the number of counted pattern characters.
 * `valid` is set to true only if the pattern is well-formed UTF-8 and RE2
 * accepts the generated expression.
 *
 * @param region         Stored in `region`.
 * @param pattern        Simplified pattern; stored unchanged in `original`.
 * @param letters_regex  Expression substituted for each `@`.
 * @param numbers_regex  Expression substituted for each `#`.
 */
RegexRule::RegexRule(string region, string pattern, std::string letters_regex, std::string numbers_regex)
{
  this->original = pattern;
  this->region = region;
  this->regex = "";
  this->valid = false;

  // Give every member a defined value before the first early return. The
  // invalid-UTF-8 path used to leave re2_regex and numchars uninitialized,
  // so any later delete/dereference of re2_regex would be undefined behaviour.
  re2_regex = nullptr;
  numchars = 0;

  string::iterator end_it = utf8::find_invalid(pattern.begin(), pattern.end());
  if (end_it != pattern.end()) {
    cerr << "Invalid UTF-8 encoding detected " << endl;
    return;
  }

  std::stringstream regexval;
  string::iterator utf_iterator = pattern.begin();

  while (utf_iterator < pattern.end()) {
    int cp = utf8::next(utf_iterator, pattern.end());
    string utf_character = utf8chr(cp);

    if (utf_character == "[") {
      // Copy the character class through to its closing bracket.
      regexval << "[";
      while (utf_character != "]") {
        if (utf_iterator >= pattern.end())
          break; // Invalid regex, don't bother processing
        int class_cp = utf8::next(utf_iterator, pattern.end());
        utf_character = utf8chr(class_cp);
        regexval << utf_character;
      }
    } else if (utf_character == "\\") {
      // Don't add "\" characters to our character count
      regexval << utf_character;
      continue;
    } else if (utf_character == "?") {
      regexval << ".";
      this->skipPositions.push_back(numchars);
    } else if (utf_character == "@") {
      regexval << letters_regex;
    } else if (utf_character == "#") {
      regexval << numbers_regex;
    } else if ((utf_character == "*") || (utf_character == "+")) {
      // NOTE(review): the wildcard is dropped from the generated regex but is
      // still counted in numchars below, and the rule can still become valid.
      // Confirm whether such rules should be rejected instead.
      cerr << "Regex with wildcards (* or +) not supported" << endl;
    } else {
      regexval << utf_character;
    }

    numchars++;
  }

  this->regex = regexval.str();
  re2_regex = new re2::RE2(this->regex);

  if (!re2_regex->ok()) {
    cerr << "Unable to load regex: " << pattern << endl;
  } else {
    this->valid = true;
  }
}