int String::find (char ch, int start, int options) const { if (start < 0) start += _len (buf); if (start < 0) start = 0; if (start > _len (buf)) start = _len (buf); if (options & FIND_CASE_INSENSITIVE) ch = tolower ((unsigned char) ch); if (options & FIND_REVERSE) { for (int i = start - 1; i >= 0; i--) { if ((options & FIND_CASE_INSENSITIVE ? tolower ((unsigned char) buf[i]) == ch : buf[i] == ch) && ((options & FIND_WHOLE_WORD) == 0 || ( isWordBoundary (i) && isWordBoundary (i + 1)))) return i; } } else { for (int i = start; i < _len (buf); i++) { if ((options & FIND_CASE_INSENSITIVE ? tolower ((unsigned char) buf[i]) == ch : buf[i] == ch) && ((options & FIND_WHOLE_WORD) == 0 || ( isWordBoundary (i) && isWordBoundary (i + 1)))) return i; } } return -1; }
void c_SentenceEntry::add(const c_LabelEntry & l) { if (l.phon[l.phon.size()-1] != '-' || (l.phon[l.phon.size()-2] == '#' && l.phon[l.phon.size()-1] == '-')) { if(isWordBoundary(l.phon)) { if(l.pros.size()) type = isSentenceDelimiter(l.pros); if(start) { word.add(l); word.setPros(l.pros); word.setFirst(l.first); start = false; } else { word.setLast(l.first); word.finish(l); words.push_back(word); word = c_WordEntry(sb_sym); word.setPros(l.pros); word.setFirst(l.first); word.add(l); } } else { word.add(l); } } }
// Reads the next word from the input, one character at a time via readNext().
//
// Characters for which isWordBoundary() is true separate words: they are
// skipped before a word has begun and terminate the word once it has.
// On success the word is copied into *word and true is returned. False is
// returned when readNext() runs out of input before any word character was
// seen; *word is left untouched in that case.
bool LexicalAnalyser::readWord(std::string *word)
{
    std::string collected;
    bool inWord = false;
    char next;

    for (;;) {
        if (!readNext(&next))
            break;

        if (isWordBoundary(next)) {
            if (inWord) {
                // Boundary after at least one word character: word complete.
                *word = collected;
                return true;
            }
            // Leading boundary characters: keep waiting for the word to start.
            continue;
        }

        // Ordinary character: extend the word.
        collected += next;
        inWord = true;
    }

    // Input exhausted; hand back whatever was gathered, if anything.
    if (!inWord || collected.length() == 0)
        return false;

    *word = collected;
    return true;
}
bool CRegExp::checkMetaSymbol(EMetaSymbols symb, int &toParse) { const String &pattern = *global_pattern; switch(symb){ case ReAnyChr: if (toParse >= end) return false; if (!singleLine && (pattern[toParse] == 0x0A || pattern[toParse] == 0x0B || pattern[toParse] == 0x0C || pattern[toParse] == 0x0D || pattern[toParse] == 0x85 || pattern[toParse] == 0x2028 || pattern[toParse] == 0x2029)) return false; toParse++; return true; case ReSoL: if (multiLine){ bool ok = false; if (toParse && (pattern[toParse-1] == 0x0A || pattern[toParse-1] == 0x0B || pattern[toParse-1] == 0x0C || pattern[toParse-1] == 0x0D || pattern[toParse-1] == 0x85 || pattern[toParse-1] == 0x2028 || pattern[toParse-1] == 0x2029)) ok = true; return (toParse == 0 || ok); }; return (toParse == 0); case ReEoL: if (multiLine){ bool ok = false; // ???check if (toParse && toParse < end && (pattern[toParse-1] == 0x0A || pattern[toParse-1] == 0x0B || pattern[toParse-1] == 0x0C || pattern[toParse-1] == 0x0D || pattern[toParse-1] == 0x85 || pattern[toParse-1] == 0x2028 || pattern[toParse-1] == 0x2029)) ok = true; return (toParse == end || ok); }; return (end == toParse); case ReDigit: if (toParse >= end || !Character::isDigit(pattern[toParse])) return false; toParse++; return true; case ReNDigit: if (toParse >= end || Character::isDigit(pattern[toParse])) return false; toParse++; return true; case ReWordSymb: if (toParse >= end || !(Character::isLetterOrDigit(pattern[toParse]) || pattern[toParse] == '_')) return false; toParse++; return true; case ReNWordSymb: if (toParse >= end || Character::isLetterOrDigit(pattern[toParse]) || pattern[toParse] == '_') return false; toParse++; return true; case ReWSpace: if (toParse >= end || !Character::isWhitespace(pattern[toParse])) return false; toParse++; return true; case ReNWSpace: if (toParse >= end || Character::isWhitespace(pattern[toParse])) return false; toParse++; return true; case ReUCase: if (toParse >= end || !Character::isUpperCase(pattern[toParse])) return false; 
toParse++; return true; case ReNUCase: if (toParse >= end || !Character::isLowerCase(pattern[toParse])) return false; toParse++; return true; case ReWBound: return isWordBoundary(toParse); case ReNWBound: return isNWordBoundary(toParse); case RePreNW: if (toParse >= end) return true; return toParse == 0 || !Character::isLetter(pattern[toParse-1]); #ifdef COLORERMODE case ReSoScheme: return (schemeStart == toParse); case ReStart: matches->s[0] = toParse; startChange = true; return true; case ReEnd: matches->e[0] = toParse; endChange = true; return true; #endif default: return false; }; }
// Logical complement of isWordBoundary(): true exactly when
// isWordBoundary(toParse) returns false.
bool CRegExp::isNWordBoundary(int &toParse)
{
  const bool atBoundary = isWordBoundary(toParse);
  return !atBoundary;
}
int String::find (char const* str, int start, int options) const { static int kmpbuf[256]; #define comp(a,b) (options&FIND_CASE_INSENSITIVE?tolower((unsigned char)a)==tolower((unsigned char)b):(a)==(b)) if (start < 0) start += _len (buf); if (start < 0) start = 0; if (start > _len (buf)) start = _len (buf); if (*str == 0) return start; int len = (int) strlen (str); int* kmp = kmpbuf; if (len > 256) kmp = new int[len]; kmp[0] = 0; int result = -1; if (options & FIND_REVERSE) { for (int i = 1; i < len; i++) { kmp[i] = kmp[i - 1]; while (kmp[i] && !comp (str[len - 1 - kmp[i]], str[len - 1 - i])) kmp[i] = kmp[kmp[i] - 1]; if (comp (str[len - 1 - kmp[i]], str[len - 1 - i])) kmp[i]++; } int cur = 0; for (int i = start - 1; i >= 0; i--) { while (cur && !comp (str[len - 1 - cur], buf[_len (buf) - 1 - i])) cur = kmp[cur - 1]; if (comp (str[len - 1 - cur], buf[_len (buf) - 1 - i])) cur++; if (cur == len) { if ((options & FIND_WHOLE_WORD) == 0 || ( isWordBoundary (i) && isWordBoundary (i + len))) { result = i; break; } } } } else { for (int i = 1; i < len; i++) { kmp[i] = kmp[i - 1]; while (kmp[i] && !comp (str[kmp[i]], str[i])) kmp[i] = kmp[kmp[i] - 1]; if (comp (str[kmp[i]], str[i])) kmp[i]++; } int cur = 0; for (int i = start; i < _len (buf); i++) { while (cur && !comp (str[cur], buf[i])) cur = kmp[cur - 1]; if (comp (str[cur], buf[i])) cur++; if (cur == len) { if ((options & FIND_WHOLE_WORD) == 0 || ( isWordBoundary (i - len + 1) && isWordBoundary (i + 1))) { result = i - len + 1; break; } } } } if (kmp != kmpbuf) delete[] kmp; return result; }