/**
 * Searches the string for a single character.
 *
 * @param ch      Character to look for.
 * @param start   Position at which scanning begins. A negative value has the
 *                string length added to it; the result is then clamped to
 *                [0, length]. A forward search examines start, start + 1, ...;
 *                a reverse search examines start - 1, start - 2, ..., 0.
 * @param options Bitwise OR of FIND_CASE_INSENSITIVE, FIND_REVERSE and
 *                FIND_WHOLE_WORD.
 * @return Index of the first accepted occurrence in scan order, or -1 when
 *         there is none.
 */
int String::find (char ch, int start, int options) const
{
  int const textLen = _len (buf);
  if (start < 0) start += textLen;
  if (start < 0) start = 0;
  if (start > textLen) start = textLen;

  bool const ignoreCase = (options & FIND_CASE_INSENSITIVE) != 0;
  bool const wholeWord = (options & FIND_WHOLE_WORD) != 0;
  bool const reverse = (options & FIND_REVERSE) != 0;

  // tolower() returns an int in unsigned-char range. Keep the folded target
  // as an int: narrowing it back to a (possibly signed) char would make
  // bytes >= 0x80 compare unequal to the int produced for buf[i].
  int const folded = tolower ((unsigned char) ch);

  int const step = reverse ? -1 : 1;
  for (int i = reverse ? start - 1 : start; reverse ? i >= 0 : i < textLen; i += step)
  {
    bool const same = ignoreCase
      ? tolower ((unsigned char) buf[i]) == folded
      : buf[i] == ch;
    if (!same)
      continue;
    // With FIND_WHOLE_WORD both edges of the one-character match must be
    // word boundaries.
    if (!wholeWord || (isWordBoundary (i) && isWordBoundary (i + 1)))
      return i;
  }
  return -1;
}
// Example #2
// 0
/**
 * Feeds one label into the sentence being assembled.
 *
 * A label whose phon text ends in '-' is ignored unless the text ends in
 * "#-". Empty phon text is ignored as well, because there is no last
 * character to inspect.
 *
 * For an accepted label:
 *  - if isWordBoundary(l.phon) holds, the sentence type is refreshed from
 *    l.pros (when l.pros is non-empty); then either the very first word is
 *    started (while `start` is still set), or the current word is finished,
 *    stored in `words`, and a new word is opened with this label;
 *  - otherwise the label is appended to the current word.
 *
 * @param l Label to add.
 */
void c_SentenceEntry::add(const c_LabelEntry & l)
{
  // Guard the index arithmetic below: size()-1 is out of range for an empty
  // label, and size()-2 is out of range for a one-character label.
  if (l.phon.size() == 0)
    return;

  const bool endsWithDash = l.phon[l.phon.size()-1] == '-';
  const bool endsWithHashDash =
    endsWithDash && l.phon.size() >= 2 && l.phon[l.phon.size()-2] == '#';
  if (endsWithDash && !endsWithHashDash)
    return;

  if (!isWordBoundary(l.phon)) {
    word.add(l);
    return;
  }

  if (l.pros.size())
    type = isSentenceDelimiter(l.pros);

  if (start) {
    // First boundary label: it opens the first word, nothing to close yet.
    word.add(l);
    word.setPros(l.pros);
    word.setFirst(l.first);
    start = false;
  }
  else {
    // Close the word in progress and store it, then open the next one.
    word.setLast(l.first);
    word.finish(l);
    words.push_back(word);
    word = c_WordEntry(sb_sym);
    word.setPros(l.pros);
    word.setFirst(l.first);
    word.add(l);
  }
}
/**
 * Reads the next word from the input.
 *
 * Characters are pulled with readNext(). Boundary characters (as decided by
 * isWordBoundary) that come before any word character are skipped. The word
 * ends at the first boundary character after it, which has already been read
 * by then, or when readNext() reports that no more input is available.
 *
 * @param word Receives the word; only written when the function returns true.
 * @return true if a non-empty word was read, false otherwise.
 */
bool LexicalAnalyser::readWord(std::string *word) {
    std::string collected;
    char current;

    while (readNext(&current)) {
        if (isWordBoundary(current)) {
            // A boundary before any word character is just leading padding.
            if (collected.empty()) {
                continue;
            }
            *word = collected;
            return true;
        }
        collected += current;
    }

    // Input ran out: hand back whatever was gathered, if anything.
    if (collected.empty()) {
        return false;
    }
    *word = collected;
    return true;
}
// Example #4
// 0
// True when `c` equals one of the seven code points that the line-oriented
// metasymbols below compare against: 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028,
// 0x2029.
template <class CharT>
static bool isLineBreakCode(CharT c)
{
  return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D ||
         c == 0x85 || c == 0x2028 || c == 0x2029;
}

/**
 * Tests one metasymbol against *global_pattern at position toParse.
 *
 * Character-class symbols (any char, digit, word symbol, whitespace, case
 * classes and their negations) need a character at toParse (toParse < end);
 * on success toParse is advanced by one. Position symbols (start/end of
 * line, word boundaries, RePreNW and the COLORERMODE-only ones) leave
 * toParse where it is.
 *
 * @param symb    Metasymbol to test.
 * @param toParse Current position; incremented when a character is consumed.
 * @return true if the metasymbol matches at toParse; false otherwise,
 *         including for any symbol not handled here.
 */
bool CRegExp::checkMetaSymbol(EMetaSymbols symb, int &toParse)
{
  const String &text = *global_pattern;
  const bool hasChar = toParse < end;
  // Set by the character-consuming cases; they share the advance step that
  // follows the switch.
  bool matched = false;

  switch (symb) {
    case ReAnyChr:
      // Line-break code points are rejected unless singleLine is set.
      matched = hasChar && (singleLine || !isLineBreakCode(text[toParse]));
      break;

    case ReSoL:
      if (toParse == 0) return true;
      // With multiLine, the position right after a line break also counts.
      return multiLine && isLineBreakCode(text[toParse - 1]);

    case ReEoL:
      if (toParse == end) return true;
      // NOTE(review): this inspects the character *before* toParse, exactly
      // as the original did (it carried a "???check" remark) - confirm that
      // this is the intended end-of-line rule.
      return multiLine && toParse != 0 && hasChar &&
             isLineBreakCode(text[toParse - 1]);

    case ReDigit:
      matched = hasChar && Character::isDigit(text[toParse]);
      break;

    case ReNDigit:
      matched = hasChar && !Character::isDigit(text[toParse]);
      break;

    case ReWordSymb:
      matched = hasChar && (Character::isLetterOrDigit(text[toParse]) ||
                            text[toParse] == '_');
      break;

    case ReNWordSymb:
      matched = hasChar && !(Character::isLetterOrDigit(text[toParse]) ||
                             text[toParse] == '_');
      break;

    case ReWSpace:
      matched = hasChar && Character::isWhitespace(text[toParse]);
      break;

    case ReNWSpace:
      matched = hasChar && !Character::isWhitespace(text[toParse]);
      break;

    case ReUCase:
      matched = hasChar && Character::isUpperCase(text[toParse]);
      break;

    case ReNUCase:
      // Accepts lower-case characters only; this is not a plain negation of
      // ReUCase.
      matched = hasChar && Character::isLowerCase(text[toParse]);
      break;

    case ReWBound:
      return isWordBoundary(toParse);

    case ReNWBound:
      return isNWordBoundary(toParse);

    case RePreNW:
      // Holds at or past the end, at position 0, or when the previous
      // character is not a letter.
      return !hasChar || toParse == 0 || !Character::isLetter(text[toParse - 1]);

#ifdef COLORERMODE
    case ReSoScheme:
      return (schemeStart == toParse);

    case ReStart:
      matches->s[0] = toParse;
      startChange = true;
      return true;

    case ReEnd:
      matches->e[0] = toParse;
      endChange = true;
      return true;
#endif

    default:
      return false;
  }

  if (matched)
    toParse++;
  return matched;
}
// Example #5
// 0
/**
 * Logical negation of isWordBoundary() for the same position.
 *
 * @param toParse Position to test; passed on to isWordBoundary().
 * @return true when isWordBoundary(toParse) returns false.
 */
bool CRegExp::isNWordBoundary(int &toParse)
{
  const bool atBoundary = isWordBoundary(toParse);
  return !atBoundary;
}
/**
 * Searches the string for a NUL-terminated substring using
 * Knuth-Morris-Pratt matching.
 *
 * @param str     Substring to look for. An empty substring matches
 *                immediately at the (adjusted) start position.
 * @param start   Position at which scanning begins. A negative value has the
 *                string length added to it; the result is then clamped to
 *                [0, length]. A forward search examines start, start + 1, ...;
 *                a reverse search examines start - 1, start - 2, ..., 0.
 * @param options Bitwise OR of FIND_CASE_INSENSITIVE, FIND_REVERSE and
 *                FIND_WHOLE_WORD.
 * @return Index of the first character of the first accepted match in scan
 *         order, or -1 when there is none.
 */
int String::find (char const* str, int start, int options) const
{
  // NOTE(review): comp is intentionally left defined after this function, as
  // before, in case code further down the file relies on it.
#define comp(a,b) (options&FIND_CASE_INSENSITIVE?tolower((unsigned char)a)==tolower((unsigned char)b):(a)==(b))
  int const textLen = _len (buf);
  if (start < 0) start += textLen;
  if (start < 0) start = 0;
  if (start > textLen) start = textLen;
  if (*str == 0) return start;

  int const len = (int) strlen (str);
  bool const wholeWord = (options & FIND_WHOLE_WORD) != 0;

  // Failure table. Short patterns use a per-call stack array (not a shared
  // static one, so concurrent or nested calls cannot clobber each other);
  // longer patterns get a heap array that is released before returning.
  int stackTable[256];
  int* kmp = stackTable;
  if (len > 256)
    kmp = new int[len];
  kmp[0] = 0;

  int result = -1;
  if (options & FIND_REVERSE)
  {
    // The text is walked right to left, so the pattern is consumed from its
    // last character: logical pattern position k is str[len - 1 - k].
    for (int i = 1; i < len; i++)
    {
      kmp[i] = kmp[i - 1];
      while (kmp[i] && !comp (str[len - 1 - kmp[i]], str[len - 1 - i]))
        kmp[i] = kmp[kmp[i] - 1];
      if (comp (str[len - 1 - kmp[i]], str[len - 1 - i]))
        kmp[i]++;
    }
    int cur = 0;
    for (int i = start - 1; i >= 0; i--)
    {
      // buf[i] is the character at the position being reported: once the
      // whole pattern is consumed, i is the index of the match's first
      // character.
      while (cur && !comp (str[len - 1 - cur], buf[i]))
        cur = kmp[cur - 1];
      if (comp (str[len - 1 - cur], buf[i]))
        cur++;
      if (cur == len)
      {
        if (!wholeWord || (isWordBoundary (i) && isWordBoundary (i + len)))
        {
          result = i;
          break;
        }
        // Rejected match: fall back to the longest border so the next
        // iteration never indexes the pattern at position len.
        cur = kmp[cur - 1];
      }
    }
  }
  else
  {
    for (int i = 1; i < len; i++)
    {
      kmp[i] = kmp[i - 1];
      while (kmp[i] && !comp (str[kmp[i]], str[i]))
        kmp[i] = kmp[kmp[i] - 1];
      if (comp (str[kmp[i]], str[i]))
        kmp[i]++;
    }
    int cur = 0;
    for (int i = start; i < textLen; i++)
    {
      while (cur && !comp (str[cur], buf[i]))
        cur = kmp[cur - 1];
      if (comp (str[cur], buf[i]))
        cur++;
      if (cur == len)
      {
        int const first = i - len + 1;
        if (!wholeWord || (isWordBoundary (first) && isWordBoundary (i + 1)))
        {
          result = first;
          break;
        }
        // Rejected match: continue from the longest border instead of
        // relying on the NUL terminator to force a mismatch.
        cur = kmp[cur - 1];
      }
    }
  }

  if (kmp != stackTable)
    delete[] kmp;
  return result;
}