コード例 #1
0
/* get_word:
 *  Fetches the next token from the input stream into buf. Whitespace
 *  before the token is discarded. A token is either a run of word
 *  characters or a single non-word, non-space character. On EOF an
 *  empty string is stored. The caller must supply a buffer big enough
 *  for the token plus the terminating zero; no length check is done here.
 */
static void get_word(char *buf)
{
   int ch;
   int len = 0;

   while ((ch = egg_getc()) != EOF) {
      if (iswordchar(ch)) {
         /* gather the whole run of word characters */
         do {
            buf[len++] = ch;
            ch = egg_getc();
         } while (iswordchar(ch));

         /* the character that ended the word belongs to the next token */
         egg_ungetc(ch);
         buf[len] = 0;
         return;
      }

      if (isspace(ch))
         continue;

      /* any other character is a one-character token */
      buf[0] = ch;
      buf[1] = 0;
      return;
   }

   /* ran out of input before finding a token */
   buf[0] = 0;
}
コード例 #2
0
// try to match "findText" from "start" with whitespace tolerance
// (ignore all whitespace except after alphanumeric characters)
int TextSearch::MatchLen(const WCHAR *start) const
{
    const WCHAR *match = findText, *end = start;

    if (matchWordStart && start > pageText && iswordchar(start[-1]) && iswordchar(start[0]))
        return -1;

    if (!match)
        return -1;

    while (*match) {
        if (!*end)
            return -1;
        if (caseSensitive ? *match == *end : CharLower((LPWSTR)LOWORD(*match)) == CharLower((LPWSTR)LOWORD(*end)))
            /* characters are identical */;
        else if (str::IsWs(*match) && str::IsWs(*end))
            /* treat all whitespace as identical */;
        // TODO: Adobe Reader seems to have a more extensive list of
        //       normalizations - is there an easier way?
        else if (*match == '-' && (0x2010 <= *end && *end <= 0x2014))
            /* make HYPHEN-MINUS also match HYPHEN, NON-BREAKING HYPHEN,
               FIGURE DASH, EN DASH and EM DASH (but not the other way around) */;
        else if (*match == '\'' && (0x2018 <= *end && *end <= 0x201b))
            /* make APOSTROPHE also match LEFT/RIGHT SINGLE QUOTATION MARK */;
        else if (*match == '"' && (0x201c <= *end && *end <= 0x201f))
            /* make QUOTATION MARK also match LEFT/RIGHT DOUBLE QUOTATION MARK */;
        else
            return -1;
        match++;
        end++;
        // treat "??" and "? ?" differently, since '?' could have been a word
        // character that's just missing an encoding (and '?' is the replacement
        // character); cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1574
        if (*match && !isnoncjkwordchar(*(match - 1)) && (*(match - 1) != '?' || *match != '?') ||
            str::IsWs(*(match - 1)) && str::IsWs(*(end - 1))) {
            SkipWhitespace(match);
            SkipWhitespace(end);
        }
    }

    if (matchWordEnd && end > pageText && iswordchar(end[-1]) && iswordchar(end[0]))
        return -1;

    return (int)(end - start);
}
コード例 #3
0
ファイル: parse.c プロジェクト: crowding/vadr
/* Advances str by one step; if str was positioned on a word-start
 * character, keeps stepping until the end of the string or the first
 * character that is not a word character. Returns the new position. */
const char *forward_word_or_char(const char *str, step_t step) {
  const int began_on_word = *str && isstartword(*str);

  /* one step is taken in either case */
  str = (*step)(str, 1);

  if (began_on_word) {
    while (*str && iswordchar(*str)) {
      str = (*step)(str, 1);
    }
  }
  return str;
}
コード例 #4
0
ファイル: TextRange.cpp プロジェクト: RazvanB/sumatrapdf
// Scans forward in the text of page "pageno" from index "idx" and returns the
// index of the first non-word character at or after the scan position (or the
// text length if none is found). If "dontReturnInitial" is set, any non-word
// characters at the starting position are skipped first.
int SumatraUIAutomationTextRange::FindNextWordEndpoint(int pageno, int idx, bool dontReturnInitial)
{
    int textLen;
    const WCHAR *pageText = document->GetDM()->textCache->GetData(pageno, &textLen);

    if (dontReturnInitial) {
        // move up to the next word before looking for its end
        while (idx < textLen && !iswordchar(pageText[idx]))
            idx++;
    }

    // run to the end of the current word
    while (idx < textLen && iswordchar(pageText[idx]))
        idx++;

    return idx;
}
コード例 #5
0
ファイル: TextRange.cpp プロジェクト: RazvanB/sumatrapdf
// Scans backward in the text of page "pageno" from index "idx" and returns the
// index just after the nearest preceding non-word character (or 0 if none is
// found). If "dontReturnInitial" is set, any non-word characters directly
// before the starting position are skipped first.
int SumatraUIAutomationTextRange::FindPreviousWordEndpoint(int pageno, int idx, bool dontReturnInitial)
{
    // based on TextSelection::SelectWordAt
    int textLen;
    const WCHAR *pageText = document->GetDM()->textCache->GetData(pageno, &textLen);

    if (dontReturnInitial) {
        // back up to the previous word before looking for its start
        while (idx > 0 && !iswordchar(pageText[idx - 1]))
            idx--;
    }

    // run back to the start of the current word
    while (idx > 0 && iswordchar(pageText[idx - 1]))
        idx--;

    return idx;
}
コード例 #6
0
/* check_ascii_word:
 *  Verifies that buf consists solely of word characters. Reports
 *  "Missing identifier" through egg_error() when buf does not start with
 *  a word character (this includes the empty string), and reports the
 *  first offending character when one is found later in the string.
 */
static void check_ascii_word(char *buf, char *error)
{
   const char *p = buf;

   /* walk over the leading run of word characters */
   while (*p && iswordchar(*p))
      p++;

   if (p == buf)
      egg_error(error, "Missing identifier");
   else if (*p)
      egg_error(error, "Invalid character ('%c') in identifier", *p);
}
コード例 #7
0
ファイル: qregexp.cpp プロジェクト: opieproject/qte-opie
/*
  Tries to match the compiled pattern rxd against the text starting at str.

  rxd        zero-terminated array of pattern codes
  str        first character to match against
  strlength  number of characters available starting at str
  bol        start of the line; used by the BOL, BOW and EOW anchors
  cs         TRUE for a case sensitive comparison of literal characters

  Returns the number of characters consumed by the match, or -1 if the
  pattern does not match at str (or contains an unexpected code).

  No case reads *p unless pl (the number of characters left) is non-zero.
  This includes the BOW and EOW anchors, which treat the end of the input
  as a non-word position.
*/
static int matchstring( uint *rxd, const QChar *str, uint strlength,
			const QChar *bol, bool cs )
{
    const QChar *p = str;
    const QChar *start = p;
    uint pl = strlength;
    uint *d = rxd;

    while ( *d ) {
	if ( *d & CHR ) {			// match char
	    if ( !pl )
		return -1;
	    QChar c( *d );
	    if ( !cs && !c.row() ) {		// case insensitive, #Only 8bit
		if ( p->row() || tolower(p->cell()) != c.cell() )
		    return -1;
		p++;
		pl--;
	    } else {				// case sensitive
		if ( *p != c )
		    return -1;
		p++;
		pl--;
	    }
	    d++;
	}
	else if ( *d & MCC ) {			// match char class
	    if ( !pl )
		return -1;
	    if ( !matchcharclass( d, *p ) )
		return -1;
	    p++;
	    pl--;
	    d += (*d & MVL) + 1;		// skip the class data
	}
	else switch ( *d++ ) {
	    case PWS:				// match whitespace
		if ( !pl || !p->isSpace() )
		    return -1;
		p++;
		pl--;
		break;
	    case PDG:				// match digits
		if ( !pl || !p->isDigit() )
		    return -1;
		p++;
		pl--;
		break;
	    case ANY:				// match anything
		if ( !pl )
		    return -1;
		p++;
		pl--;
		break;
	    case BOL:				// match beginning of line
		if ( p != bol )
		    return -1;
		break;
	    case EOL:				// match end of line
		if ( pl )
		    return -1;
		break;
	    case BOW:				// match beginning of word
		// a word cannot begin at the end of the input; checking pl
		// first also keeps *p from being read past the end
		if ( !pl || !iswordchar(*p) ||
		     (p > bol && iswordchar(*(p-1)) ) )
		    return -1;
		break;
	    case EOW:				// match end of word
		// at the end of the input there is no following character,
		// so only the preceding one decides; *p is read only if pl
		if ( (pl && iswordchar(*p)) || p == bol ||
		     !iswordchar(*(p-1)) )
		    return -1;
		break;
	    case CLO:				// Kleene closure
		{
		const QChar *first_p = p;
		// consume as many repetitions as possible ...
		if ( *d & CHR ) {		// match char
		    QChar c( *d );
		    if ( !cs && !c.row() ) {	// case insensitive, #only 8bit
			while ( pl && !p->row() && tolower(p->cell())==c.cell() ) {
			    p++;
			    pl--;
			}
		    }
		    else {			// case sensitive
			while ( pl && *p == c ) {
			    p++;
			    pl--;
			}
		    }
		    d++;
		}
		else if ( *d & MCC ) {			// match char class
		    while( pl && matchcharclass( d, *p ) ) {
			p++;
			pl--;
		    }
		    d += (*d & MVL) + 1;
		}
		else if ( *d == PWS ) {
		    while ( pl && p->isSpace() ) {
			p++;
			pl--;
		    }
		    d++;
		}
		else if ( *d == PDG ) {
		    while ( pl && p->isDigit() ) {
			p++;
			pl--;
		    }
		    d++;
		}
		else if ( *d == ANY ) {
		    p += pl;
		    pl = 0;
		    d++;
		}
		else {
		    return -1;			// error
		}
		d++;				// skip CLO's END
		// ... then give characters back one at a time until the
		// rest of the pattern matches
		while ( p >= first_p ) {	// go backwards
		    int end = matchstring( d, p, pl, bol, cs );
		    if ( end >= 0 )
			return ( p - start ) + end;
		    if ( !p )
			return -1;
		    --p;
		    ++pl;
		}
		}
		return -1;
	    case OPT:				// optional closure
		{
		const QChar *first_p = p;
		// consume at most one occurrence ...
		if ( *d & CHR ) {		// match char
		    QChar c( *d );
		    if ( !cs && !c.row() ) {	// case insensitive, #only 8bit
			if ( pl && !p->row() && tolower(p->cell()) == c.cell() ) {
			    p++;
			    pl--;
			}
		    }
		    else {			// case sensitive
			if ( pl && *p == c ) {
			    p++;
			    pl--;
			}
		    }
		    d++;
		}
		else if ( *d & MCC ) {			// match char class
		    if ( pl && matchcharclass( d, *p ) ) {
			p++;
			pl--;
		    }
		    d += (*d & MVL) + 1;
		}
		else if ( *d == PWS ) {
		    if ( pl && p->isSpace() ) {
			p++;
			pl--;
		    }
		    d++;
		}
		else if ( *d == PDG ) {
		    if ( pl && p->isDigit() ) {
			p++;
			pl--;
		    }
		    d++;
		}
		else if ( *d == ANY ) {
		    if ( pl ) {
			p++;
			pl--;
		    }
		    d++;
		}
		else {
		    return -1;			// error
		}
		d++;				// skip OPT's END
		// ... then try the rest of the pattern with and without it
		while ( p >= first_p ) {	// go backwards
		    int end = matchstring( d, p, pl, bol, cs );
		    if ( end >= 0 )
			return ( p - start ) + end;
		    if ( !p )
			return -1;
		    --p;
		    ++pl;
		}
		}
		return -1;

	    default:				// error
		return -1;
	}
    }
    return p - start;
}
コード例 #8
0
ファイル: Legacy.cpp プロジェクト: legit-hax/npp-gmod-lua
//  <--- Fold --->
// Recomputes the fold level of every line touched by the range
// [startPos, startPos + length). The level goes up on the keywords
// if/do/function/repeat, on '{', at the start of a block string or block
// comment, and on a line comment immediately followed by '{'. It goes down on
// end/until, on '}', at the end of a block string or block comment, and on a
// line comment immediately followed by '}'.
void Fold_Doc(unsigned int startPos, unsigned int length, int initStyle, WordList *[], Accessor &styler)
{
    const unsigned int endPos = startPos + length;
    int nonBlankCount = 0;
    int line = styler.GetLine(startPos);
    int levelAtLineStart = styler.LevelAt(line) & SC_FOLDLEVELNUMBERMASK;
    int level = levelAtLineStart;
    char nextCh = styler[startPos];
    const bool compactFolding = styler.GetPropertyInt("fold.compact", 1) != 0;
    int nextStyle = styler.StyleAt(startPos);
    int curStyle = initStyle;
    char word[10];

    for (unsigned int pos = startPos; pos < endPos; pos++) {
        // slide the one-character / one-style look-ahead window
        const char ch = nextCh;
        nextCh = styler.SafeGetCharAt(pos + 1);
        const int prevStyle = curStyle;
        curStyle = nextStyle;
        nextStyle = styler.StyleAt(pos + 1);

        // a lone CR, or any LF, terminates the line
        const bool lineEnds = (ch == '\n') || (ch == '\r' && nextCh != '\n');

        // block strings and block comments fold as a unit
        const bool inBlock =
            curStyle == LITERALSTRING || curStyle == LUA_COMMENT || curStyle == CPP_COMMENT;
        const bool prevInBlock =
            prevStyle == LITERALSTRING || prevStyle == LUA_COMMENT || prevStyle == CPP_COMMENT;
        const bool nextInBlock =
            nextStyle == LITERALSTRING || nextStyle == LUA_COMMENT || nextStyle == CPP_COMMENT;

        if (curStyle == WORD0 && prevStyle != WORD0) {
            // first character of a keyword: only the initials of the folding
            // keywords are worth extracting
            switch (ch) {
            case 'i':
            case 'd':
            case 'f':
            case 'e':
            case 'r':
            case 'u': {
                unsigned int n = 0;
                while (n < 8 && iswordchar(styler.SafeGetCharAt(pos + n))) {
                    word[n] = styler[pos + n];
                    n++;
                }
                word[n] = '\0';

                if (strcmp(word, "if") == 0 || strcmp(word, "do") == 0 ||
                    strcmp(word, "function") == 0 || strcmp(word, "repeat") == 0) {
                    level++;
                } else if (strcmp(word, "end") == 0 || strcmp(word, "until") == 0) {
                    level--;
                }
                break;
            }
            default:
                break;
            }
        } else if (curStyle == OPERATOR) {
            if (ch == '{')
                level++;
            else if (ch == '}')
                level--;
        } else if (inBlock && !prevInBlock && (ch == '[' || ch == '/' || ch == '-')) {
            // entering a block string / block comment
            level++;
        } else if (inBlock && !nextInBlock && (ch == ']' || ch == '/')) {
            // leaving a block string / block comment
            level--;
        } else if (curStyle == CPP_COMMENTLINE || curStyle == LUA_COMMENTLINE) {
            // explicit fold markers: "//{" "//}" "--{" "--}"
            const bool atCommentStart =
                (ch == '/' && nextCh == '/') || (ch == '-' && nextCh == '-');
            if (atCommentStart) {
                const char marker = styler.SafeGetCharAt(pos + 2);
                if (marker == '{')
                    level++;
                else if (marker == '}')
                    level--;
            }
        }

        if (!isspacechar(ch))
            nonBlankCount++;

        if (lineEnds || (pos == endPos - 1)) {
            // store the level the line started with, plus its flags
            int lineLevel = levelAtLineStart;
            if (nonBlankCount == 0 && compactFolding)
                lineLevel |= SC_FOLDLEVELWHITEFLAG;
            if (level > levelAtLineStart)
                lineLevel |= SC_FOLDLEVELHEADERFLAG;
            if (lineLevel != styler.LevelAt(line))
                styler.SetLevel(line, lineLevel);

            line++;
            levelAtLineStart = level;
            nonBlankCount = 0;
        }
    }

    // when the range reaches the end of a document that ends with a line
    // break, the line after that break gets the current level as well
    const char finalCh = styler.SafeGetCharAt(endPos - 1);
    if ((unsigned)styler.Length() == endPos && (finalCh == '\n' || finalCh == '\r')) {
        styler.SetLevel(line, level);
    }
}
コード例 #9
0
ファイル: regexp.c プロジェクト: MajenkoLibraries/MuJS
/*
 * Runs the compiled program starting at pc against the text at sp, using an
 * explicit stack of pending threads (ready[]) for backtracking.
 *
 *   pc     first instruction to execute
 *   sp     position in the subject string where matching starts
 *   bol    start of the subject; used by I_BOL and by the word-boundary
 *          tests so that sp[-1] is only read when sp > bol
 *   flags  REG_ICASE, REG_NEWLINE and REG_NOTBOL are consulted here
 *   out    passed to spawn() for the initial thread, and receives the
 *          capture positions when the program reaches I_END
 *
 * Returns 1 as soon as one thread reaches I_END. Returns 0 when every thread
 * has died, or when an I_SPLIT finds the thread stack full (MAXTHREAD).
 */
static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out)
{
	Rethread ready[MAXTHREAD];
	Resub scratch;
	Resub sub;
	Rune c;
	int nready;
	int i;

	/* queue initial thread */
	spawn(ready + 0, pc, sp, out);
	nready = 1;

	/* run threads in stack order */
	while (nready > 0) {
		/* pop the most recently queued thread and resume it */
		--nready;
		pc = ready[nready].pc;
		sp = ready[nready].sp;
		memcpy(&sub, &ready[nready].sub, sizeof sub);
		/* inside the switch: 'continue' re-dispatches on the new pc,
		 * 'break' falls through to 'pc = pc + 1' below */
		for (;;) {
			switch (pc->opcode) {
			case I_END:
				/* success: publish this thread's captures */
				for (i = 0; i < MAXSUB; ++i) {
					out->sub[i].sp = sub.sub[i].sp;
					out->sub[i].ep = sub.sub[i].ep;
				}
				return 1;
			case I_JUMP:
				pc = pc->x;
				continue;
			case I_SPLIT:
				/* queue the y branch for later, follow x now */
				if (nready >= MAXTHREAD) {
					fprintf(stderr, "regexec: backtrack overflow!\n");
					return 0;
				}
				spawn(&ready[nready++], pc->y, sp, &sub);
				pc = pc->x;
				continue;

			case I_PLA:
				/* the program at pc->x must match at sp; sp is not
				 * advanced, and the nested call writes its captures
				 * into sub */
				if (!match(pc->x, sp, bol, flags, &sub))
					goto dead;
				pc = pc->y;
				continue;
			case I_NLA:
				/* the program at pc->x must NOT match at sp; it runs
				 * against a scratch copy so sub is left untouched */
				memcpy(&scratch, &sub, sizeof scratch);
				if (match(pc->x, sp, bol, flags, &scratch))
					goto dead;
				pc = pc->y;
				continue;

			case I_ANYNL:
				/* consume one rune; a zero rune kills the thread */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				break;
			case I_ANY:
				/* like I_ANYNL, but also rejects newline runes */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (isnewline(c))
					goto dead;
				break;
			case I_CHAR:
				/* consume one rune that must equal pc->c (compared
				 * after canon() when REG_ICASE is set) */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (flags & REG_ICASE)
					c = canon(c);
				if (c != pc->c)
					goto dead;
				break;
			case I_CCLASS:
				/* consume one rune that must be in class pc->cc */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (flags & REG_ICASE) {
					if (!incclasscanon(pc->cc, canon(c)))
						goto dead;
				} else {
					if (!incclass(pc->cc, c))
						goto dead;
				}
				break;
			case I_NCCLASS:
				/* consume one rune that must NOT be in class pc->cc */
				sp += chartorune(&c, sp);
				if (c == 0)
					goto dead;
				if (flags & REG_ICASE) {
					if (incclasscanon(pc->cc, canon(c)))
						goto dead;
				} else {
					if (incclass(pc->cc, c))
						goto dead;
				}
				break;
			case I_REF:
				/* back-reference: the next i bytes must compare equal
				 * to what capture pc->n currently spans */
				i = sub.sub[pc->n].ep - sub.sub[pc->n].sp;
				if (flags & REG_ICASE) {
					if (strncmpcanon(sp, sub.sub[pc->n].sp, i))
						goto dead;
				} else {
					if (strncmp(sp, sub.sub[pc->n].sp, i))
						goto dead;
				}
				if (i > 0)
					sp += i;
				break;

			case I_BOL:
				/* passes at the start of the subject unless REG_NOTBOL
				 * is set; with REG_NEWLINE also right after a newline */
				if (sp == bol && !(flags & REG_NOTBOL))
					break;
				if (flags & REG_NEWLINE)
					if (sp > bol && isnewline(sp[-1]))
						break;
				goto dead;
			case I_EOL:
				/* passes at the terminating zero; with REG_NEWLINE
				 * also right before a newline */
				if (*sp == 0)
					break;
				if (flags & REG_NEWLINE)
					if (isnewline(*sp))
						break;
				goto dead;
			case I_WORD:
				/* passes when exactly one of the characters before and
				 * at sp is a word character */
				i = sp > bol && iswordchar(sp[-1]);
				i ^= iswordchar(sp[0]);
				if (i)
					break;
				goto dead;
			case I_NWORD:
				/* inverse of I_WORD */
				i = sp > bol && iswordchar(sp[-1]);
				i ^= iswordchar(sp[0]);
				if (!i)
					break;
				goto dead;

			case I_LPAR:
				/* record where capture pc->n starts */
				sub.sub[pc->n].sp = sp;
				break;
			case I_RPAR:
				/* record where capture pc->n ends */
				sub.sub[pc->n].ep = sp;
				break;
			default:
				goto dead;
			}
			pc = pc + 1;
		}
		/* this thread failed; the outer loop pops the next one, if any */
dead: ;
	}
	return 0;
}