/* get_word:
 *  Fetches the next token from the input stream into buf. Whitespace is
 *  skipped; a run of word characters is returned as a single token; any
 *  other character is returned as a one character token. At end of input
 *  buf is set to the empty string.
 *
 *  NOTE(review): buf carries no length, so the caller has to supply a
 *  buffer that is large enough for the longest word in the input.
 */
static void get_word(char *buf)
{
   int ch;
   int len;

   for (;;) {
      ch = egg_getc();

      if (ch == EOF)
         break;

      if (iswordchar(ch)) {
         /* gather the whole run of word characters */
         len = 0;

         do {
            buf[len++] = ch;
            ch = egg_getc();
         } while (iswordchar(ch));

         /* the terminating character belongs to the next token */
         egg_ungetc(ch);
         buf[len] = 0;
         return;
      }

      if (!isspace(ch)) {
         /* punctuation and the like: a token of its own */
         buf[0] = ch;
         buf[1] = 0;
         return;
      }

      /* whitespace: keep looking */
   }

   /* ran out of input */
   buf[0] = 0;
}
// try to match "findText" from "start" with whitespace tolerance // (ignore all whitespace except after alphanumeric characters) int TextSearch::MatchLen(const WCHAR *start) const { const WCHAR *match = findText, *end = start; if (matchWordStart && start > pageText && iswordchar(start[-1]) && iswordchar(start[0])) return -1; if (!match) return -1; while (*match) { if (!*end) return -1; if (caseSensitive ? *match == *end : CharLower((LPWSTR)LOWORD(*match)) == CharLower((LPWSTR)LOWORD(*end))) /* characters are identical */; else if (str::IsWs(*match) && str::IsWs(*end)) /* treat all whitespace as identical */; // TODO: Adobe Reader seems to have a more extensive list of // normalizations - is there an easier way? else if (*match == '-' && (0x2010 <= *end && *end <= 0x2014)) /* make HYPHEN-MINUS also match HYPHEN, NON-BREAKING HYPHEN, FIGURE DASH, EN DASH and EM DASH (but not the other way around) */; else if (*match == '\'' && (0x2018 <= *end && *end <= 0x201b)) /* make APOSTROPHE also match LEFT/RIGHT SINGLE QUOTATION MARK */; else if (*match == '"' && (0x201c <= *end && *end <= 0x201f)) /* make QUOTATION MARK also match LEFT/RIGHT DOUBLE QUOTATION MARK */; else return -1; match++; end++; // treat "??" and "? ?" differently, since '?' could have been a word // character that's just missing an encoding (and '?' is the replacement // character); cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1574 if (*match && !isnoncjkwordchar(*(match - 1)) && (*(match - 1) != '?' || *match != '?') || str::IsWs(*(match - 1)) && str::IsWs(*(end - 1))) { SkipWhitespace(match); SkipWhitespace(end); } } if (matchWordEnd && end > pageText && iswordchar(end[-1]) && iswordchar(end[0])) return -1; return (int)(end - start); }
/*
 * Advances str using the supplied stepping function: over a whole word if
 * str is at a word-start character, otherwise over a single position.
 * Returns the new position.
 */
const char *forward_word_or_char(const char *str, step_t step)
{
    const int at_word_start = *str && isstartword(*str);

    /* the first character is consumed in either case */
    str = (*step)(str, 1);

    if (at_word_start) {
        /* swallow the remainder of the word */
        while (*str && iswordchar(*str))
            str = (*step)(str, 1);
    }

    return str;
}
// Scans forward through the text of page "pageno" from index "idx" and
// returns the index of the first non-word character found (or the text
// length if there is none). With "dontReturnInitial" set, any non-word
// characters at "idx" are skipped first, so that the scan starts inside
// the next word.
int SumatraUIAutomationTextRange::FindNextWordEndpoint(int pageno, int idx, bool dontReturnInitial)
{
    int textLen;
    const WCHAR *pageText = document->GetDM()->textCache->GetData(pageno, &textLen);

    if (dontReturnInitial) {
        // move up to the beginning of the next word
        while (idx < textLen && !iswordchar(pageText[idx]))
            idx++;
    }

    // move to just behind the last character of the word
    while (idx < textLen && iswordchar(pageText[idx]))
        idx++;

    return idx;
}
int SumatraUIAutomationTextRange::FindPreviousWordEndpoint(int pageno, int idx, bool dontReturnInitial) { // based on TextSelection::SelectWordAt int textLen; const WCHAR *pageText = document->GetDM()->textCache->GetData(pageno, &textLen); if (dontReturnInitial) { for (; idx > 0; idx--) { if (iswordchar(pageText[idx - 1])) break; } } for (; idx > 0; idx--) { if (!iswordchar(pageText[idx - 1])) break; } return idx; }
/* check_ascii_word:
 *  Verifies that buf holds a valid variable name, ie. that it is non-empty
 *  and consists of word characters only. Problems are reported through
 *  egg_error() using the supplied error buffer.
 */
static void check_ascii_word(char *buf, char *error)
{
   int len = 0;

   /* measure the leading run of word characters */
   while (buf[len] && iswordchar(buf[len]))
      len++;

   if (len == 0) {
      egg_error(error, "Missing identifier");
      return;
   }

   /* anything left after the run is an illegal character */
   if (buf[len])
      egg_error(error, "Invalid character ('%c') in identifier", buf[len]);
}
static int matchstring( uint *rxd, const QChar *str, uint strlength, const QChar *bol, bool cs ) { const QChar *p = str; const QChar *start = p; uint pl = strlength; uint *d = rxd; //### in all cases here: handle pl == 0! (don't read past strlen) while ( *d ) { if ( *d & CHR ) { // match char if ( !pl ) return -1; QChar c( *d ); if ( !cs && !c.row() ) { // case insensitive, #Only 8bit if ( p->row() || tolower(p->cell()) != c.cell() ) return -1; p++; pl--; } else { // case insensitive if ( *p != c ) return -1; p++; pl--; } d++; } else if ( *d & MCC ) { // match char class if ( !pl ) return -1; if ( !matchcharclass( d, *p ) ) return -1; p++; pl--; d += (*d & MVL) + 1; } else switch ( *d++ ) { case PWS: // match whitespace if ( !pl || !p->isSpace() ) return -1; p++; pl--; break; case PDG: // match digits if ( !pl || !p->isDigit() ) return -1; p++; pl--; break; case ANY: // match anything if ( !pl ) return -1; p++; pl--; break; case BOL: // match beginning of line if ( p != bol ) return -1; break; case EOL: // match end of line if ( pl ) return -1; break; case BOW: // match beginning of word if ( !iswordchar(*p) || (p > bol && iswordchar(*(p-1)) ) ) return -1; break; case EOW: // match end of word if ( iswordchar(*p) || p == bol || !iswordchar(*(p-1)) ) return -1; break; case CLO: // Kleene closure { const QChar *first_p = p; if ( *d & CHR ) { // match char QChar c( *d ); if ( !cs && !c.row() ) { // case insensitive, #only 8bit while ( pl && !p->row() && tolower(p->cell())==c.cell() ) { p++; pl--; } } else { // case sensitive while ( pl && *p == c ) { p++; pl--; } } d++; } else if ( *d & MCC ) { // match char class while( pl && matchcharclass( d, *p ) ) { p++; pl--; } d += (*d & MVL) + 1; } else if ( *d == PWS ) { while ( pl && p->isSpace() ) { p++; pl--; } d++; } else if ( *d == PDG ) { while ( pl && p->isDigit() ) { p++; pl--; } d++; } else if ( *d == ANY ) { p += pl; pl = 0; d++; } else { return -1; // error } d++; // skip CLO's END while ( p >= first_p ) { // go 
backwards int end = matchstring( d, p, pl, bol, cs ); if ( end >= 0 ) return ( p - start ) + end; if ( !p ) return -1; --p; ++pl; } } return -1; case OPT: // optional closure { const QChar *first_p = p; if ( *d & CHR ) { // match char QChar c( *d ); if ( !cs && !c.row() ) { // case insensitive, #only 8bit if ( pl && !p->row() && tolower(p->cell()) == c.cell() ) { p++; pl--; } } else { // case sensitive if ( pl && *p == c ) { p++; pl--; } } d++; } else if ( *d & MCC ) { // match char class if ( pl && matchcharclass( d, *p ) ) { p++; pl--; } d += (*d & MVL) + 1; } else if ( *d == PWS ) { if ( pl && p->isSpace() ) { p++; pl--; } d++; } else if ( *d == PDG ) { if ( pl && p->isDigit() ) { p++; pl--; } d++; } else if ( *d == ANY ) { if ( pl ) { p++; pl--; } d++; } else { return -1; // error } d++; // skip OPT's END while ( p >= first_p ) { // go backwards int end = matchstring( d, p, pl, bol, cs ); if ( end >= 0 ) return ( p - start ) + end; if ( !p ) return -1; --p; ++pl; } } return -1; default: // error return -1; } } return p - start; }
// <--- Fold ---> void Fold_Doc(unsigned int startPos, unsigned int length, int initStyle, WordList *[], Accessor &styler) { unsigned int lengthDoc = startPos + length; int visibleChars = 0; int lineCurrent = styler.GetLine(startPos); int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; int levelCurrent = levelPrev; char chNext = styler[startPos]; bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; int styleNext = styler.StyleAt(startPos); int style = initStyle; char s[10]; for (unsigned int i = startPos; i < lengthDoc; i++) { char ch = chNext; chNext = styler.SafeGetCharAt(i + 1); int stylePrev = style; style = styleNext; styleNext = styler.StyleAt(i + 1); bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); if (style == WORD0 && stylePrev != WORD0) { if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') { unsigned int j; for (j = 0; j < 8 && iswordchar(styler.SafeGetCharAt(i + j)); j++) { s[j] = styler[i + j]; } s[j] = '\0'; if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) { levelCurrent++; } if ((strcmp(s, "end") == 0) || (strcmp(s, "until") == 0)) { levelCurrent--; } } } else if (style == OPERATOR) { if (ch == '{') { levelCurrent++; } else if (ch == '}') { levelCurrent--; } } else if ((style == LITERALSTRING || style == LUA_COMMENT || style == CPP_COMMENT) && !(stylePrev == LITERALSTRING || stylePrev == LUA_COMMENT || stylePrev == CPP_COMMENT) && (ch == '[' || ch == '/' || ch == '-')) { levelCurrent++; } else if ((style == LITERALSTRING || style == LUA_COMMENT || style == CPP_COMMENT) && !(styleNext == LITERALSTRING || styleNext == LUA_COMMENT || styleNext == CPP_COMMENT) && (ch == ']' || ch == '/')) { levelCurrent--; } else if (style == CPP_COMMENTLINE || style == LUA_COMMENTLINE) { if ((ch == '/' && chNext == '/') || (ch == '-' && chNext == '-')) { char chNext2 = styler.SafeGetCharAt(i + 2); if (chNext2 == '{') { levelCurrent++; 
} else if (chNext2 == '}') { levelCurrent--; } } } if (!isspacechar(ch)) { visibleChars++; } if (atEOL || (i == lengthDoc-1)) { int lev = levelPrev; if (visibleChars == 0 && foldCompact) { lev |= SC_FOLDLEVELWHITEFLAG; } if (levelCurrent > levelPrev) { lev |= SC_FOLDLEVELHEADERFLAG; } if (lev != styler.LevelAt(lineCurrent)) { styler.SetLevel(lineCurrent, lev); } lineCurrent++; levelPrev = levelCurrent; visibleChars = 0; } } char lastChar = styler.SafeGetCharAt(lengthDoc-1); if ((unsigned)styler.Length() == lengthDoc && (lastChar == '\n' || lastChar == '\r')) { styler.SetLevel(lineCurrent, levelCurrent); } }
/*
 * Runs the compiled program starting at 'pc' against the text at 'sp'.
 *
 * Pending alternatives are kept on a fixed-size stack of threads; each
 * thread carries its own copy of the capture state. The most recently
 * queued thread is run first, until it either reaches I_END (match) or
 * fails an instruction (the thread is dropped and the next one is run).
 *
 * 'bol' is the start of the subject (used by I_BOL and the word boundary
 * tests), 'flags' holds the REG_* flags. 'out' supplies the initial capture
 * state and receives the captures of the successful thread.
 *
 * Returns 1 on a match; 0 if no thread matches or the thread stack
 * overflows.
 */
static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out)
{
	Rethread stack[MAXTHREAD];
	Resub probe;
	Resub caps;
	Rune rune;
	int depth;
	int n;

	/* queue initial thread */
	spawn(&stack[0], pc, sp, out);
	depth = 1;

	/* run threads in stack order */
	while (depth > 0) {
		--depth;
		pc = stack[depth].pc;
		sp = stack[depth].sp;
		memcpy(&caps, &stack[depth].sub, sizeof caps);

		for (;;) {
			switch (pc->opcode) {
			case I_END:
				/* success: hand the captures back to the caller */
				for (n = 0; n < MAXSUB; ++n) {
					out->sub[n].sp = caps.sub[n].sp;
					out->sub[n].ep = caps.sub[n].ep;
				}
				return 1;

			case I_JUMP:
				pc = pc->x;
				continue;

			case I_SPLIT:
				/* queue the y branch for later, carry on with x */
				if (depth >= MAXTHREAD) {
					fprintf(stderr, "regexec: backtrack overflow!\n");
					return 0;
				}
				spawn(&stack[depth++], pc->y, sp, &caps);
				pc = pc->x;
				continue;

			case I_PLA:
				/* positive lookahead: sub-program must match here */
				if (!match(pc->x, sp, bol, flags, &caps))
					goto dead;
				pc = pc->y;
				continue;

			case I_NLA:
				/* negative lookahead: run on a scratch copy of the captures */
				memcpy(&probe, &caps, sizeof probe);
				if (match(pc->x, sp, bol, flags, &probe))
					goto dead;
				pc = pc->y;
				continue;

			case I_ANYNL:
				sp += chartorune(&rune, sp);
				if (rune == 0)
					goto dead;
				break;

			case I_ANY:
				sp += chartorune(&rune, sp);
				if (rune == 0)
					goto dead;
				if (isnewline(rune))
					goto dead;
				break;

			case I_CHAR:
				sp += chartorune(&rune, sp);
				if (rune == 0)
					goto dead;
				if (flags & REG_ICASE)
					rune = canon(rune);
				if (rune != pc->c)
					goto dead;
				break;

			case I_CCLASS:
				sp += chartorune(&rune, sp);
				if (rune == 0)
					goto dead;
				if (flags & REG_ICASE) {
					if (!incclasscanon(pc->cc, canon(rune)))
						goto dead;
				} else if (!incclass(pc->cc, rune)) {
					goto dead;
				}
				break;

			case I_NCCLASS:
				sp += chartorune(&rune, sp);
				if (rune == 0)
					goto dead;
				if (flags & REG_ICASE) {
					if (incclasscanon(pc->cc, canon(rune)))
						goto dead;
				} else if (incclass(pc->cc, rune)) {
					goto dead;
				}
				break;

			case I_REF:
				/* back reference: compare against the captured text */
				n = caps.sub[pc->n].ep - caps.sub[pc->n].sp;
				if (flags & REG_ICASE) {
					if (strncmpcanon(sp, caps.sub[pc->n].sp, n))
						goto dead;
				} else if (strncmp(sp, caps.sub[pc->n].sp, n)) {
					goto dead;
				}
				if (n > 0)
					sp += n;
				break;

			case I_BOL:
				if (sp == bol && !(flags & REG_NOTBOL))
					break;
				if ((flags & REG_NEWLINE) && sp > bol && isnewline(sp[-1]))
					break;
				goto dead;

			case I_EOL:
				if (*sp == 0)
					break;
				if ((flags & REG_NEWLINE) && isnewline(*sp))
					break;
				goto dead;

			case I_WORD:
				/* word boundary: word-ness differs on the two sides of sp */
				n = sp > bol && iswordchar(sp[-1]);
				n ^= iswordchar(sp[0]);
				if (!n)
					goto dead;
				break;

			case I_NWORD:
				/* not a word boundary */
				n = sp > bol && iswordchar(sp[-1]);
				n ^= iswordchar(sp[0]);
				if (n)
					goto dead;
				break;

			case I_LPAR:
				caps.sub[pc->n].sp = sp;
				break;

			case I_RPAR:
				caps.sub[pc->n].ep = sp;
				break;

			default:
				goto dead;
			}

			/* instruction succeeded, move on to the next one */
			++pc;
		}
dead: ;
	}

	return 0;
}