/* - regrepeat - repeatedly match something simple, report how many */ static int regrepeat(char *p) { register int count = 0, len = 0; register char *scan; register char *opnd; scan = reginput; opnd = OPERAND(p); switch (OP(p)) { case ANY: while (( (len = CHARLEN(scan)) > 0)) { count++; scan += len; reglmlen = len; } break; case EXACTLY: { int len = 0; len = CHARLEN(opnd); while (len > 0 && (CHARLEN(scan) == len) && !strncmp(opnd, scan, len)) { count++; scan += len; reglmlen = len; } } break; case ANYOF: while (( (len = CHARLEN(scan)) > 0) && inclass(opnd, scan)) { count++; scan += len; reglmlen = len; } break; case ANYBUT: while ( ((len = CHARLEN(scan)) > 0) && !inclass(opnd, scan)) { count++; scan += len; reglmlen = len; } break; default: /* Oh dear. Called inappropriately. */ count = 0; /* Best compromise. */ break; } reginput = scan; return(count); }
/*
 * ucvt - build a URL-encoded (form style) copy of the Rune string s.
 *
 * s is first formatted into a byte string with smprint("%S", ...)
 * (presumably its UTF-8 encoding -- confirm against the print library).
 * Each byte is then emitted as:
 *   - itself, if it is in the class  -/$_@.!*'(),a-zA-Z0-9
 *   - '+',    if it is a space
 *   - %XY,    two hexdigit() characters for the high and low nibble,
 *             otherwise.
 *
 * Returns a zero-terminated Rune buffer obtained from runemalloc; this
 * function does not free it.  Returns NULL if smprint fails.
 * NOTE(review): callers previously never saw NULL (the function would
 * have faulted in strlen instead) -- confirm they tolerate it.
 */
Rune*
ucvt(Rune* s)
{
	Rune *u;
	char *t;
	int i, c, n, j, len;

	t = smprint("%S", s);
	if(t == NULL)
		return NULL;
	n = strlen(t);

	/* Pass 1: size the output.  Space costs one slot because it becomes '+'. */
	len = 0;
	for(i=0; i<n; i++){
		/* Read as unsigned so high-bit bytes are 128..255, never negative. */
		c = (unsigned char)t[i];
		if(c == ' ' || inclass(c, L"-/$_@.!*'(),a-zA-Z0-9"))
			len++;
		else
			len += 3;
	}

	u = runemalloc(len+1);

	/* Pass 2: encode, using exactly the same classification as pass 1. */
	j = 0;
	for(i=0; i<n; i++){
		c = (unsigned char)t[i];
		if(inclass(c, L"-/$_@.!*'(),a-zA-Z0-9"))
			u[j++] = c;
		else if(c == ' ')
			u[j++] = '+';
		else{
			u[j++] = '%';
			u[j++] = hexdigit((c >> 4)&15);
			u[j++] = hexdigit(c&15);
		}
	}
	u[j] = 0;
	free(t);
	return u;
}
/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */ static int /* 0 failure, 1 success */ regmatch(char *prog) { register char *scan; /* Current node. */ char *next; /* Next node. */ wchar_t wc = L'\0'; int len; scan = prog; while (scan != NULL) { next = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) return(0); break; case EOL: if (CHARLEN(reginput) != 0) return(0); break; case WORDA: /* Must be looking at a letter, digit, or _ */ len = mbtowc(&wc, reginput, MB_CUR_MAX); if (len == -1) wc = *reginput; if ((!iswalnum(wc)) && wc != L'_') return(0); /* Prev must be BOL or nonword */ len = mbtowc(&wc, reginput - reglmlen, MB_CUR_MAX); if (len == -1) { wc = *(reginput- reglmlen); len = 1; } if (reginput > regbol && (iswalnum(wc) || wc == L'_')) return(0); break; case WORDZ: len = mbtowc(&wc, reginput, MB_CUR_MAX); if (len == -1) { wc = *reginput; len = 1; } /* Must be looking at non letter, digit, or _ */ if (iswalnum(wc) || wc == L'_') return(0); /* We don't care what the previous char was */ break; case ANY: /* Solaris 2.6 Motif diff bug 1236359 - 1 line */ if ( (len = CHARLEN(reginput)) <= 0) return(0); reglmlen = len; reginput += INCRLEN(len); break; case EXACTLY: { register int len; register int clen; register char *opnd; register char *op, *ip; opnd = OPERAND(scan); len = strlen(opnd); for (clen = len, op = opnd, ip = reginput; clen; ) { int opl = CHARLEN(op), ipl = CHARLEN(ip); if (opl == ipl && !strncmp(op, ip, ipl)) { op += ipl; ip += ipl; clen -= ipl; reglmlen = ipl; } else break; } if (clen) return(0); reginput += len; } break; case ANYOF: /* Solaris 2.6 motif diff bug 1236359 - 
1 line */ if ( ((len = CHARLEN(reginput)) <= 0) || !inclass(OPERAND(scan), reginput)) return 0; reginput += len; reglmlen = len; break; case ANYBUT: /* Solaris 2.6 motif diff bug 1236359 - 1 line */ if ( ((len = CHARLEN(reginput)) <= 0) || inclass(OPERAND(scan), reginput)) return 0; reginput += len; reglmlen = len; break; case NOTHING: break; case BACK: break; case OPEN + 1: case OPEN + 2: case OPEN + 3: case OPEN + 4: case OPEN + 5: case OPEN + 6: case OPEN + 7: case OPEN + 8: case OPEN + 9: { register int no; register char *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. */ if (regstartp[no] == NULL) regstartp[no] = save; return(1); } else return(0); } break; case CLOSE + 1: case CLOSE + 2: case CLOSE + 3: case CLOSE + 4: case CLOSE + 5: case CLOSE + 6: case CLOSE + 7: case CLOSE + 8: case CLOSE + 9: { register int no; register char *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (regendp[no] == NULL) regendp[no] = save; return(1); } else return(0); } break; case BRANCH: { register char *save; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ else { do { save = reginput; if (regmatch(OPERAND(scan))) return(1); reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return(0); /* NOTREACHED */ } } break; case STAR: case PLUS: { register char *nextch; register int no; register char *save; register int min; int nchars = 0; /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ nextch = 0; if (OP(next) == EXACTLY) nextch = OPERAND(next); min = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min) { /* Solaris 2.6 motif diff bug 1236359 - 1 line */ int mb_len = 0; /* If it could work, try it. 
*/ if (!nextch || !(len = CHARLEN(nextch)) || !strncmp(reginput, nextch, len) ) if (regmatch(next)) return(1); /* Couldn't or didn't -- back up. */ no--; reginput = save; /* Solaris 2.6 motif diff bug 1236359 - 4 lines */ for (nchars = 0; nchars < no && mb_len >= 0; nchars++) { mb_len = CHARLEN(reginput); if (mb_len > 0) reginput += mb_len; } } return(0); } break; case END: return(1); /* Success! */ break; default: return(0); break; } scan = next; } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. */ return(0); }