/*
 * Measure a delimited span (e.g. "(...)" or a quoted string) at the start
 * of `txt`.
 *
 *   pat    - pattern position of the class being matched. It is not used
 *            here: an unterminated span is a property of the text, so it
 *            is reported as "no match" rather than as a pattern error.
 *   txt    - text to scan; must begin with `c_beg` for a match.
 *   c_beg  - opening delimiter.
 *   c_end  - closing delimiter (may be equal to `c_beg`, e.g. for quotes).
 *   c_esc  - escape character; the character following it is skipped
 *            without being interpreted. '\0' means "no escape": the scan
 *            returns on NUL before the escape comparison is reached.
 *
 * Returns the number of bytes from `txt` up to and including the matching
 * closing delimiter, or 0 if `txt` does not start with `c_beg` or the span
 * is not closed before the end of the text.
 *
 * Nested `c_beg`/`c_end` pairs are balanced. When the two delimiters are
 * equal the closing test is evaluated first, so no nesting takes place.
 */
static int utl_pmx_get_delimited(const char *pat, const char *txt,int32_t c_beg, int32_t c_end, int32_t c_esc)
{
  const char *s = txt;
  int32_t ch;
  int depth = 0;   /* number of currently open nested delimiters */
  int n;           /* value returned by utl_pmx_nextch(), used as byte step */

  (void)pat;

  n = utl_pmx_nextch(s,&ch);
  if (n == 0 || ch != c_beg) return 0;

  for (;;) {
    s += n;
    n = utl_pmx_nextch(s,&ch);
    if (ch == '\0') return 0;          /* text ended before the span closed */
    _logdebug("BRACE: '%c' cnt:%d",ch,depth);

    if (ch == c_end) {
      if (depth == 0) return (int)((s+n)-txt);
      depth--;
    }
    else if (ch == c_beg) {
      depth++;
    }
    else if (ch == c_esc) {
      /* Skip the escaped character without interpreting it. */
      s += n;
      n = utl_pmx_nextch(s,&ch);
      /* An escape right before the end of the text leaves the span
      ** unterminated: treat it like any other unterminated span. */
      if (ch == '\0') return 0;
    }
  }
}
/*
 * Determine the opening delimiter, closing delimiter and escape character
 * to be used for a braced (<B>) or quoted (<Q>) span.
 *
 *   pat, pat_end - the class arguments in the pattern, [pat, pat_end).
 *                  One character sets only the escape (delimiters stay
 *                  '(' and ')'); two characters set open and close; three
 *                  set open, close and escape (e.g. <B()\> or <Q""\>).
 *   txt          - text at the current position; only inspected when no
 *                  arguments are given, to infer the delimiters from the
 *                  first character.
 *   braced       - non-zero to infer brackets, zero to infer quotes.
 *   c_beg_ptr, c_end_ptr, c_esc_ptr - receive the results.
 *
 * Returns 1 on success. Returns 0, leaving the outputs untouched, when the
 * delimiters had to be inferred and the first character of `txt` is not a
 * recognized opening delimiter.
 *
 * Characters are compared as the non-negative values stored by
 * utl_pmx_nextch(); single-byte delimiters above 0x7F are therefore
 * written as integer constants (a character constant such as '\xAB' is
 * negative where plain char is signed and would not compare equal to the
 * 0x91-style values used for the other high bytes).
 * NOTE(review): assumes utl_pmx_nextch() never yields negative values -
 * confirm against its definition.
 */
static int utl_pmx_get_limits(const char *pat, const char *pat_end, const char *txt, int braced,
                              int32_t *c_beg_ptr, int32_t *c_end_ptr, int32_t *c_esc_ptr)
{
  int32_t c_beg = '(';
  int32_t c_end = ')';
  int32_t c_esc = '\0';
  int32_t ch;

  /* The '*' precision of %.*s must be an int, not a ptrdiff_t. */
  _logdebug("BRACE: [%.*s]",(int)(pat_end-pat),pat);

  if (pat < pat_end) { /* <B()\> <Q""\>*/
    pat += utl_pmx_nextch(pat,&c_esc);
    if (pat < pat_end) {
      /* More than one character: the first was the opening delimiter. */
      c_beg = c_esc;
      c_esc = '\0';
      pat += utl_pmx_nextch(pat,&c_end);
    }
    if (pat < pat_end) {
      pat += utl_pmx_nextch(pat,&c_esc);
    }
  }
  else { /* Just <B> or <Q>, try to infer the braces */
    c_beg = '\0';
    (void)utl_pmx_nextch(txt,&ch);

    if (braced) {
      switch (ch) {
        case '(' : c_beg = ch; c_end = ')'; break;
        case '[' : c_beg = ch; c_end = ']'; break;
        case '{' : c_beg = ch; c_end = '}'; break;
        case '<' : c_beg = ch; c_end = '>'; break;
        default  : break;
      }
    }
    else { /* Quoted string */
      c_esc = '\\';
      switch (ch) {
        case '"'    : c_beg = ch; c_end = ch;     break;
        case '\''   : c_beg = ch; c_end = ch;     break;
        case '`'    : c_beg = ch; c_end = ch;     break;
        case 0x91   : c_beg = ch; c_end = 0x92;   break; /* Windows-1252 ("ANSI") single quotes */
        case 0x93   : c_beg = ch; c_end = 0x94;   break; /* Windows-1252 ("ANSI") double quotes */
        case 0x2018 : c_beg = ch; c_end = 0x2019; break; /* Unicode single quotes */
        case 0x201C : c_beg = ch; c_end = 0x201D; break; /* Unicode double quotes */
        default     : break;
      }
    }

    if (c_beg == '\0') { /* Valid both as quoted string or braces */
      switch (ch) {
        case 0xAB   : c_beg = ch; c_end = 0xBB;   break; /* Unicode and ISO-8859-1 "<<" and ">>" guillemets */
        case 0x8B   : c_beg = ch; c_end = 0x9B;   break; /* Windows-1252 single "<" and ">" angle quotes */
        case 0x2039 : c_beg = ch; c_end = 0x203A; break; /* Unicode Single pointing Angle Quotation */
        case 0x2329 : c_beg = ch; c_end = 0x232A; break; /* Unicode ANGLE BRACKETS */
        case 0x27E8 : c_beg = ch; c_end = 0x27E9; break; /* Unicode MATHEMATICAL ANGLE BRACKETS */
        case 0x27EA : c_beg = ch; c_end = 0x27EB; break; /* Unicode MATHEMATICAL DOUBLE ANGLE BRACKETS */
        default     : return 0;
      }
    }
  }

  _logdebug("open:'%d' close:'%d' esc:'%d'",c_beg,c_end,c_esc);
  *c_beg_ptr = c_beg;
  *c_end_ptr = c_end;
  *c_esc_ptr = c_esc;
  return 1;
}
static const char *utl_pmx_match(const char *pat, const char *txt) { int32_t len; int32_t ch; int32_t c1; int16_t inv =0; utl_pmx_state_s *state; utl_pmx_state_reset(); utl_pmx_state_push(pat,txt,1,1,0); while (*pat) { _logdebug("[MATCH] %d [%s] [%s]",pmxcount(),pat,txt); c1 = 0; switch (*pat) { case '(' : pat++; if (*pat == '|') {inv = 1; pat++;} if (!utl_pmx_state_push(pat,txt,1,1,inv)) utl_pmx_set_paterror(pat); break; case '|' : pat = utl_pmx_alt_skip(pat); break; case ')' : pat++; _logdebug(")->%d",utl_pmx_stack_ptr); if (utl_pmx_stack_ptr < 2) { utl_pmx_set_paterror(pat-1); break; } /* If we are here, we have matched what is in the (...) */ state = utl_pmx_state_top(); inv = state->inv; if (inv) { /* we shouldn't have matched it :( */ utl_pmx_state_pop(); utl_pmx_FAIL; } utl_pmx_capt[state->cap][1] = txt; state->n++; _logdebug("match #%d min:%d max:%d",state->n,state->min_n, state->max_n); if (state->n < state->max_n) { utl_pmx_capt[state->cap][0] = txt; pat = state->pat; /* try to match once more */ } else { utl_pmx_capt[state->cap][0] = state->txt; utl_pmx_state_pop(); } break; case '<' : if (!utl_pmx_class(&pat,&txt)) utl_pmx_FAIL; break; case '%' : if (pat[1]) len = utl_pmx_nextch(++pat, &c1); default : if (c1 == 0) len = utl_pmx_nextch(pat, &c1); len = utl_pmx_nextch(txt, &ch); if (!utl_pmx_case) { ch = utl_pmx_fold(ch); c1 = utl_pmx_fold(c1); } if (ch != c1) { _logdebug("FAIL: %d %d",c1,ch); utl_pmx_FAIL; } txt += len; pat += len; break; fail : pat = utl_pmx_alt(pat, &txt) ; /* search for an alternative */ if (*pat == '\0') utl_pmx_capnum = 0; break; } } utl_pmx_capt[0][1] = txt; for (len = utl_pmx_capnum; len < utl_pmx_MAXCAPT; len++) { utl_pmx_capt[len][0] = utl_pmx_capt[len][1] = NULL; } _logdebug("res: %p - %p",utl_pmx_capt[0][0],utl_pmx_capt[0][1]); return utl_pmx_capt[0][0]; }