NODE * r_force_string(register NODE *s) { NODE *ret; #ifdef GAWKDEBUG if (s == NULL) cant_happen(); if (s->type != Node_val) cant_happen(); if (s->stref <= 0) cant_happen(); if ((s->flags & STR) != 0 && (s->stfmt == -1 || s->stfmt == CONVFMTidx)) return s; #endif ret = format_val(CONVFMT, CONVFMTidx, s); return ret; }
Regexp * make_regexp(char *s, size_t len, int ignorecase, int dfa) { Regexp *rp; const char *rerr; char *src = s; char *temp; char *end = s + len; register char *dest; register int c, c2; /* Handle escaped characters first. */ /* * Build a copy of the string (in dest) with the * escaped characters translated, and generate the regex * from that. */ emalloc(dest, char *, len + 2, "make_regexp"); temp = dest; while (src < end) { if (*src == '\\') { c = *++src; switch (c) { case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v': case 'x': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c2 = parse_escape(&src); if (c2 < 0) cant_happen(); /* * Unix awk treats octal (and hex?) chars * literally in re's, so escape regexp * metacharacters. */ if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x') && strchr("()|*+?.^$\\[]", c2) != NULL) *dest++ = '\\'; *dest++ = (char) c2; break; case '8': case '9': /* a\9b not valid */ *dest++ = c; src++; break; case 'y': /* normally \b */ /* gnu regex op */ if (! do_traditional) { *dest++ = '\\'; *dest++ = 'b'; src++; break; } /* else, fall through */ default: *dest++ = '\\'; *dest++ = (char) c; src++; break; } /* switch */ } else *dest++ = *src++; /* not '\\' */ } /* for */ *dest = '\0' ; /* Only necessary if we print dest ? */ emalloc(rp, Regexp *, sizeof(*rp), "make_regexp"); memset((char *) rp, 0, sizeof(*rp)); rp->pat.allocated = 0; /* regex will allocate the buffer */ emalloc(rp->pat.fastmap, char *, 256, "make_regexp"); if (ignorecase) rp->pat.translate = casetable; else rp->pat.translate = NULL; len = dest - temp; if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL) fatal("%s: /%s/", gettext(rerr), temp); /* gack. this must be done *after* re_compile_pattern */ rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */ if (dfa && ! ignorecase) { dfacomp(temp, len, &(rp->dfareg), TRUE); rp->dfa = TRUE; } else rp->dfa = FALSE; free(temp); return rp; }
AWKNUM r_force_number(register NODE *n) { register char *cp; register char *cpend; char save; char *ptr; unsigned int newflags; extern double strtod(); #ifdef GAWKDEBUG if (n == NULL) cant_happen(); if (n->type != Node_val) cant_happen(); if(n->flags == 0) cant_happen(); if (n->flags & NUM) return n->numbr; #endif /* all the conditionals are an attempt to avoid the expensive strtod */ n->numbr = 0.0; n->flags |= NUM; n->flags &= ~UNINITIALIZED; if (n->stlen == 0) { if (0 && do_lint) lintwarn(_("can't convert string to float")); return 0.0; } cp = n->stptr; if (ISALPHA(*cp)) { if (0 && do_lint) lintwarn(_("can't convert string to float")); return 0.0; } cpend = cp + n->stlen; while (cp < cpend && ISSPACE(*cp)) cp++; if (cp == cpend || ISALPHA(*cp)) { if (0 && do_lint) lintwarn(_("can't convert string to float")); return 0.0; } if (n->flags & MAYBE_NUM) { newflags = NUMBER; n->flags &= ~MAYBE_NUM; } else newflags = 0; if (cpend - cp == 1) { if (ISDIGIT(*cp)) { n->numbr = (AWKNUM)(*cp - '0'); n->flags |= newflags; } else if (0 && do_lint) lintwarn(_("can't convert string to float")); return n->numbr; } if (do_non_decimal_data) { errno = 0; if (! do_traditional && isnondecimal(cp)) { n->numbr = nondec2awknum(cp, cpend - cp); goto finish; } } errno = 0; save = *cpend; *cpend = '\0'; n->numbr = (AWKNUM) strtod((const char *) cp, &ptr); /* POSIX says trailing space is OK for NUMBER */ while (ISSPACE(*ptr)) ptr++; *cpend = save; finish: /* the >= should be ==, but for SunOS 3.5 strtod() */ if (errno == 0 && ptr >= cpend) { n->flags |= newflags; } else { if (0 && do_lint && ptr < cpend) lintwarn(_("can't convert string to float")); errno = 0; } return n->numbr; }
Regexp * make_regexp(const char *s, size_t len, int ignorecase, int dfa) { Regexp *rp; const char *rerr; const char *src = s; char *temp; const char *end = s + len; register char *dest; register int c, c2; static short first = TRUE; static short no_dfa = FALSE; int has_anchor = FALSE; /* The number of bytes in the current multibyte character. It is 0, when the current character is a singlebyte character. */ size_t is_multibyte = 0; #ifdef MBS_SUPPORT mbstate_t mbs; if (gawk_mb_cur_max > 1) memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */ #endif if (first) { first = FALSE; no_dfa = (getenv("GAWK_NO_DFA") != NULL); /* for debugging and testing */ } /* Handle escaped characters first. */ /* * Build a copy of the string (in dest) with the * escaped characters translated, and generate the regex * from that. */ emalloc(dest, char *, len + 2, "make_regexp"); temp = dest; while (src < end) { #ifdef MBS_SUPPORT if (gawk_mb_cur_max > 1 && ! is_multibyte) { /* The previous byte is a singlebyte character, or last byte of a multibyte character. We check the next character. */ is_multibyte = mbrlen(src, end - src, &mbs); if ((is_multibyte == 1) || (is_multibyte == (size_t) -1) || (is_multibyte == (size_t) -2 || (is_multibyte == 0))) { /* We treat it as a singlebyte character. */ is_multibyte = 0; } } #endif /* We skip multibyte character, since it must not be a special character. */ if ((gawk_mb_cur_max == 1 || ! is_multibyte) && (*src == '\\')) { c = *++src; switch (c) { case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v': case 'x': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c2 = parse_escape(&src); if (c2 < 0) cant_happen(); /* * Unix awk treats octal (and hex?) chars * literally in re's, so escape regexp * metacharacters. */ if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x') && strchr("()|*+?.^$\\[]", c2) != NULL) *dest++ = '\\'; *dest++ = (char) c2; break; case '8': case '9': /* a\9b not valid */ *dest++ = c; src++; break; case 'y': /* normally \b */ /* gnu regex op */ if (! do_traditional) { *dest++ = '\\'; *dest++ = 'b'; src++; break; } /* else, fall through */ default: *dest++ = '\\'; *dest++ = (char) c; src++; break; } /* switch */ } else { c = *src; if (c == '^' || c == '$') has_anchor = TRUE; *dest++ = *src++; /* not '\\' */ } if (gawk_mb_cur_max > 1 && is_multibyte) is_multibyte--; } /* while */ *dest = '\0' ; /* Only necessary if we print dest ? */ emalloc(rp, Regexp *, sizeof(*rp), "make_regexp"); memset((char *) rp, 0, sizeof(*rp)); rp->pat.allocated = 0; /* regex will allocate the buffer */ emalloc(rp->pat.fastmap, char *, 256, "make_regexp"); /* * Lo these many years ago, had I known what a P.I.T.A. IGNORECASE * was going to turn out to be, I wouldn't have bothered with it. * * In the case where we have a multibyte character set, we have no * choice but to use RE_ICASE, since the casetable is for single-byte * character sets only. * * On the other hand, if we do have a single-byte character set, * using the casetable should give a performance improvement, since * it's computed only once, not each time a regex is compiled. We * also think it's probably better for portability. See the * discussion by the definition of casetable[] in eval.c. */ if (ignorecase) { if (gawk_mb_cur_max > 1) { syn |= RE_ICASE; rp->pat.translate = NULL; } else { syn &= ~RE_ICASE; rp->pat.translate = (char *) casetable; } } else { rp->pat.translate = NULL; syn &= ~RE_ICASE; } dfasyntax(syn | (ignorecase ? RE_ICASE : 0), ignorecase ? TRUE : FALSE, '\n'); re_set_syntax(syn); len = dest - temp; if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL) fatal("%s: /%s/", rerr, temp); /* rerr already gettextized inside regex routines */ /* gack. this must be done *after* re_compile_pattern */ rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */ if (dfa && ! no_dfa) { dfacomp(temp, len, &(rp->dfareg), TRUE); rp->dfa = TRUE; } else rp->dfa = FALSE; rp->has_anchor = has_anchor; free(temp); return rp; }