コード例 #1
0
ファイル: node.c プロジェクト: OS2World/DEV-UTIL-gawk
/*
 * r_force_string --- hand back the string form of a value node.
 *
 * The real work is delegated to format_val(), which is called with the
 * current CONVFMT and its index; whatever it returns is returned here.
 *
 * When GAWKDEBUG is defined the node is sanity-checked first (non-NULL,
 * of type Node_val, positive stref) and a node that already has STR set
 * with a format index of -1 or CONVFMTidx is returned unchanged.
 */
NODE *
r_force_string(register NODE *s)
{
#ifdef GAWKDEBUG
	/* Each check is kept separate so a failure reports its own line. */
	if (s == NULL)
		cant_happen();
	if (s->type != Node_val)
		cant_happen();
	if (s->stref <= 0)
		cant_happen();

	/* String value already present in an acceptable format: no work. */
	if (s->flags & STR) {
		if (s->stfmt == -1 || s->stfmt == CONVFMTidx)
			return s;
	}
#endif

	return format_val(CONVFMT, CONVFMTidx, s);
}
コード例 #2
0
ファイル: re.c プロジェクト: OS2World/DEV-UTIL-gawk
/*
 * make_regexp --- compile the len bytes at s into a freshly allocated Regexp.
 *
 * s          pattern text; backslash escapes are translated before compiling
 * len        number of bytes of s to use
 * ignorecase non-zero installs casetable as the pattern's translate table
 * dfa        non-zero also builds the dfa matcher (only when ! ignorecase)
 *
 * The translated pattern lives in a scratch buffer that is freed before
 * returning.  A compile error reported by re_compile_pattern() is passed
 * to fatal().  The returned Regexp and its fastmap come from emalloc().
 */
Regexp *
make_regexp(char *s, size_t len, int ignorecase, int dfa)
{
	Regexp *re;
	const char *errmsg;
	char *in = s;			/* read cursor in the caller's text */
	char *const limit = s + len;	/* one past the last source byte */
	char *buf;			/* start of the translated copy */
	char *out;			/* write cursor in the translated copy */
	int esc, val;
	int use_dfa;

	/*
	 * First pass: copy the pattern into buf, resolving escape
	 * sequences as we go.  The regex is generated from the copy.
	 */
	emalloc(buf, char *, len + 2, "make_regexp");
	out = buf;

	while (in < limit) {
		if (*in != '\\') {
			/* ordinary byte, copy verbatim */
			*out++ = *in++;
			continue;
		}

		esc = *++in;	/* the character following the backslash */
		switch (esc) {
		case 'a': case 'b': case 'f': case 'n':
		case 'r': case 't': case 'v': case 'x':
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			/* parse_escape() advances `in' itself */
			val = parse_escape(&in);
			if (val < 0)
				cant_happen();
			/*
			 * In traditional (non-POSIX) mode a regexp
			 * metacharacter produced by an octal or hex escape
			 * is taken literally, so protect it with a backslash.
			 */
			if (do_traditional && ! do_posix && (ISDIGIT(esc) || esc == 'x')
			    && strchr("()|*+?.^$\\[]", val) != NULL)
				*out++ = '\\';
			*out++ = (char) val;
			break;

		case '8':
		case '9':
			/* not a valid escape: drop the backslash, keep the digit */
			*out++ = esc;
			in++;
			break;

		case 'y':
			/* \y is rewritten to the GNU regex operator \b */
			if (! do_traditional) {
				*out++ = '\\';
				*out++ = 'b';
				in++;
				break;
			}
			/* traditional mode: treat like any other escape */
			/* fall through */
		default:
			/* leave the escape for the regex compiler to interpret */
			*out++ = '\\';
			*out++ = (char) esc;
			in++;
			break;
		}
	}

	*out = '\0';	/* the terminator only matters when buf is printed */

	emalloc(re, Regexp *, sizeof(*re), "make_regexp");
	memset((char *) re, 0, sizeof(*re));
	re->pat.allocated = 0;	/* let regex allocate the pattern buffer */
	emalloc(re->pat.fastmap, char *, 256, "make_regexp");

	re->pat.translate = ignorecase ? casetable : NULL;

	len = out - buf;
	errmsg = re_compile_pattern(buf, len, &(re->pat));
	if (errmsg != NULL)
		fatal("%s: /%s/", gettext(errmsg), buf);

	/* must be set *after* re_compile_pattern: no \n anchoring mid-string */
	re->pat.newline_anchor = FALSE;

	use_dfa = (dfa && ! ignorecase);
	if (use_dfa)
		dfacomp(buf, len, &(re->dfareg), TRUE);
	re->dfa = use_dfa ? TRUE : FALSE;

	free(buf);
	return re;
}
コード例 #3
0
ファイル: node.c プロジェクト: OS2World/DEV-UTIL-gawk
/*
 * r_force_number --- compute, cache and return the numeric value of a node.
 *
 * The text in n->stptr (n->stlen bytes) is converted to an AWKNUM, which is
 * stored in n->numbr and returned.  On n->flags, NUM is always set and
 * UNINITIALIZED is always cleared.  If MAYBE_NUM was set it is cleared, and
 * NUMBER is added only when the conversion consumed the text (allowing
 * trailing white space) without setting errno.
 *
 * A string that is empty, is all white space, or whose first non-space
 * character is a letter yields 0.0 without calling strtod().
 *
 * The byte at n->stptr[n->stlen] is temporarily overwritten with '\0' around
 * the strtod() call and then restored.
 * NOTE(review): this assumes that byte is writable storage owned by the
 * node -- confirm against the allocators of stptr.
 */
AWKNUM
r_force_number(register NODE *n)
{
	register char *cp;
	register char *cpend;
	char save;
	char *ptr;
	unsigned int newflags;
	extern double strtod();

#ifdef GAWKDEBUG
	if (n == NULL)
		cant_happen();
	if (n->type != Node_val)
		cant_happen();
	if(n->flags == 0)
		cant_happen();
	if (n->flags & NUM)
		return n->numbr;
#endif

	/* all the conditionals are an attempt to avoid the expensive strtod */

	n->numbr = 0.0;
	n->flags |= NUM;
	n->flags &= ~UNINITIALIZED;

	if (n->stlen == 0) {
		if (0 && do_lint)
			lintwarn(_("can't convert string to float"));
		return 0.0;
	}

	cp = n->stptr;
	if (ISALPHA(*cp)) {
		if (0 && do_lint)
			lintwarn(_("can't convert string to float"));
		return 0.0;
	}

	/* skip leading white space, then re-check for "nothing numeric here" */
	cpend = cp + n->stlen;
	while (cp < cpend && ISSPACE(*cp))
		cp++;
	if (cp == cpend || ISALPHA(*cp)) {
		if (0 && do_lint)
			lintwarn(_("can't convert string to float"));
		return 0.0;
	}

	/*
	 * MAYBE_NUM is resolved here: remember that NUMBER should be added,
	 * but only apply it once the text has proved to be numeric.
	 */
	if (n->flags & MAYBE_NUM) {
		newflags = NUMBER;
		n->flags &= ~MAYBE_NUM;
	} else
		newflags = 0;

	/* single remaining character: a digit converts directly */
	if (cpend - cp == 1) {
		if (ISDIGIT(*cp)) {
			n->numbr = (AWKNUM)(*cp - '0');
			n->flags |= newflags;
		} else if (0 && do_lint)
			lintwarn(_("can't convert string to float"));
		return n->numbr;
	}

	if (do_non_decimal_data) {
		errno = 0;
		if (! do_traditional && isnondecimal(cp)) {
			n->numbr = nondec2awknum(cp, cpend - cp);
			/*
			 * strtod() is bypassed on this path, so give ptr the
			 * "everything was consumed" value that the test at
			 * finish: expects.  Without this, ptr would be read
			 * uninitialized there.
			 */
			ptr = cpend;
			goto finish;
		}
	}

	/* NUL-terminate in place so strtod() cannot run past the value */
	errno = 0;
	save = *cpend;
	*cpend = '\0';
	n->numbr = (AWKNUM) strtod((const char *) cp, &ptr);

	/* POSIX says trailing space is OK for NUMBER */
	while (ISSPACE(*ptr))
		ptr++;
	*cpend = save;
finish:
	/* the >= should be ==, but for SunOS 3.5 strtod() */
	if (errno == 0 && ptr >= cpend) {
		n->flags |= newflags;
	} else {
		if (0 && do_lint && ptr < cpend)
			lintwarn(_("can't convert string to float"));
		errno = 0;
	}

	return n->numbr;
}
コード例 #4
0
ファイル: re.c プロジェクト: WndSks/msys
/*
 * make_regexp --- compile the len bytes at s into a freshly allocated Regexp.
 *
 * s          pattern text; backslash escapes are translated before compiling
 * len        number of bytes of s to use
 * ignorecase non-zero requests case-insensitive matching (RE_ICASE when
 *            gawk_mb_cur_max > 1, otherwise the casetable translate table)
 * dfa        non-zero also builds the dfa matcher, unless the GAWK_NO_DFA
 *            environment variable was set when this function first ran
 *
 * The translated pattern is built in a scratch buffer that is freed before
 * returning.  A compile error from re_compile_pattern() is passed to fatal().
 * The returned Regexp records in has_anchor whether a '^' or '$' byte was
 * copied through the non-escape path.
 *
 * `syn' is not declared in this function.
 * NOTE(review): presumably the file-scope regex syntax bits -- confirm.
 * It is modified here and pushed to dfasyntax() and re_set_syntax() on
 * every call.
 */
Regexp *
make_regexp(const char *s, size_t len, int ignorecase, int dfa)
{
	Regexp *rp;
	const char *rerr;
	const char *src = s;
	char *temp;
	const char *end = s + len;
	register char *dest;
	register int c, c2;
	/* one-time initialization state, shared by all calls */
	static short first = TRUE;
	static short no_dfa = FALSE;
	int has_anchor = FALSE;

	/* The number of bytes in the current multibyte character.
	   It is 0, when the current character is a singlebyte character.  */
	size_t is_multibyte = 0;
#ifdef MBS_SUPPORT
	mbstate_t mbs;

	if (gawk_mb_cur_max > 1)
		memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize.  */
#endif

	/* consult the environment only on the very first call */
	if (first) {
		first = FALSE;
		no_dfa = (getenv("GAWK_NO_DFA") != NULL);	/* for debugging and testing */
	}

	/* Handle escaped characters first. */

	/*
	 * Build a copy of the string (in dest) with the
	 * escaped characters translated, and generate the regex
	 * from that.  
	 */
	/*
	 * NOTE(review): the "+ 2" appears to leave room for a lone trailing
	 * backslash (which emits two bytes) plus the terminating '\0' --
	 * confirm before changing the size.
	 */
	emalloc(dest, char *, len + 2, "make_regexp");
	temp = dest;	/* temp keeps the start of the buffer; dest is the write cursor */

	while (src < end) {
#ifdef MBS_SUPPORT
		if (gawk_mb_cur_max > 1 && ! is_multibyte) {
			/* The previous byte is a singlebyte character, or last byte
			   of a multibyte character.  We check the next character.  */
			is_multibyte = mbrlen(src, end - src, &mbs);
			/* length 1, the two mbrlen error values, and 0 all count as single byte */
			if ((is_multibyte == 1) || (is_multibyte == (size_t) -1)
				|| (is_multibyte == (size_t) -2 || (is_multibyte == 0))) {
				/* We treat it as a singlebyte character.  */
				is_multibyte = 0;
			}
		}
#endif

		/* We skip multibyte character, since it must not be a special
		   character.  */
		if ((gawk_mb_cur_max == 1 || ! is_multibyte) &&
		    (*src == '\\')) {
			c = *++src;	/* the character following the backslash */
			switch (c) {
			case 'a':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
			case 'v':
			case 'x':
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				/* src is passed by address; no separate src++ is done on this path */
				c2 = parse_escape(&src);
				if (c2 < 0)
					cant_happen();
				/*
				 * Unix awk treats octal (and hex?) chars
				 * literally in re's, so escape regexp
				 * metacharacters.
				 */
				if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x')
				    && strchr("()|*+?.^$\\[]", c2) != NULL)
					*dest++ = '\\';
				*dest++ = (char) c2;
				break;
			case '8':
			case '9':	/* a\9b not valid */
				/* the backslash is dropped and the digit copied as is */
				*dest++ = c;
				src++;
				break;
			case 'y':	/* normally \b */
				/* gnu regex op */
				if (! do_traditional) {
					*dest++ = '\\';
					*dest++ = 'b';
					src++;
					break;
				}
				/* else, fall through */
			default:
				/* keep the escape intact for the regex compiler */
				*dest++ = '\\';
				*dest++ = (char) c;
				src++;
				break;
			} /* switch */
		} else {
			/* plain byte (or a byte inside a multibyte character): copy it */
			c = *src;
			if (c == '^' || c == '$')
				has_anchor = TRUE;
			*dest++ = *src++;	/* not '\\' */
		}
		/* one more byte of the current multibyte character has been consumed */
		if (gawk_mb_cur_max > 1 && is_multibyte)
			is_multibyte--;
	} /* while */

	*dest = '\0' ;	/* Only necessary if we print dest ? */
	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
	memset((char *) rp, 0, sizeof(*rp));
	rp->pat.allocated = 0;	/* regex will allocate the buffer */
	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");

	/*
	 * Lo these many years ago, had I known what a P.I.T.A. IGNORECASE
	 * was going to turn out to be, I wouldn't have bothered with it.
	 *
	 * In the case where we have a multibyte character set, we have no
	 * choice but to use RE_ICASE, since the casetable is for single-byte
	 * character sets only.
	 *
	 * On the other hand, if we do have a single-byte character set,
	 * using the casetable should give a performance improvement, since
	 * it's computed only once, not each time a regex is compiled.  We
	 * also think it's probably better for portability.  See the
	 * discussion by the definition of casetable[] in eval.c.
	 */

	if (ignorecase) {
		if (gawk_mb_cur_max > 1) {
			syn |= RE_ICASE;
			rp->pat.translate = NULL;
		} else {
			syn &= ~RE_ICASE;
			rp->pat.translate = (char *) casetable;
		}
	} else {
		rp->pat.translate = NULL;
		syn &= ~RE_ICASE;
	}

	/* the dfa matcher is always told about ignorecase, even when regex uses casetable */
	dfasyntax(syn | (ignorecase ? RE_ICASE : 0), ignorecase ? TRUE : FALSE, '\n');
	re_set_syntax(syn);

	/* from here on len is the length of the translated pattern */
	len = dest - temp;
	if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
		fatal("%s: /%s/", rerr, temp);	/* rerr already gettextized inside regex routines */

	/* gack. this must be done *after* re_compile_pattern */
	rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */
	if (dfa && ! no_dfa) {
		dfacomp(temp, len, &(rp->dfareg), TRUE);
		rp->dfa = TRUE;
	} else
		rp->dfa = FALSE;
	rp->has_anchor = has_anchor;

	/* the translated copy is no longer needed once both matchers are built */
	free(temp);
	return rp;
}