Esempio n. 1
0
char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
                  char *ep, char *endbuf, int seof)
{
    int c;
    int eof = seof;
    char *lastep;
    int cclcnt;
    char bracket[NBRA], *bracketp;
    int closed;
    int neg;
    int lc;
    int i, cflg;
    int iflag; /* used for non-ascii characters in brackets */
    int nodelim = 0;
    char *sp = commands->cp;
    int regerrno = 0;

    lastep = 0;
    if ((c = GETC()) == eof || c == '\n') {
        if (c == '\n') {
            UNGETC(c);
            nodelim = 1;
        }
        commands->cp = sp;
        goto out;
    }
    bracketp = bracket;
    compargs->circf = closed = compargs->nbra = 0;
    if (c == '^')
        compargs->circf++;
    else
        UNGETC(c);
    while (1) {
        if (ep >= endbuf)
            SEDCOMPILE_ERROR(50);
        c = GETC();
        if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
            lastep = ep;
        if (c == eof) {
            *ep++ = CCEOF;
            if (bracketp != bracket)
                SEDCOMPILE_ERROR(42);
            commands->cp = sp;
            goto out;
        }
        switch (c) {

        case '.':
            *ep++ = CDOT;
            continue;

        case '\n':
            SEDCOMPILE_ERROR(36);
            commands->cp = sp;
            goto out;
        case '*':
            if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
                goto defchar;
            *lastep |= STAR;
            continue;

        case '$':
            if (PEEKC() != eof && PEEKC() != '\n')
                goto defchar;
            *ep++ = CDOL;
            continue;

        case '[':
            if (&ep[17] >= endbuf)
                SEDCOMPILE_ERROR(50);

            *ep++ = CCL;
            lc = 0;
            for (i = 0; i < 16; i++)
                ep[i] = 0;

            neg = 0;
            if ((c = GETC()) == '^') {
                neg = 1;
                c = GETC();
            }
            iflag = 1;
            do {
                c &= 0377;
                if (c == '\0' || c == '\n')
                    SEDCOMPILE_ERROR(49);
                if ((c & 0200) && iflag) {
                    iflag = 0;
                    if (&ep[32] >= endbuf)
                        SEDCOMPILE_ERROR(50);
                    ep[-1] = CXCL;
                    for (i = 16; i < 32; i++)
                        ep[i] = 0;
                }
                if (c == '-' && lc != 0) {
                    if ((c = GETC()) == ']') {
                        PLACE('-');
                        break;
                    }
                    if ((c & 0200) && iflag) {
                        iflag = 0;
                        if (&ep[32] >= endbuf)
                            SEDCOMPILE_ERROR(50);
                        ep[-1] = CXCL;
                        for (i = 16; i < 32; i++)
                            ep[i] = 0;
                    }
                    while (lc < c) {
                        PLACE(lc);
                        lc++;
                    }
                }
                lc = c;
                PLACE(c);
            } while ((c = GETC()) != ']');

            if (iflag)
                iflag = 16;
            else
                iflag = 32;

            if (neg) {
                if (iflag == 32) {
                    for (cclcnt = 0; cclcnt < iflag;
                        cclcnt++)
                        ep[cclcnt] ^= 0377;
                    ep[0] &= 0376;
                } else {
                    ep[-1] = NCCL;
                    /* make nulls match so test fails */
                    ep[0] |= 01;
                }
            }

            ep += iflag;

            continue;

        case '\\':
            switch (c = GETC()) {

            case '(':
                if (compargs->nbra >= NBRA)
                    SEDCOMPILE_ERROR(43);
                *bracketp++ = compargs->nbra;
                *ep++ = CBRA;
                *ep++ = compargs->nbra++;
                continue;

            case ')':
                if (bracketp <= bracket)
                    SEDCOMPILE_ERROR(42);
                *ep++ = CKET;
                *ep++ = *--bracketp;
                closed++;
                continue;

            case '{':
                if (lastep == (char *) 0)
                    goto defchar;
                *lastep |= RNGE;
                cflg = 0;
            nlim:
                c = GETC();
                i = 0;
                do {
                    if ('0' <= c && c <= '9')
                        i = 10 * i + c - '0';
                    else
                        SEDCOMPILE_ERROR(16);
                } while (((c = GETC()) != '\\') && (c != ','));
                if (i >= 255)
                    SEDCOMPILE_ERROR(11);
                *ep++ = i;
                if (c == ',') {
                    if (cflg++)
                        SEDCOMPILE_ERROR(44);
                    if ((c = GETC()) == '\\')
                        *ep++ = (char) 255;
                    else {
                        UNGETC(c);
                        goto nlim;
                        /* get 2'nd number */
                    }
                }
                if (GETC() != '}')
                    SEDCOMPILE_ERROR(45);
                if (!cflg)    /* one number */
                    *ep++ = i;
                else if ((ep[-1] & 0377) < (ep[-2] & 0377))
                    SEDCOMPILE_ERROR(46);
                continue;

            case '\n':
                SEDCOMPILE_ERROR(36);

            case 'n':
                c = '\n';
                goto defchar;

            default:
                if (c >= '1' && c <= '9') {
                    if ((c -= '1') >= closed)
                        SEDCOMPILE_ERROR(25);
                    *ep++ = CBACK;
                    *ep++ = c;
                    continue;
                }
            }
    /* Drop through to default to use \ to turn off special chars */

        defchar:
        default:
            lastep = ep;
            *ep++ = CCHR;
            *ep++ = c;
        }
    }
out:
    if (regerrno) {
        regerr(commands, regerrno);
        return (char*) NULL;
    }
    /* XXX : Basant : what extra */
    /* int reglength = (int)(ep - expbuf); */
    return ep;
}
Esempio n. 2
0
char *
_compile(const char *sp, char *ep, char *endbuf, int viflag)
{
	wchar_t		c;
	int		n;
	wchar_t 	d;
	const char 	*oldsp;
	char 		*lastep;
	int 		cclcnt;
	char 		bracket[NBRA], *bracketp;
	int		closed;
	int		neg;
	int		alloc;
	wchar_t		lc, cl;
	int		i, cflg;
	char		*expbuf = ep;
	char		*start;

	regerrno = 0;
	reglength = 0;
	lastep = 0;
	bracketp = bracket;
	closed = 0;
	alloc = 0;

	oldsp = sp;
	if ((c = *sp++) == '\0') {
		if (ep == (char *)0 || ep[1] == 0)
			ERROR(41);
		goto out;
	}
	nbra = 0;
	if (ep == (char *)0) {
		/* malloc space */
		const char *startsp = oldsp;
		n = 0;
		while ((d = *startsp++) != NULL) {
			if (d == '[')
				n += 33; /* add room for bitmaps */
		}
		n += 2 * (startsp - oldsp) + 3;
		if ((ep = malloc(n)) == (char *)0)
			ERROR(50);
		expbuf = ep;
		alloc = 1;
		endbuf = ep + n;
	}

	if (c == '^')
		*ep++ = 1;
	else  {
		*ep++ = 0;
		sp--;
	}

	endbuf--; /* avoid extra check for overflow */
	for (;;) {
		if (ep >= endbuf)
			ERROR(50);
		Popwchar
		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
			lastep = ep;
		if (c == '\0') {
			*ep++ = CCEOF;
			if (bracketp != bracket)
				ERROR(42);
			goto out;
		}
		switch (c) {

		case '.':
			*ep++ = CDOT;
			continue;

		case '*':
			if (lastep == 0 || *lastep == CBRA ||*lastep == CKET ||
				*lastep == CBRC || *lastep == CLET)
				goto defchar;
			*lastep |= STAR;
			continue;

		case '$':
			/* look one character ahead to see if $ means */
			/* to anchor match at end of line */
			if ((d = PEEKC()) != '\0')
				goto defchar;
			*ep++ = CDOL;
			continue;

		case '[':
			start = ep + 34;
			if (start > endbuf)
				ERROR(50);

			*ep++ = CCL;
			lc = 0;
			for (i = 0; i < 32; i++)
				ep[i] = 0;

			neg = 0;
			Popwchar
			if (c == '^') {
				neg = 1;
				Popwchar
			}
			if (multibyte) {
				if (neg) {
					/* do not negate bitmap for */
					/* for multibyte characters */
					neg = 0;
					ep[-1] = NMCCL;
					/* turn off null byte */
					ep[0] |= 01;
				} else
					ep[-1] = MCCL;
			}
			do {
				if (c == '\0')
					ERROR(49);
				if (c == '-' && lc != 0) {
					Popwchar
					if (c == '\0')
						ERROR(49);
					if (c == ']') {
						PLACE('-');
						break;
					}
					/*
					 * ranges do not span code sets
					 */
					if (!multibyte || c <= 0177)
						while (lc < c) {
							PLACE(lc);
							lc++;
						}
					else
					if (valid_range(lc, c) && lc < c)
						/* insert '-' for range */
						*start++ = '-';
					if (viflag & 1)
						lc = 0;
					else
						lc = c;
				} else
				if (c == '\\' && (viflag & 1) &&
					strchr("\\^-]", PEEKC())) {
					c = GETC();
					lc = c;
				} else
					lc = c;
				/* put eight bit characters into bitmap */
				if (!multibyte || c <= 0177 || c <= 0377 &&
					iscntrl((int)c))
					PLACE(c);
				else {
					/*
					 * insert individual bytes of
					 * multibyte characters after
					 * bitmap
					 */
					if (start + n > endbuf)
						ERROR(50);
					while (n--)
						*start++ = *oldsp++;
				}
				Popwchar
			} while (c != ']');

			if (neg) {
				for (cclcnt = 0; cclcnt < 32; cclcnt++)
					ep[cclcnt] ^= 0377;
				ep[0] &= 0376;
			}
			ep += 32;
			if (multibyte) {
				/*
				 * Only allow 256 bytes to
				 * represent multibyte characters
				 * character class
				 */
				if (start - ep > MBYTE_SIZE)
					ERROR(50);
				*ep = (char)(start - ep);
				ep = start;
			}
			continue;

		case '\\':
			Popwchar
			switch (c) {

			case '(':
				if (nbra >= NBRA)
					ERROR(43);
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if (bracketp <= bracket)
					ERROR(42);
				*ep++ = CKET;
				*ep++ = *--bracketp;
				closed++;
				continue;

			case '{':
				if (lastep == (char *)0)
					goto defchar;
				*lastep |= RNGE;
				cflg = 0;
				c = GETC();
			nlim:
				i = 0;
				do {
					if ('0' <= c && c <= '9')
						i = 10 * i + (int)c - '0';
					else
						ERROR(16);
				} while (((c = GETC()) != '\\') && (c != ','));
				if (i > MBYTE_SIZE)
					ERROR(11);
				*ep++ = (char)i;
				if (c == ',') {
					if (cflg++)
						ERROR(44);
					if ((c = GETC()) == '\\')
						*ep++ = (char)MBYTE_SIZE;
					else
						goto nlim;
						/* get 2'nd number */
				}
				if (GETC() != '}')
					ERROR(45);
				if (!cflg)	/* one number */
					*ep++ = (char)i;
				else
				if ((int)(unsigned char)ep[-1] <
					(int)(unsigned char)ep[-2])
					ERROR(46);
				continue;

			case 'n':
				c = '\n';
				goto defchar;

			case '<':
				*ep++ = CBRC;
				continue;

			case '>':
				*ep++ = CLET;
				continue;

			default:
				if (c >= '1' && c <= '9') {
					if ((c -= '1') >= closed)
						ERROR(25);
					*ep++ = CBACK;
					*ep++ = (char)c;
					continue;
				}
			}

	/* Drop through to default to use \ to turn off special chars */

		defchar:
		default:
			lastep = ep;
			if (!multibyte || c <= 0177) {
				/* 8-bit character */
				*ep++ = CCHR;
				*ep++ = (char)c;
			} else {
				/* multibyte character */
				*ep++ = MCCHR;
				if (ep + n > endbuf)
					ERROR(50);
				while (n--)
					*ep++ = *oldsp++;
			}
		}