Beispiel #1
0
/*
** map_funny - rewrite a string in place, turning "funny" characters
**	       into underscores.
**
** Inputs:
**	str	EOS-terminated string; it is overwritten with the result.
**
** A character is copied unchanged only when CMalpha() or CMdigit()
** accepts it and CMdbl1st() does not.  Every other character is replaced
** by a single '_', so exactly one byte is written per input character.
**
** History (Fix 9005): the earlier test did not map Kanji characters,
** which produced bad VMS filenames.  Consequences of the current test:
**	- Kanji filenames are NOT supported.
**	- 8-bit characters from the European character sets are still not
**	  dealt with; a CMlegal_filename_char routine is needed for that.
*/
static void
map_funny(char *str)
{
    register char	*src;
    register char	*dst = str;
    i4			width;

    for (src = str; *src != EOS; src += width)
    {
	/* Measure the character before its first byte can be overwritten. */
	width = CMbytecnt(src);

	if ( (CMalpha(src) || CMdigit(src)) && !CMdbl1st(src) )
	    *dst = *src;
	else
	    *dst = '_';
	dst++;
    }

    *dst = EOS;
}
Beispiel #2
0
/*
 * regatom - the lowest level
 *
 * Parses one atom at the current parse position (regparse) and builds the
 * node for it with regnode(), handing operand characters to regc().
 *
 * flagp receives WORST, OR-ed with HASWIDTH / SIMPLE / SPSTART as the
 * atom warrants.
 *
 * Returns the node produced for the atom.  A parenthesised group returns
 * NULL when reg() returns NULL.  Error paths go through _FAIL(), which is
 * defined outside this function (presumably it reports the message and
 * returns NULL -- confirm against its definition).
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom(i4 *flagp)
{
    register char *ret = NULL;	/* stays defined even if _FAIL falls through */
    i4 flags;
    char null_byte = '\0';

    *flagp = WORST;		/* Tentatively. */

    switch (*regparse) {
    case '^':
        CMnext( regparse );
        ret = regnode(BOL);
        break;
    case '$':
        CMnext( regparse );
        ret = regnode(EOL);
        break;
    case '.':
        CMnext( regparse );
        ret = regnode(ANY);
        *flagp |= HASWIDTH|SIMPLE;
        break;
    case '[': {
        /* Most recent ordinary class member; a following '-' ranges from it. */
        char *range_start = NULL;
        /*
        ** Must start out FALSE: a leading ']' or '-' is emitted below
        ** without going through the branch that sets this, and a
        ** following '-' would otherwise test an indeterminate value.
        */
        bool double_start = FALSE;
        /*
        ** Next single-byte code to emit for a range.  Kept wider than
        ** u_char so that a range ending at 0xFF terminates instead of
        ** wrapping around to 0 and looping forever.
        */
        i4 range_first = 0;
        u_char last_u1;

        CMnext( regparse );
        if (*regparse == '^') {	/* Complement of range. */
            ret = regnode(ANYBUT);
            CMnext( regparse );
        } else
            ret = regnode(ANYOF);
        /* A ']' or '-' in first position is an ordinary member. */
        if (*regparse == ']' || *regparse == '-') {
            regc( regparse );
            CMnext( regparse );
        }
        while (*regparse != '\0' && *regparse != ']') {
            if (*regparse == '-') {
                char range_op = '-';

                CMnext( regparse );
                if( *regparse == ']' ||
                        *regparse == '\0'
                  )
                    /* A '-' in last position is an ordinary member. */
                    regc( &range_op );
                else {
                    bool invalid = FALSE;
                    bool double_end;

                    /* A range needs a preceding ordinary member. */
                    if( range_start == NULL )
                        invalid = TRUE;

                    double_end =
                        CMdbl1st( regparse );

                    /* Both ends must have the same width. */
                    if( !invalid &&
                            double_end
                            && !double_start
                      )
                        invalid = TRUE;

                    if( !invalid &&
                            double_start
                            && !double_end
                      )
                        invalid = TRUE;

                    /* The start must not sort after the end. */
                    if( !invalid &&
                            CMcmpcase( range_start,
                                       regparse ) > 0
                      )
                        invalid = TRUE;

                    if( double_start )
                        _FAIL("don't know how to support character classes containing double-byte ranges");

                    if( invalid )
                        _FAIL("invalid [] range");
                    /* no double-byte ranges! */
                    /*
                    ** Initialize the value for the end of the range.
                    */
                    last_u1 = UCHARAT(regparse);
                    /*
                    ** Emit every code from the range start through the
                    ** end.  The start itself was already emitted when it
                    ** was parsed as an ordinary member, so it appears
                    ** twice.
                    */
                    for (; range_first <= (i4) last_u1;
                            range_first++
                        ) {
                        u_char member = (u_char) range_first;

                        regc( (char *)
                              &member );
                    }

                    CMnext( regparse );
                }
            } else {
                /* Ordinary member: remember it as a possible range start. */
                range_start = regparse;
                if( CMdbl1st( range_start ) )
                    double_start = TRUE;
                else
                {
                    double_start = FALSE;
                    range_first = UCHARAT(range_start);
                }
                regc( regparse );
                CMnext( regparse );
            }
        }
        regc( &null_byte );
        if (*regparse != ']')
            _FAIL("unmatched []");
        CMnext( regparse );
        *flagp |= HASWIDTH|SIMPLE;
    }
    break;
    case '(':
        CMnext( regparse );
        ret = reg(1, &flags);
        if (ret == NULL)
            return(NULL);
        *flagp |= flags&(HASWIDTH|SPSTART);
        break;
    case '\0':
    case '|':
    case ')':
        CMnext( regparse );
        _FAIL("internal urp");	/* Supposed to be caught earlier. */
        break;
    case '?':
    case '+':
    case '*':
        CMnext( regparse );
        _FAIL("?+* follows nothing");
        break;
    case '\\':
        CMnext( regparse );
        if (*regparse == '\0')
            _FAIL("trailing \\");
        /* The escaped character becomes a one-character EXACTLY node. */
        ret = regnode(EXACTLY);
        regc( regparse );
        CMnext( regparse );
        regc( &null_byte );
        *flagp |= HASWIDTH|SIMPLE;
        break;
    default: {
        register i4  len;
        register char ender;

        /* Length of the run of ordinary characters, per my_strcspn(). */
        len = my_strcspn(regparse, META);
        if (len <= 0)
            _FAIL("internal disaster");
        ender = *(regparse+len);
        /*
        ** NOTE(review): this backs off by exactly one, which assumes the
        ** character in front of the operator occupies a single byte --
        ** confirm the intended behaviour for double-byte characters.
        */
        if (len > 1 && ISMULT(ender))
            len--;	/* Back off clear of ?+* operand. */
        *flagp |= HASWIDTH;
        if (len == 1)
            *flagp |= SIMPLE;
        ret = regnode(EXACTLY);
        /* Emit one character per pass; CMbytedec charges it against len. */
        while (len > 0) {
            regc( regparse );
            CMbytedec( len, regparse );
            CMnext( regparse );
        }
        regc( &null_byte );
    }
    break;
    }

    return(ret);
}
Beispiel #3
0
/*
** do_readln - read one line from a file into a caller-supplied buffer.
**
** Inputs:
**	fp		file to read from, one byte per SIread() call.
**	maxchar		maximum number of bytes to store before the
**			terminating EOS.
**
** Outputs:
**	buf_ptr		receives the line, EOS-terminated, without its
**			end-of-line character.  Since the EOS is written
**			after the stored bytes, the buffer needs room for
**			maxchar + 1 bytes.
**
** Returns:
**	The STlength() of the line stored (0 for an empty line), or -1 on
**	end of file with nothing stored or on a read failure.
**
** A line ends at the first CR, LF or FF.  When the end of file follows
** an unterminated final line, that line's length is returned and the
** next call returns -1.  When maxchar bytes have been stored before an
** end-of-line character is seen, reading stops and what was stored so
** far is returned.
*/
static
i4
do_readln(FILE *fp,char *buf_ptr,i2 maxchar)
{
	char	*ptr;			/* Working pointer into line buffer */
	i4	count;			/* Number of chars read */
	STATUS	status;


	ptr = buf_ptr;
	while (maxchar > 0)
	{
		status = SIread(fp,sizeof(char),&count,ptr);

		/*
		** Look at the byte only after a successful read; on
		** ENDFILE or error nothing was stored at *ptr.
		*/
		if ((status == OK) && (CMdbl1st(ptr)))
		{
			if (maxchar < 2)
			{
				/*
				** Only one byte of room is left, so the
				** trail byte cannot be stored.  Stop here;
				** the EOS written below replaces the
				** orphaned lead byte.
				*/
				break;
			}
			status = SIread(fp,sizeof(char),&count,(ptr + 1));
		}

		switch(status)
		{
		case(ENDFILE):
			*ptr = EOS;
			count = STlength(buf_ptr);
			if  (count > 0)
			{
				/*
				** We hit EOF on a file that ended abruptly
				** without a final end-of-line character.
				** Return the final line's count this time -
				** we'll return EOF on the next call.
				*/
				return(count);
			}
			return(-1);

		case(OK):
			if ((*ptr == CR) || (*ptr == LF) || (*ptr == FF))
			{
				/*
				** Do NOT include the end-of-line character!
				*/
				*ptr = EOS;
				return(STlength(buf_ptr));
			}
			/*
			** Charge the character against the limit exactly
			** once, now that all of its bytes are stored, then
			** step past it.
			*/
			CMbytedec(maxchar,ptr);
			CMnext(ptr);
			break;

		default:
			/*
			** According to the documentation, we shouldn't
			** be able to get here.
			*/
			IIUGerr(E_DE000E_Dobj_read_fail,UG_ERR_ERROR,0);
			return(-1);
		}
	}

	/*
	** If we wind up here, then
	** we've exceeded maxchars!
	*/

	*ptr = EOS;
	return(STlength(buf_ptr));
}