/*
** map_funny - replace "funny" characters in a string with underscores.
**
** The string is rewritten in place.  Every character that is not a
** single-byte letter or digit becomes one '_'; a multi-byte character
** collapses to a single '_' as well, so each input character yields
** exactly one output byte.
**
** Inputs:
**	str	EOS-terminated string to clean up; modified in place.
**
** History note (fix 9005): an earlier version used the wrong test and
** did not map Kanji characters, which produced bad VMS filenames.
** WARNING: mapping them means Kanji filenames are not supported.
** This also does not solve the problem of 8-bit characters in the
** European character sets; a CMlegal_filename_char routine is needed
** for that.
*/
static void
map_funny(char *str)
{
    register char *from = str;
    register char *to = str;

    while (*from != EOS)
    {
        i4 cnt = CMbytecnt(from);

        /* Only single-byte alphanumerics survive unchanged. */
        if ((!CMalpha(from) && !CMdigit(from)) || CMdbl1st(from))
            *to++ = '_';
        else
            *to++ = *from;

        /*
        ** Step over the character one byte at a time instead of
        ** jumping by cnt: if the string ends in the middle of a
        ** multi-byte character, a blind jump would land beyond the
        ** terminator and the loop would keep reading and writing
        ** past the end of the string.
        */
        while (cnt-- > 0 && *from != EOS)
            from++;
    }
    *to = EOS;
}
/*
 * regatom - the lowest level
 *
 * Parses one atom at the current parse position (regparse) and emits the
 * node(s) for it:
 *	^ $ .		BOL, EOL and ANY nodes
 *	[...]		ANYOF / ANYBUT node followed by the class members
 *	(...)		handed to reg() as a parenthesized sub-expression
 *	\c		EXACTLY node holding the single escaped character
 *	anything else	EXACTLY node holding a run of ordinary characters
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * flagp receives WORST, OR-ed with HASWIDTH / SIMPLE / SPSTART as they
 * apply to the atom parsed.
 *
 * Returns the node created for the atom, or NULL when reg() fails.
 * Syntax errors are reported through _FAIL (presumably it reports the
 * message and returns NULL - confirm against the macro definition).
 */
static char *
regatom(i4 *flagp)
{
    register char *ret;
    i4 flags;
    char null_byte = '\0';

    *flagp = WORST;		/* Tentatively. */

    switch (*regparse)
    {
    case '^':
        CMnext( regparse );
        ret = regnode(BOL);
        break;

    case '$':
        CMnext( regparse );
        ret = regnode(EOL);
        break;

    case '.':
        CMnext( regparse );
        ret = regnode(ANY);
        *flagp |= HASWIDTH|SIMPLE;
        break;

    case '[':
        {
            char *range_start = NULL;

            /*
            ** Must start out FALSE: a '-' can be reached before any
            ** range start has been recorded (e.g. "[]-a]"), and the
            ** flag is tested on that path.
            */
            bool double_start = FALSE;
            u_char first_u1 = 0;
            u_char last_u1;

            CMnext( regparse );
            if (*regparse == '^')
            {
                /* Complement of range. */
                ret = regnode(ANYBUT);
                CMnext( regparse );
            }
            else
                ret = regnode(ANYOF);

            /* A leading ']' or '-' is an ordinary class member. */
            if (*regparse == ']' || *regparse == '-')
            {
                regc( regparse );
                CMnext( regparse );
            }

            while (*regparse != '\0' && *regparse != ']')
            {
                if (*regparse == '-')
                {
                    char range_op = '-';

                    CMnext( regparse );
                    if( *regparse == ']' || *regparse == '\0' )
                    {
                        /* A trailing '-' stands for itself. */
                        regc( &range_op );
                    }
                    else
                    {
                        bool invalid = FALSE;
                        bool double_end;
                        i4 member;

                        if( range_start == NULL )
                            invalid = TRUE;

                        /* Both ends must be of the same width. */
                        double_end = CMdbl1st( regparse );
                        if( !invalid && double_end && !double_start )
                            invalid = TRUE;
                        if( !invalid && double_start && !double_end )
                            invalid = TRUE;

                        if( !invalid
                            && CMcmpcase( range_start, regparse ) > 0 )
                            invalid = TRUE;

                        if( double_start )
                            _FAIL("don't know how to support character classes containing double-byte ranges");

                        if( invalid )
                            _FAIL("invalid [] range");

                        /* no double-byte ranges! */

                        /*
                        ** Emit every member of the range.  The
                        ** counter is wider than a byte so that a
                        ** range ending at 0xFF terminates instead
                        ** of wrapping around to 0 forever.
                        */
                        last_u1 = UCHARAT(regparse);
                        for (member = first_u1; member <= (i4) last_u1;
                             member++)
                        {
                            u_char class_byte = (u_char) member;

                            regc( (char *) &class_byte );
                        }

                        /*
                        ** Leave first_u1 just past the range, as a
                        ** following '-' continues from there.
                        */
                        first_u1 = (u_char) member;

                        CMnext( regparse );
                    }
                }
                else
                {
                    /* Remember this character as a possible range start. */
                    range_start = regparse;
                    if( CMdbl1st( range_start ) )
                        double_start = TRUE;
                    else
                    {
                        double_start = FALSE;
                        first_u1 = UCHARAT(range_start);
                    }
                    regc( regparse );
                    CMnext( regparse );
                }
            }

            regc( &null_byte );
            if (*regparse != ']')
                _FAIL("unmatched []");
            CMnext( regparse );
            *flagp |= HASWIDTH|SIMPLE;
        }
        break;

    case '(':
        CMnext( regparse );
        ret = reg(1, &flags);
        if (ret == NULL)
            return(NULL);
        *flagp |= flags&(HASWIDTH|SPSTART);
        break;

    case '\0':
    case '|':
    case ')':
        CMnext( regparse );
        _FAIL("internal urp");	/* Supposed to be caught earlier. */
        break;

    case '?':
    case '+':
    case '*':
        CMnext( regparse );
        _FAIL("?+* follows nothing");
        break;

    case '\\':
        CMnext( regparse );
        if (*regparse == '\0')
            _FAIL("trailing \\");
        ret = regnode(EXACTLY);
        regc( regparse );
        CMnext( regparse );
        regc( &null_byte );
        *flagp |= HASWIDTH|SIMPLE;
        break;

    default:
        {
            register i4 len;
            register char ender;

            len = my_strcspn(regparse, META);
            if (len <= 0)
                _FAIL("internal disaster");

            /*
            ** NOTE(review): len looks like a byte count (it is
            ** decremented with CMbytedec below); backing off by one
            ** when the last character of the run is double-byte may
            ** not exclude that character - confirm.
            */
            ender = *(regparse+len);
            if (len > 1 && ISMULT(ender))
                len--;		/* Back off clear of ?+* operand. */

            *flagp |= HASWIDTH;
            if (len == 1)
                *flagp |= SIMPLE;

            ret = regnode(EXACTLY);
            while (len > 0)
            {
                regc( regparse );
                CMbytedec( len, regparse );
                CMnext( regparse );
            }
            regc( &null_byte );
        }
        break;
    }

    return(ret);
}
/*
** do_readln - read one line from a file into a buffer.
**
** Bytes are fetched one at a time with SIread until a CR, LF or FF is
** seen, end-of-file is reached, or the maxchar budget is used up.  The
** end-of-line byte is not stored.  When the byte read is the first byte
** of a double-byte character, its second byte is fetched straight away
** so the pair is stored together.
**
** Inputs:
**	fp	file to read from
**	maxchar	budget for the line, consumed through CMbytedec
**
** Outputs:
**	buf_ptr	receives the line, terminated with EOS on every return
**		except the read-failure one.
**		NOTE(review): the terminator can land at buf_ptr[maxchar],
**		so the buffer appears to need maxchar + 1 bytes - confirm
**		against callers.
**
** Returns:
**	STlength(buf_ptr) for the line read (0 for an empty line);
**	-1 at end-of-file when nothing was read, or after a read failure
**	has been reported through IIUGerr.
*/
static i4
do_readln(FILE *fp,char *buf_ptr,i2 maxchar)
{
    char	*ptr;		/* Working pointer into line buffer */
    i4		count;		/* Number of chars read */
    STATUS	status;

    ptr = buf_ptr;
    while (maxchar > 0)
    {
        status = SIread(fp,sizeof(char),&count,ptr);

        /*
        ** Look at the byte only when the read actually delivered it.
        ** On ENDFILE or failure *ptr is stale, and treating it as a
        ** lead byte would issue a second read whose status replaces
        ** the one that needs to be handled below.
        */
        if (status == OK)
        {
            /*
            ** NOTE(review): CMbytedec is applied once per byte read,
            ** as before; if it already charges for the whole
            ** character, double-byte characters are charged twice -
            ** confirm against the CM definitions.
            */
            CMbytedec(maxchar,ptr);
            if (CMdbl1st(ptr))
            {
                if (maxchar <= 0)
                {
                    /*
                    ** No budget left for the second byte.  Drop the
                    ** orphaned lead byte rather than stepping over a
                    ** slot that was never written and terminating
                    ** beyond it.
                    */
                    *ptr = EOS;
                    return(STlength(buf_ptr));
                }
                status = SIread(fp,sizeof(char),&count,(ptr + 1));
                CMbytedec(maxchar,ptr);
            }
        }

        switch(status)
        {
        case(ENDFILE):
            *ptr = EOS;
            count = STlength(buf_ptr);
            if (count > 0)
            {
                /*
                ** We hit EOF on a file that ended abruptly
                ** without a final end-of-line character.
                ** Return the final line's count this time -
                ** we'll return EOF on the next call.
                */
                return(count);
            }
            return(-1);

        case(OK):
            if ((*ptr == CR) || (*ptr == LF) || (*ptr == FF))
            {
                /*
                ** Do NOT include the end-of-line character!
                */
                *ptr = EOS;
                return(STlength(buf_ptr));
            }
            CMnext(ptr);
            break;

        default:
            /*
            ** According to the documentation, we shouldn't
            ** be able to get here.
            */
            IIUGerr(E_DE000E_Dobj_read_fail,UG_ERR_ERROR,0);
            return(-1);
        }
    }

    /*
    ** If we wind up here, then
    ** we've exceeded maxchars!
    */
    *ptr = EOS;
    return(STlength(buf_ptr));
}