static Join_t* init(void) { register Join_t* jp; register int i; setlocale(LC_ALL, ""); if (jp = newof(0, Join_t, 1, 0)) { if (jp->mb = mbwide()) for (i = 0x80; i <= 0xff; i++) jp->state[i] = S_WIDE; jp->state[' '] = jp->state['\t'] = S_SPACE; jp->state['\n'] = S_NL; jp->delim = -1; jp->nullfield = 0; if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) || !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0))) { done(jp); return 0; } jp->file[0].maxfields = NFIELD; jp->file[1].maxfields = NFIELD; jp->outmode = C_COMMON; } return jp; }
/*
 * Remove backslash quoting from the NUL-terminated string <sp>, in place.
 *
 * Each backslash is dropped and the byte that follows it is kept literally
 * (so "\\\\" becomes a single backslash).  When SHOPT_MULTIBYTE is enabled
 * and mbwide() is true, a character for which mbsize() reports more than
 * one byte is copied whole and never inspected for backslashes.
 *
 * A backslash that is the last byte of the string quotes nothing: it is
 * dropped and processing stops at the terminating NUL.
 *
 * A null <sp> is accepted and ignored.  The result is never longer than
 * the input.
 */
void sh_trim(register char *sp)
/*@
	assume sp!=NULL;
	promise strlen(in sp) <= in strlen(sp);
@*/
{
	register char *dp;
	register int c;

	if(!sp)
		return;
	dp = sp;
	while((c = *sp))
	{
#if SHOPT_MULTIBYTE
		int len;
		if(mbwide() && (len=mbsize(sp))>1)
		{
			/* source and destination may overlap */
			memmove(dp, sp, len);
			dp += len;
			sp += len;
			continue;
		}
#endif /* SHOPT_MULTIBYTE */
		sp++;
		if(c == '\\')
		{
			/*
			 * Do not step over the terminating NUL when the
			 * backslash is the final byte; otherwise the loop
			 * would continue reading beyond the string.
			 */
			if(!(c = *sp))
				break;
			sp++;
		}
		*dp++ = c;
	}
	*dp = 0;
}
/*
 * Write <name> to stream <sp> in printable form, followed by <delim> when
 * <delim> is not null.
 *
 * When mbwide() is true the name is written unchanged.  Otherwise each
 * byte is made visible:
 *   - a byte with bit 0200 set is written as '?' followed by the byte
 *     with that bit cleared;
 *   - a value below space is written as '^' followed by the value plus
 *     'A' - 1 (so 1 is shown as "^A").
 * When CC_NATIVE differs from CC_ASCII the tests are done on the value
 * mapped through the native-to-ASCII table and the result is mapped back
 * before it is written.
 *
 * Bytes are fetched as unsigned char so that a high-bit byte can never
 * become a negative index into the conversion table on platforms where
 * plain char is signed.
 */
static void print(register Sfio_t* sp, register char* name, char* delim)
{
	if (mbwide())
		sfputr(sp, name, -1);
	else
	{
#if CC_NATIVE != CC_ASCII
		register int		c;
		register unsigned char*	n2a;
		register unsigned char*	a2n;
		register int		aa;
		register int		as;

		n2a = ccmap(CC_NATIVE, CC_ASCII);
		a2n = ccmap(CC_ASCII, CC_NATIVE);
		aa = n2a['A'];
		as = n2a[' '];
		while ((c = (unsigned char)*name++))
		{
			c = n2a[c];
			if (c & 0200)
			{
				c &= 0177;
				sfputc(sp, '?');
			}
			if (c < as)
			{
				c += aa - 1;
				sfputc(sp, '^');
			}
			c = a2n[c];
			sfputc(sp, c);
		}
#else
		register int		c;

		while ((c = (unsigned char)*name++))
		{
			if (c & 0200)
			{
				c &= 0177;
				sfputc(sp, '?');
			}
			if (c < ' ')
			{
				c += 'A' - 1;
				sfputc(sp, '^');
			}
			sfputc(sp, c);
		}
#endif
	}
	if (delim)
		sfputr(sp, delim, -1);
}
Wc_t* wc_init(int mode) { register int n; register int w; Wc_t* wp; if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) return 0; if (!mbwide()) wp->mb = 0; #if _hdr_wchar && _hdr_wctype && _lib_iswctype else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8)) wp->mb = 1; #endif else wp->mb = -1; w = mode & WC_WORDS; for (n = (1<<CHAR_BIT); --n >= 0;) wp->type[n] = (w && isspace(n)) ? WC_SP : 0; wp->type['\n'] = WC_SP|WC_NL; if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0) { for (n = 0; n < 64; n++) { wp->type[0x80+n] |= WC_MB; if (n<32) wp->type[0xc0+n] |= WC_MB+1; else if (n<48) wp->type[0xc0+n] |= WC_MB+2; else if (n<56) wp->type[0xc0+n] |= WC_MB+3; else if (n<60) wp->type[0xc0+n] |= WC_MB+4; else if (n<62) wp->type[0xc0+n] |= WC_MB+5; } wp->type[0xc0] = WC_MB|WC_ERR; wp->type[0xc1] = WC_MB|WC_ERR; wp->type[0xfe] = WC_MB|WC_ERR; wp->type[0xff] = WC_MB|WC_ERR; } wp->mode = mode; return wp; }
int b_cut(int argc, char** argv, void* context) { register char* cp = 0; register Sfio_t* fp; char* s; int n; Cut_t* cut; int mode = 0; Delim_t wdelim; Delim_t ldelim; size_t reclen = 0; cmdinit(argc, argv, context, ERROR_CATALOG, 0); wdelim.chr = '\t'; ldelim.chr = '\n'; wdelim.len = ldelim.len = 1; for (;;) { switch (n = optget(argv, usage)) { case 0: break; case 'b': case 'c': if(mode&C_FIELDS) { error(2, "f option already specified"); continue; } cp = opt_info.arg; if(n=='b') mode |= C_BYTES; else mode |= C_CHARS; continue; case 'D': ldelim.str = opt_info.arg; if (mbwide()) { s = opt_info.arg; ldelim.chr = mbchar(s); if ((n = s - opt_info.arg) > 1) { ldelim.len = n; continue; } } ldelim.chr = *(unsigned char*)opt_info.arg; ldelim.len = 1; continue; case 'd': wdelim.str = opt_info.arg; if (mbwide()) { s = opt_info.arg; wdelim.chr = mbchar(s); if ((n = s - opt_info.arg) > 1) { wdelim.len = n; continue; } } wdelim.chr = *(unsigned char*)opt_info.arg; wdelim.len = 1; continue; case 'f': if(mode&(C_CHARS|C_BYTES)) { error(2, "c option already specified"); continue; } cp = opt_info.arg; mode |= C_FIELDS; continue; case 'n': mode |= C_NOSPLIT; continue; case 'N': mode |= C_NONEWLINE; continue; case 'R': case 'r': if(opt_info.num>0) reclen = opt_info.num; continue; case 's': mode |= C_SUPRESS; continue; case ':': error(2, "%s", opt_info.arg); break; case '?': error(ERROR_usage(2), "%s", opt_info.arg); break; } break; } argv += opt_info.index; if (error_info.errors) error(ERROR_usage(2), "%s",optusage(NiL)); if(!cp) { error(2, "b, c or f option must be specified"); error(ERROR_usage(2), "%s", optusage(NiL)); } if(!*cp) error(3, "non-empty b, c or f option must be specified"); if((mode & (C_FIELDS|C_SUPRESS)) == C_SUPRESS) error(3, "s option requires f option"); cut = cutinit(mode, cp, &wdelim, &ldelim, reclen); if(cp = *argv) argv++; do { if(!cp || streq(cp,"-")) fp = sfstdin; else if(!(fp = sfopen(NiL,cp,"r"))) { error(ERROR_system(0),"%s: cannot open",cp); continue; } 
if(mode&C_FIELDS) cutfields(cut,fp,sfstdout); else cutcols(cut,fp,sfstdout); if(fp!=sfstdin) sfclose(fp); } while(cp = *argv++); if (sfsync(sfstdout)) error(ERROR_system(0), "write error"); return error_info.errors != 0; }
/*
 * Build the cut description for list <str>.
 *
 *   mode    C_* option flags
 *   str     the -b/-c/-f list: decimal numbers and ranges (a-b, a-, -b)
 *           separated by commas, blanks or tabs
 *   wdelim  field delimiter
 *   ldelim  line delimiter
 *   reclen  record length, stored unchanged
 *
 * The list is turned into (start, length) pairs with zero-based starts,
 * sorted with mycomp, merged where pairs overlap, and finally rewritten so
 * that each start is relative to the end of the previous pair.  The pair
 * array is terminated by HUGE.
 *
 * The result is allocated with stakalloc().  A malformed list or an
 * allocation failure is reported through error() with ERROR_exit(1).
 */
static Cut_t* cutinit(int mode, char* str, Delim_t* wdelim, Delim_t* ldelim, size_t reclen)
{
	register int*	lp;
	register int	c;
	register int	n = 0;
	register int	range = 0;
	register char*	cp = str;
	Cut_t*		cut;

	/*
	 * Size the list for the worst case.  Every byte of the list, and the
	 * terminating NUL, can end an item (think ",,,"), and each item
	 * stores two ints.  The HUGE sentinel needs a full pair as well
	 * because it is handed to qsort() together with the real pairs.
	 * That is 2 * (strlen + 1) + 2 ints; sizing by strlen alone lets
	 * such lists write past the allocation.
	 */
	if (!(cut = (Cut_t*)stakalloc(sizeof(Cut_t) + 2 * (strlen(cp) + 2) * sizeof(int))))
		error(ERROR_exit(1), "out of space");

	/* in a multibyte locale the upper half of the byte table is SP_WIDE */
	if ((cut->mb = mbwide()))
	{
		memset(cut->space, 0, sizeof(cut->space) / 2);
		memset(cut->space + sizeof(cut->space) / 2, SP_WIDE, sizeof(cut->space) / 2);
	}
	else
		memset(cut->space, 0, sizeof(cut->space));
	cut->wdelim = *wdelim;
	if (wdelim->len == 1)
		cut->space[wdelim->chr] = SP_WORD;
	cut->ldelim = *ldelim;
	/* a multi-byte line delimiter has no single end-of-buffer byte */
	cut->eob = (ldelim->len == 1) ? ldelim->chr : 0;
	cut->space[cut->eob] = SP_LINE;
	cut->cflag = (mode&C_CHARS) && cut->mb;
	cut->nosplit = (mode&(C_BYTES|C_NOSPLIT)) == (C_BYTES|C_NOSPLIT) && cut->mb;
	cut->sflag = (mode&C_SUPRESS) != 0;
	cut->nlflag = (mode&C_NONEWLINE) != 0;
	cut->reclen = reclen;
	lp = cut->list;
	for (;;)
		switch (c = *cp++)
		{
		case ' ':
		case '\t':
			/* a run of blanks counts as one separator */
			while (*cp == ' ' || *cp == '\t')
				cp++;
			/*FALLTHROUGH*/
		case 0:
		case ',':
			if (range)
			{
				/* range holds the 1-based start, n the end (0 if open) */
				--range;
				if ((n = (n ? (n - range) : (HUGE - 1))) < 0)
					error(ERROR_exit(1), "invalid range for c/f option");
				*lp++ = range;
				*lp++ = n;
			}
			else
			{
				/* single position: zero-based start, length 1 */
				*lp++ = --n;
				*lp++ = 1;
			}
			if (c == 0)
			{
				register int*	dp;

				*lp = HUGE;
				/* the sentinel pair is sorted along with the rest */
				n = 1 + (lp - cut->list) / 2;
				qsort(lp = cut->list, n, 2 * sizeof(*lp), mycomp);
				/* eliminate overlapping regions */
				for (n = 0, range = -2, dp = lp; *lp != HUGE; lp += 2)
				{
					if (lp[0] <= range)
					{
						if (lp[1] == HUGE)
						{
							dp[-1] = HUGE;
							break;
						}
						if ((c = lp[0] + lp[1] - range) > 0)
						{
							range += c;
							dp[-1] += c;
						}
					}
					else
					{
						range = *dp++ = lp[0];
						if (lp[1] == HUGE)
						{
							*dp++ = HUGE;
							break;
						}
						range += (*dp++ = lp[1]);
					}
				}
				*dp = HUGE;
				lp = cut->list;
				/* convert ranges into gaps */
				for (n = 0; *lp != HUGE; lp += 2)
				{
					c = *lp;
					*lp -= n;
					n = c + lp[1];
				}
				return cut;
			}
			n = range = 0;
			break;
		case '-':
			if (range)
				error(ERROR_exit(1), "bad list for c/f option");
			/* a missing start means "from the first position" */
			range = n ? n : 1;
			n = 0;
			break;
		default:
			if (!isdigit(c))
				error(ERROR_exit(1), "bad list for c/f option");
			n = 10 * n + (c - '0');
			break;
		}
	/* NOTREACHED */
}
/*
 * paste builtin entry point.
 *
 * Options: -d list sets the delimiter list (a tab when absent or empty),
 * -s pastes each file serially.  The delimiter list is copied and has its
 * escapes expanded by stresc().  In a multibyte locale, when the list
 * holds fewer characters than bytes, a Delim_t table recording the start
 * and byte length of each delimiter character is built as well.
 *
 * Without -s every operand is opened first and the streams are handed to
 * paste() together, but only if no error has been recorded.  With -s each
 * operand is processed by spaste() as soon as it is opened.  "-" or the
 * absence of operands selects standard input.
 *
 * Returns the recorded error count.
 */
int b_paste(int argc, char** argv, Shbltin_t* context)
{
	Sfio_t**	streams;
	Sfio_t*		in;
	Delim_t*	table = 0;
	char*		delim = 0;
	char*		path;
	char*		scan;
	char*		end;
	char		defdelim[2];
	int		serial = 0;
	int		count;
	int		dlen;
	int		dsiz;
	int		parsing = 1;

	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
	while (parsing)
	{
		switch (optget(argv, usage))
		{
		case 'd':
			delim = opt_info.arg;
			break;
		case 's':
			serial++;
			break;
		case ':':
			error(2, "%s", opt_info.arg);
			parsing = 0;
			break;
		case '?':
			error(ERROR_usage(2), "%s", opt_info.arg);
			parsing = 0;
			break;
		default:
			parsing = 0;
			break;
		}
	}
	argv += opt_info.index;
	if (error_info.errors)
		error(ERROR_usage(2), "%s", optusage(NiL));

	if (!delim || !*delim)
	{
		defdelim[0] = '\t';
		defdelim[1] = 0;
		delim = defdelim;
	}
	/* work on a private copy: stresc() rewrites the string in place */
	delim = strdup(delim);
	if (!delim)
		error(ERROR_system(1), "out of space");
	dsiz = stresc(delim);
	dlen = dsiz;

	if (mbwide())
	{
		/* first pass: count characters rather than bytes */
		end = delim + dlen;
		dlen = 0;
		for (scan = delim; scan < end; dlen++)
			mbchar(scan);
		if (dlen < dsiz)
		{
			table = newof(0, Delim_t, dlen, 0);
			if (!table)
			{
				free(delim);
				error(ERROR_system(1), "out of space");
			}
			/* second pass: remember where each character starts and its size */
			dlen = 0;
			for (scan = delim; scan < end; dlen++)
			{
				table[dlen].chr = scan;
				mbchar(scan);
				table[dlen].len = scan - table[dlen].chr;
			}
		}
	}

	path = *argv;
	if (path)
	{
		count = argc - opt_info.index;
		argv++;
	}
	else
		count = 1;

	if (!serial)
	{
		streams = (Sfio_t**)stakalloc(count * sizeof(Sfio_t*));
		if (!streams)
			error(ERROR_exit(1), "out of space");
		/* from here on count is the number of slots filled */
		count = 0;
	}

	do
	{
		if (!path || streq(path, "-"))
			in = sfstdin;
		else
		{
			in = sfopen(NiL, path, "r");
			if (!in)
				error(ERROR_system(0), "%s: cannot open", path);
		}
		if (!serial)
			streams[count++] = in;	/* may be null after a failed open */
		else if (in)
		{
			if (spaste(in, sfstdout, delim, dsiz, dlen, table) < 0)
				error(ERROR_system(0), "write failed");
			if (in != sfstdin)
				sfclose(in);
		}
	} while ((path = *argv++));

	if (!serial)
	{
		if (error_info.errors == 0 && paste(count, streams, sfstdout, delim, dsiz, dlen, table) < 0)
			error(ERROR_system(0), "write failed");
		while (--count >= 0)
		{
			in = streams[count];
			if (in && in != sfstdin)
				sfclose(in);
		}
	}
	if (table)
		free(table);
	free(delim);
	return(error_info.errors);
}
int chrexp(const char *s, char **p, int *m, int flags) { const char *t; int c; const char *e; const char *b; char *r; int n; int x; wchar_t d; Mbstate_t q; bool u; bool w; u = w = 0; mbinit(&q); for (;;) { b = s; c = mbchar(&d, (char **)&s, MB_LEN_MAX, &q); switch (c) { case 0: s = b; break; case '\\': b = s; switch (c = *s++) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c -= '0'; t = s + 2; while (s < t) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c = (c << 3) + *s++ - '0'; break; default: t = s; break; } } break; case 'a': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c = CC_bel; break; case 'b': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c = '\b'; break; case 'c': /*DEPRECATED*/ case 'C': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c = *s; if (c) { s++; if (c == '\\') { c = chrexp(s - 1, &r, 0, flags); s = (const char *)r; } if (islower(c)) c = toupper(c); c ^= 0x40; } break; case 'e': /*DEPRECATED*/ case 'E': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c = CC_esc; break; case 'f': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c = '\f'; break; case 'M': if (!(flags & FMT_EXP_CHAR)) goto noexpand; if (*s == '-') { s++; c = CC_esc; } break; case 'n': if (flags & FMT_EXP_NONL) continue; if (!(flags & FMT_EXP_LINE)) goto noexpand; c = '\n'; break; case 'r': if (flags & FMT_EXP_NOCR) continue; if (!(flags & FMT_EXP_LINE)) goto noexpand; c = '\r'; break; case 't': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c = '\t'; break; case 'v': if (!(flags & FMT_EXP_CHAR)) goto noexpand; c = CC_vt; break; case 'u': u = 1; // FALLTHRU case 'w': t = s + 4; goto wex; case 'U': u = 1; // FALLTHRU case 'W': t = s + 8; wex: if (!(flags & FMT_EXP_WIDE)) goto noexpand; w = 1; goto hex; case 'x': t = s + 2; hex: e = s; n = 0; c = 0; x = 0; while (!e || !t || s < t) { switch (*s) { case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': c = (c << 4) + *s++ 
- 'a' + 10; n++; continue; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': c = (c << 4) + *s++ - 'A' + 10; n++; continue; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': c = (c << 4) + *s++ - '0'; n++; continue; case '{': case '[': if (s != e) break; e = 0; s++; if (w && (*s == 'U' || *s == 'W') && *(s + 1) == '+') s += 2; continue; case '-': if (e) break; if (*(s + 1) != '}' && *(s + 1) != ']') { if (!*(s + 1) || (*(s + 2) != '}' && *(s + 2) != ']')) { break; } x = *(unsigned char *)(s + 1); s += 2; } else { x = -1; s++; } /*FALLTHROUGH*/ case '}': case ']': if (!e) e = ++s; break; default: break; } break; } if (e) { if (n < 8 || (n == 8 && c >= 0)) { if (!w) { if (n > 2) { if (!(flags & FMT_EXP_WIDE)) goto noexpand; w = 1; } else if (!(flags & FMT_EXP_CHAR)) { goto noexpand; } else { break; } } if (!mbwide()) w = 0; if (c <= 0x7f) break; if (u) { uint32_t i = c; wchar_t o; if (!utf32invalid(i) && utf32stowcs(&o, &i, 1) > 0) { c = o; break; } } else if (w || c <= ast.byte_max) { break; } } if (x) { c = x; w = 0; break; } } /*FALLTHROUGH*/ case 0: goto noexpand; } break; default: if ((s - b) > 1) w = 1; break; noexpand: s = b; w = 0; c = '\\'; break; } break; } if (m) *m = w; if (p) *p = (char *)s; return c; }