/*
 * Parse a decimal number.
 *
 * Leading whitespace is skipped, one optional '+' or '-' sign is
 * accepted, then at least one digit is required and every remaining
 * character up to the terminating NUL must be a digit.
 *
 * s:  NUL-terminated string to parse
 * ai: receives the parsed value; on failure it receives the value of
 *     the digits accepted before the offending character (0 if none),
 *     with the sign applied
 *
 * Returns 0 if the string isn't a number that fits into an int,
 * 1 otherwise.
 */
int
getn(const char *s, int *ai)
{
	/*
	 * Largest positive int, derived without <limits.h>; assumes the
	 * usual representation where UINT_MAX == 2 * INT_MAX + 1.
	 */
	const unsigned int imax = ~0U >> 1;
	unsigned int acc = 0, limit, digit;
	int c, rv = 0;
	bool neg = false;

	do {
		c = *s++;
	} while (ksh_isspace(c));
	if (c == '-') {
		neg = true;
		c = *s++;
	} else if (c == '+')
		c = *s++;
	/* a negative number may reach one past the positive maximum */
	limit = neg ? imax + 1U : imax;

	do {
		if (!ksh_isdigit(c))
			goto getn_out;
		digit = (unsigned int)(c - '0');
		/*
		 * Accumulate in unsigned arithmetic and test the bound
		 * before multiplying, so that no signed overflow (which
		 * is undefined behaviour) can ever take place.
		 */
		if (acc > (limit - digit) / 10U)
			goto getn_out;
		acc = acc * 10U + digit;
	} while ((c = *s++));
	rv = 1;

 getn_out:
	if (!neg)
		*ai = (int)acc;
	else if (acc > imax)
		/* magnitude imax + 1 is the most negative int */
		*ai = -(int)imax - 1;
	else
		*ai = -(int)acc;
	return (rv);
}
/*
 * Turn a file descriptor name (the X in 2>&X, read -uX, print -uX) into
 * a file descriptor number; called by iosetup() (which deals with 2>&4
 * and the like), c_read and c_print.
 *
 * name:  "p" to ask coproc_getfd() for the descriptor, otherwise a
 *        string of decimal digits
 * mode:  R_OK and/or W_OK for the access wanted; X_OK switches the
 *        access check off
 * emsgp: if not NULL, receives an error message on failure
 *
 * Returns the descriptor number, or -1 on error.
 */
int
check_fd(const char *name, int mode, const char **emsgp)
{
	const char *why;
	int num = 0, acc;

	if (name[0] == 'p' && name[1] == '\0')
		return (coproc_getfd(mode, emsgp));

	/* accumulate the decimal digits, refusing anything >= FDBASE */
	for (; ksh_isdigit(*name); ++name) {
		num = num * 10 + *name - '0';
		if (num >= FDBASE) {
			why = "file descriptor too large";
			goto bad;
		}
	}
	if (*name != '\0') {
		why = "illegal file descriptor name";
		goto bad;
	}

	acc = fcntl(num, F_GETFL, 0);
	if (acc < 0) {
		why = "bad file descriptor";
		goto bad;
	}
	acc &= O_ACCMODE;

	/*
	 * X_OK is a kludge to disable this check for dups (x<&1):
	 * historical shells never did this check (XXX don't know what
	 * POSIX has to say).
	 */
	if ((mode & X_OK) || acc == O_RDWR)
		return (num);
	if (((mode & R_OK) && acc != O_RDONLY) ||
	    ((mode & W_OK) && acc != O_WRONLY)) {
		why = (acc == O_WRONLY) ? "fd not open for reading" :
		    "fd not open for writing";
		goto bad;
	}
	return (num);

 bad:
	/* single failure exit: hand out the message if the caller wants it */
	if (emsgp)
		*emsgp = why;
	return (-1);
}
/* parse a decimal number: returns 0 if string isn't a number, 1 otherwise */ int getn(const char *s, int *ai) { char c; mksh_ari_u num; bool neg = false; num.u = 0; do { c = *s++; } while (ksh_isspace(c)); switch (c) { case '-': neg = true; /* FALLTHROUGH */ case '+': c = *s++; break; } do { if (!ksh_isdigit(c)) /* not numeric */ return (0); if (num.u > 214748364U) /* overflow on multiplication */ return (0); num.u = num.u * 10U + (unsigned int)ksh_numdig(c); /* now: num.u <= 2147483649U */ } while ((c = *s++)); if (num.u > (neg ? 2147483648U : 2147483647U)) /* overflow for signed 32-bit int */ return (0); if (neg) num.u = -num.u; *ai = num.i; return (1); }
/*
 * Option parser used for shell built-in commands, the getopts command
 * and command line options.
 *
 * If optionsp starts with ':', no error is printed; '?' (unknown
 * option) or ':' (missing argument) is returned and go->optarg is
 * pointed at go->buf, whose first byte holds the offending option
 * character. Otherwise a warning is printed and, if GF_ERROR is set,
 * bi_errorfz() is called.
 *
 * Non-standard features:
 * - ';' is like ':' in options, except that the argument is optional
 *   (go->optarg is NULL if it is absent). Used for 'set -o'.
 * - ',' is like ':' in options, except that the argument always
 *   immediately follows the option character (go->optarg is the empty
 *   string if it is absent). Used for 'read -u2', 'print -u2' and
 *   'fc -40'.
 * - '#' is like ':' in options, except that the argument is optional
 *   and must start with a digit. If it doesn't, the argument is taken
 *   to be missing (go->optarg is NULL) and normal option processing
 *   continues. Used for 'typeset -LZ4'.
 * - +c is accepted as well as -c if GF_PLUSOPT is set; go->info gets
 *   GI_PLUS or GI_MINUS according to the current option word.
 *
 * Returns the option character, '?' or ':' on error, or -1 when there
 * are no more options.
 */
int
ksh_getopt(const char **argv, Getopt *go, const char *optionsp)
{
	char optc;
	const char *spec;

	if (go->p == 0 || (optc = argv[go->optind - 1][go->p]) == '\0') {
		/* nothing left in the current word: inspect the next one */
		const char *word = argv[go->optind];
		const char lead = word ? *word : '\0';

		go->p = 1;
		if (lead == '-' && word[1] == '-' && word[2] == '\0') {
			/* "--" ends option processing and is consumed */
			go->optind++;
			go->p = 0;
			go->info |= GI_MINUSMINUS;
			return (-1);
		}
		if (word == NULL ||
		    /* neither a - nor a + (if + allowed) */
		    !(lead == '-' ||
		    ((go->flags & GF_PLUSOPT) && lead == '+')) ||
		    (optc = word[1]) == '\0') {
			go->p = 0;
			return (-1);
		}
		go->optind++;
		go->info &= ~(GI_MINUS|GI_PLUS);
		if (lead == '-')
			go->info |= GI_MINUS;
		else
			go->info |= GI_PLUS;
	}
	go->p++;

	/* characters with a meaning in optionsp are never options */
	switch (optc) {
	case '?':
	case ':':
	case ';':
	case ',':
	case '#':
		spec = NULL;
		break;
	default:
		spec = cstrchr(optionsp, optc);
		break;
	}
	if (spec == NULL) {
		if (optionsp[0] == ':') {
			go->buf[0] = optc;
			go->optarg = go->buf;
		} else {
			warningf(true, "%s%s-%c: unknown option",
			    (go->flags & GF_NONAME) ? "" : argv[0],
			    (go->flags & GF_NONAME) ? "" : ": ", optc);
			if (go->flags & GF_ERROR)
				bi_errorfz();
		}
		return ('?');
	}

	/*
	 * Look at the modifier following the option character:
	 * : means argument must be present, may be part of option argument
	 *   or the next argument
	 * ; same as : but argument may be missing
	 * , means argument is part of option argument, and may be null.
	 */
	++spec;
	switch (*spec) {
	case ':':
	case ';':
		if (argv[go->optind - 1][go->p])
			go->optarg = argv[go->optind - 1] + go->p;
		else if (argv[go->optind])
			go->optarg = argv[go->optind++];
		else if (*spec == ';')
			go->optarg = NULL;
		else {
			if (optionsp[0] == ':') {
				go->buf[0] = optc;
				go->optarg = go->buf;
				return (':');
			}
			warningf(true, "%s%s-'%c' requires argument",
			    (go->flags & GF_NONAME) ? "" : argv[0],
			    (go->flags & GF_NONAME) ? "" : ": ", optc);
			if (go->flags & GF_ERROR)
				bi_errorfz();
			return ('?');
		}
		go->p = 0;
		break;
	case ',':
		/* argument is attached to option character, even if null */
		go->optarg = argv[go->optind - 1] + go->p;
		go->p = 0;
		break;
	case '#': {
		/*
		 * argument is optional and may be attached or unattached
		 * but must start with a digit. optarg is set to 0 if the
		 * argument is missing.
		 */
		const char *cand = argv[go->optind - 1] + go->p;
		bool attached = (*cand != '\0');

		if (!attached)
			cand = argv[go->optind];
		if (cand != NULL && ksh_isdigit(cand[0])) {
			go->optarg = cand;
			if (!attached)
				go->optind++;
			go->p = 0;
		} else
			go->optarg = NULL;
		break;
	    }
	default:
		/* plain flag without argument */
		break;
	}
	return (optc);
}
/* * lookup variable (according to (set&LOCAL)), set its attributes * (INTEGER, RDONLY, EXPORT, TRACE, LJUST, RJUST, ZEROFIL, LCASEV, * UCASEV_AL), and optionally set its value if an assignment. */ struct tbl * typeset(const char *var, uint32_t set, uint32_t clr, int field, int base) { struct tbl *vp; struct tbl *vpbase, *t; char *tvar; const char *val; size_t len; bool vappend = false; enum namerefflag new_refflag = SRF_NOP; if ((set & (ARRAY | ASSOC)) == ASSOC) { new_refflag = SRF_ENABLE; set &= ~(ARRAY | ASSOC); } if ((clr & (ARRAY | ASSOC)) == ASSOC) { new_refflag = SRF_DISABLE; clr &= ~(ARRAY | ASSOC); } /* check for valid variable name, search for value */ val = skip_varname(var, false); if (val == var) { /* no variable name given */ return (NULL); } if (*val == '[') { if (new_refflag != SRF_NOP) errorf("%s: %s", var, "reference variable can't be an array"); len = array_ref_len(val); if (len == 0) return (NULL); /* * IMPORT is only used when the shell starts up and is * setting up its environment. Allow only simple array * references at this time since parameter/command * substitution is performed on the [expression] which * would be a major security hole. 
*/ if (set & IMPORT) { size_t i; for (i = 1; i < len - 1; i++) if (!ksh_isdigit(val[i])) return (NULL); } val += len; } if (val[0] == '=') { strndupx(tvar, var, val - var, ATEMP); ++val; } else if (set & IMPORT) { /* environment invalid variable name or no assignment */ return (NULL); } else if (val[0] == '+' && val[1] == '=') { strndupx(tvar, var, val - var, ATEMP); val += 2; vappend = true; } else if (val[0] != '\0') { /* other invalid variable names (not from environment) */ return (NULL); } else { /* just varname with no value part nor equals sign */ strdupx(tvar, var, ATEMP); val = NULL; /* handle foo[*] => foo (whole array) mapping for R39b */ len = strlen(tvar); if (len > 3 && tvar[len - 3] == '[' && tvar[len - 2] == '*' && tvar[len - 1] == ']') tvar[len - 3] = '\0'; } if (new_refflag == SRF_ENABLE) { const char *qval, *ccp; /* bail out on 'nameref foo+=bar' */ if (vappend) errorf("appending not allowed for nameref"); /* find value if variable already exists */ if ((qval = val) == NULL) { varsearch(e->loc, &vp, tvar, hash(tvar)); if (vp == NULL) goto nameref_empty; qval = str_val(vp); } /* check target value for being a valid variable name */ ccp = skip_varname(qval, false); if (ccp == qval) { if (ksh_isdigit(qval[0])) { int c; if (getn(qval, &c)) goto nameref_rhs_checked; } else if (qval[1] == '\0') switch (qval[0]) { case '$': case '!': case '?': case '#': case '-': goto nameref_rhs_checked; } nameref_empty: errorf("%s: %s", var, "empty nameref target"); } len = (*ccp == '[') ? 
array_ref_len(ccp) : 0; if (ccp[len]) { /* * works for cases "no array", "valid array with * junk after it" and "invalid array"; in the * latter case, len is also 0 and points to '[' */ errorf("%s: %s", qval, "nameref target not a valid parameter name"); } nameref_rhs_checked: /* prevent nameref loops */ while (qval) { if (!strcmp(qval, tvar)) errorf("%s: %s", qval, "expression recurses on parameter"); varsearch(e->loc, &vp, qval, hash(qval)); qval = NULL; if (vp && ((vp->flag & (ARRAY | ASSOC)) == ASSOC)) qval = str_val(vp); } } /* prevent typeset from creating a local PATH/ENV/SHELL */ if (Flag(FRESTRICTED) && (strcmp(tvar, "PATH") == 0 || strcmp(tvar, "ENV") == 0 || strcmp(tvar, "SHELL") == 0)) errorf("%s: %s", tvar, "restricted"); innermost_refflag = new_refflag; vp = (set & LOCAL) ? local(tvar, tobool(set & LOCAL_COPY)) : global(tvar); if (new_refflag == SRF_DISABLE && (vp->flag & (ARRAY|ASSOC)) == ASSOC) vp->flag &= ~ASSOC; else if (new_refflag == SRF_ENABLE) { if (vp->flag & ARRAY) { struct tbl *a, *tmp; /* free up entire array */ for (a = vp->u.array; a; ) { tmp = a; a = a->u.array; if (tmp->flag & ALLOC) afree(tmp->val.s, tmp->areap); afree(tmp, tmp->areap); } vp->u.array = NULL; vp->flag &= ~ARRAY; } vp->flag |= ASSOC; } set &= ~(LOCAL|LOCAL_COPY); vpbase = (vp->flag & ARRAY) ? global(arrayname(tvar)) : vp; /* * only allow export flag to be set; AT&T ksh allows any * attribute to be changed which means it can be truncated or * modified (-L/-R/-Z/-i) */ if ((vpbase->flag & RDONLY) && (val || clr || (set & ~EXPORT))) /* XXX check calls - is error here ok by POSIX? */ errorfx(2, "read-only: %s", tvar); afree(tvar, ATEMP); /* most calls are with set/clr == 0 */ if (set | clr) { bool ok = true; /* * XXX if x[0] isn't set, there will be problems: need * to have one copy of attributes for arrays... 
*/ for (t = vpbase; t; t = t->u.array) { bool fake_assign; char *s = NULL; char *free_me = NULL; fake_assign = (t->flag & ISSET) && (!val || t != vp) && ((set & (UCASEV_AL|LCASEV|LJUST|RJUST|ZEROFIL)) || ((t->flag & INTEGER) && (clr & INTEGER)) || (!(t->flag & INTEGER) && (set & INTEGER))); if (fake_assign) { if (t->flag & INTEGER) { s = str_val(t); free_me = NULL; } else { s = t->val.s + t->type; free_me = (t->flag & ALLOC) ? t->val.s : NULL; } t->flag &= ~ALLOC; } if (!(t->flag & INTEGER) && (set & INTEGER)) { t->type = 0; t->flag &= ~ALLOC; } t->flag = (t->flag | set) & ~clr; /* * Don't change base if assignment is to be * done, in case assignment fails. */ if ((set & INTEGER) && base > 0 && (!val || t != vp)) t->type = base; if (set & (LJUST|RJUST|ZEROFIL)) t->u2.field = field; if (fake_assign) { if (!setstr(t, s, KSH_RETURN_ERROR)) { /* * Somewhat arbitrary action * here: zap contents of * variable, but keep the flag * settings. */ ok = false; if (t->flag & INTEGER) t->flag &= ~ISSET; else { if (t->flag & ALLOC) afree(t->val.s, t->areap); t->flag &= ~(ISSET|ALLOC); t->type = 0; } } if (free_me) afree(free_me, t->areap); } } if (!ok) errorfz(); } if (val != NULL) { char *tval; if (vappend) { tval = shf_smprintf("%s%s", str_val(vp), val); val = tval; } else tval = NULL; if (vp->flag&INTEGER) { /* do not zero base before assignment */ setstr(vp, val, KSH_UNWIND_ERROR | 0x4); /* done after assignment to override default */ if (base > 0) vp->type = base; } else /* setstr can't fail (readonly check already done) */ setstr(vp, val, KSH_RETURN_ERROR | 0x4); if (tval != NULL) afree(tval, ATEMP); } /* only x[0] is ever exported, so use vpbase */ if ((vpbase->flag&EXPORT) && !(vpbase->flag&INTEGER) && vpbase->type == 0) exportprep(vpbase, (vpbase->flag&ISSET) ? vpbase->val.s : null); return (vp); }
/*
 * Fetch the integer value of a variable.
 *
 * vp:    variable to read; getspec() is called first if it is SPECIAL
 * nump:  receives the value
 * arith: if true, a leading "0x"/"0X" selects base 16 and, in POSIX
 *        mode for variables without ZEROFIL, a leading "0" followed by
 *        a digit selects base 8
 *
 * A string value may start with whitespace and one sign and may use
 * the "base#digits" notation; "1#c" yields the code of character c.
 *
 * Returns the base of the number, or -1 if the variable is unset or
 * its value is not a number.
 */
static int
getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
{
	mksh_uari_t ch, acc = 0, radix = 10;
	const char *cp;
	bool radix_fixed = false, negative = false;

	if (vp->flag & SPECIAL)
		getspec(vp);
	if (!(vp->flag & ISSET))
		return (-1);
	if (vp->flag & INTEGER) {
		/* already numeric: the base is kept in the type field */
		nump->i = vp->val.i;
		return (vp->type);
	}
	/* XXX is it possible for ISSET to be set and val.s to be NULL? */
	if (vp->val.s == NULL)
		return (-1);

	cp = vp->val.s + vp->type;

	/* skip leading whitespace */
	do {
		ch = (unsigned char)*cp++;
	} while (ksh_isspace(ch));

	/* optional sign */
	if (ch == '-' || ch == '+') {
		negative = (ch == '-');
		ch = (unsigned char)*cp++;
	}

	/* C-style base prefixes, honoured in arithmetic context only */
	if (arith && ch == '0') {
		if (ksh_eq(cp[0], 'X', 'x')) {
			/* interpret as hexadecimal */
			radix = 16;
			++cp;
			radix_fixed = true;
			ch = (unsigned char)*cp++;
		} else if (Flag(FPOSIX) && ksh_isdigit(cp[0]) &&
		    !(vp->flag & ZEROFIL)) {
			/* interpret as octal (deprecated) */
			radix = 8;
			radix_fixed = true;
			ch = (unsigned char)*cp++;
		}
	}

	do {
		if (ch == '#') {
			/* ksh-style base determination */
			if (radix_fixed || acc < 1)
				return (-1);
			radix = acc;
			if (radix == 1) {
				/* mksh-specific extension */
				unsigned int wc;

				if (!UTFMODE)
					wc = *(const unsigned char *)cp;
				else if (utf_mbtowc(&wc, cp) == (size_t)-1)
					/* OPTU-8 -> OPTU-16 */
					/*
					 * (with a twist: 1#\uEF80 converts
					 * the same as 1#\x80 does, thus is
					 * not round-tripping correctly XXX)
					 */
					wc = 0xEF00 + *(const unsigned char *)cp;
				nump->u = (mksh_uari_t)wc;
				return (1);
			}
			if (radix > 36)
				radix = 10;
			/* the digits read so far were the base */
			acc = 0;
			radix_fixed = true;
		} else {
			/* map the character onto its digit value */
			if (ksh_isdigit(ch))
				ch = ksh_numdig(ch);
			else if (ksh_isupper(ch))
				ch = ksh_numuc(ch) + 10;
			else if (ksh_islower(ch))
				ch = ksh_numlc(ch) + 10;
			else
				return (-1);
			if (ch >= radix)
				return (-1);
			/* handle overflow as truncation */
			acc = acc * radix + ch;
		}
	} while ((ch = (unsigned char)*cp++));

	if (negative)
		acc = -acc;
	nump->u = acc;
	return (radix);
}
/* * Search for variable, if not found create globally. */ struct tbl * global(const char *n) { struct block *l = e->loc; struct tbl *vp; int c; bool array; uint32_t h, val; /* * check to see if this is an array; * dereference namerefs; must come first */ n = array_index_calc(n, &array, &val); h = hash(n); c = (unsigned char)n[0]; if (!ksh_isalphx(c)) { if (array) errorf("bad substitution"); vp = &vtemp; vp->flag = DEFINED; vp->type = 0; vp->areap = ATEMP; *vp->name = c; if (ksh_isdigit(c)) { if (getn(n, &c) && (c <= l->argc)) /* setstr can't fail here */ setstr(vp, l->argv[c], KSH_RETURN_ERROR); vp->flag |= RDONLY; return (vp); } vp->flag |= RDONLY; if (n[1] != '\0') return (vp); vp->flag |= ISSET|INTEGER; switch (c) { case '$': vp->val.i = kshpid; break; case '!': /* if no job, expand to nothing */ if ((vp->val.i = j_async()) == 0) vp->flag &= ~(ISSET|INTEGER); break; case '?': vp->val.i = exstat & 0xFF; break; case '#': vp->val.i = l->argc; break; case '-': vp->flag &= ~INTEGER; vp->val.s = getoptions(); break; default: vp->flag &= ~(ISSET|INTEGER); } return (vp); } l = varsearch(e->loc, &vp, n, h); if (vp != NULL) return (array ? arraysearch(vp, val) : vp); vp = ktenter(&l->vars, n, h); if (array) vp = arraysearch(vp, val); vp->flag |= DEFINED; if (special(n)) vp->flag |= SPECIAL; return (vp); }
/*
 * Lexer entry point: reads characters through getsc() and returns the
 * next token.
 *
 * cf is a bit set steering the lexer; the bits tested in this function
 * are ONEWORD, LETEXPR, HEREDELIM, HEREDOC, CMDWORD, ALIAS, KEYWORD,
 * ESACONLY, VARASN, ARRAYVAR, LQCHAR and CONTIN.
 *
 * Phases, in order:
 * 1. Choose the initial state: SWORD for ONEWORD, SLETPAREN for LETEXPR
 *    (an OQUOTE is emitted first); otherwise blanks, tabs and one '#'
 *    comment up to the newline are skipped and the state is SHEREDELIM
 *    (for HEREDELIM) or SBASE.
 * 2. Collect the word into the expandable buffer ws (write pointer wp)
 *    as tagged items such as CHAR/QCHAR plus a character, OQUOTE/CQUOTE,
 *    OSUBST/CSUBST, COMSUB, EXPRSUB, OPAT/SPAT/CPAT and ADELIM. Nested
 *    constructs are tracked with PUSH_STATE()/POP_STATE(). The loop
 *    ends at a NUL character or, in SBASE/SHEREDELIM, at a C_LEX1
 *    character.
 * 3. If states are still stacked at that point, yyerror() is called.
 * 4. If, in SBASE, the terminator is '<' or '>' (or '&' outside sh and
 *    POSIX mode, unless MKSH_LEGACY_MODE is defined) and the word is
 *    empty or made of unquoted digits forming a number below FDBASE, an
 *    ioword is filled in, stored in yylval.iop, and REDIR is returned.
 * 5. If no word was collected in SBASE, the terminator itself is the
 *    token: two-character operators are folded into LOGOR, LOGAND,
 *    BREAK, MDPAREN, COPROC, BRKEV or BRKFT; a newline is pushed back
 *    for HEREDELIM, otherwise gethere() is called and, with CONTIN,
 *    lexing starts over.
 * 6. Otherwise the word is terminated with EOS and stored in yylval.cp.
 *    For SWORD and SLETPAREN, LWORD is returned at once. Else the
 *    terminator is pushed back and the word, if entirely unquoted, is
 *    copied to ident. With KEYWORD, a keyword match returns the
 *    keyword's token; with ALIAS, a set alias that is not followed by
 *    "()" and is not already being expanded is pushed as an SALIAS
 *    source and lexing starts over. Everything else returns LWORD.
 */
int yylex(int cf) { Lex_state states[STATE_BSIZE], *statep, *s2, *base; State_info state_info; int c, c2, state; size_t cz; XString ws; /* expandable output word */ char *wp; /* output word pointer */ char *sp, *dp; Again: states[0].type = SINVALID; states[0].ls_base = NULL; statep = &states[1]; state_info.base = states; state_info.end = &state_info.base[STATE_BSIZE]; Xinit(ws, wp, 64, ATEMP); backslash_skip = 0; ignore_backslash_newline = 0; if (cf & ONEWORD) state = SWORD; else if (cf & LETEXPR) { /* enclose arguments in (double) quotes */ *wp++ = OQUOTE; state = SLETPAREN; statep->nparen = 0; } else { /* normal lexing */ state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; while ((c = getsc()) == ' ' || c == '\t') ; if (c == '#') { ignore_backslash_newline++; while ((c = getsc()) != '\0' && c != '\n') ; ignore_backslash_newline--; } ungetsc(c); } if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ source->flags &= ~SF_ALIAS; /* POSIX: trailing space only counts if parsing simple cmd */ if (!Flag(FPOSIX) || (cf & CMDWORD)) cf |= ALIAS; } /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ statep->type = state; /* collect non-special or quoted characters to form word */ while (!((c = getsc()) == 0 || ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { if (state == SBASE && subshell_nesting_type == /*{*/ '}' && c == /*{*/ '}') /* possibly end ${ :;} */ break; Xcheck(ws, wp); switch (state) { case SADELIM: if (c == '(') statep->nparen++; else if (c == ')') statep->nparen--; else if (statep->nparen == 0 && (c == /*{*/ '}' || c == (int)statep->ls_adelim.delimiter)) { *wp++ = ADELIM; *wp++ = c; if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) POP_STATE(); if (c == /*{*/ '}') POP_STATE(); break; } /* FALLTHROUGH */ case SBASE: if (c == '[' && (cf & (VARASN|ARRAYVAR))) { /* temporary */ *wp = EOS; if (is_wdvarname(Xstring(ws, wp), false)) { char *p, *tmp; if (arraysub(&tmp)) { *wp++ = CHAR; *wp++ = c; for (p = tmp; *p; ) { 
Xcheck(ws, wp); *wp++ = CHAR; *wp++ = *p++; } afree(tmp, ATEMP); break; } else { Source *s; s = pushs(SREREAD, source->areap); s->start = s->str = s->u.freeme = tmp; s->next = source; source = s; } } *wp++ = CHAR; *wp++ = c; break; } /* FALLTHROUGH */ Sbase1: /* includes *(...|...) pattern (*+?@!) */ if (c == '*' || c == '@' || c == '+' || c == '?' || c == '!') { c2 = getsc(); if (c2 == '(' /*)*/ ) { *wp++ = OPAT; *wp++ = c; PUSH_STATE(SPATTERN); break; } ungetsc(c2); } /* FALLTHROUGH */ Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ switch (c) { case '\\': getsc_qchar: if ((c = getsc())) { /* trailing \ is lost */ *wp++ = QCHAR; *wp++ = c; } break; case '\'': open_ssquote_unless_heredoc: if ((cf & HEREDOC)) goto store_char; *wp++ = OQUOTE; ignore_backslash_newline++; PUSH_STATE(SSQUOTE); break; case '"': open_sdquote: *wp++ = OQUOTE; PUSH_STATE(SDQUOTE); break; case '$': /* * processing of dollar sign belongs into * Subst, except for those which can open * a string: $'…' and $"…" */ subst_dollar_ex: c = getsc(); switch (c) { case '"': goto open_sdquote; case '\'': goto open_sequote; default: goto SubstS; } default: goto Subst; } break; Subst: switch (c) { case '\\': c = getsc(); switch (c) { case '"': if ((cf & HEREDOC)) goto heredocquote; /* FALLTHROUGH */ case '\\': case '$': case '`': store_qchar: *wp++ = QCHAR; *wp++ = c; break; default: heredocquote: Xcheck(ws, wp); if (c) { /* trailing \ is lost */ *wp++ = CHAR; *wp++ = '\\'; *wp++ = CHAR; *wp++ = c; } break; } break; case '$': c = getsc(); SubstS: if (c == '(') /*)*/ { c = getsc(); if (c == '(') /*)*/ { *wp++ = EXPRSUB; PUSH_SRETRACE(SASPAREN); statep->nparen = 2; *retrace_info->xp++ = '('; } else { ungetsc(c); subst_command: c = COMSUB; subst_command2: sp = yyrecursive(c); cz = strlen(sp) + 1; XcheckN(ws, wp, cz); *wp++ = c; memcpy(wp, sp, cz); wp += cz; } } else if (c == '{') /*}*/ { if ((c = getsc()) == '|') { /* * non-subenvironment * value substitution */ c = VALSUB; goto subst_command2; } 
else if (ctype(c, C_IFSWS)) { /* * non-subenvironment * "command" substitution */ c = FUNSUB; goto subst_command2; } ungetsc(c); *wp++ = OSUBST; *wp++ = '{'; /*}*/ wp = get_brace_var(&ws, wp); c = getsc(); /* allow :# and :% (ksh88 compat) */ if (c == ':') { *wp++ = CHAR; *wp++ = c; c = getsc(); if (c == ':') { *wp++ = CHAR; *wp++ = '0'; *wp++ = ADELIM; *wp++ = ':'; PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_adelim.delimiter = ':'; statep->ls_adelim.num = 1; statep->nparen = 0; break; } else if (ksh_isdigit(c) || c == '('/*)*/ || c == ' ' || /*XXX what else? */ c == '$') { /* substring subst. */ if (c != ' ') { *wp++ = CHAR; *wp++ = ' '; } ungetsc(c); PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_adelim.delimiter = ':'; statep->ls_adelim.num = 2; statep->nparen = 0; break; } } else if (c == '/') { *wp++ = CHAR; *wp++ = c; if ((c = getsc()) == '/') { *wp++ = ADELIM; *wp++ = c; } else ungetsc(c); PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_adelim.delimiter = '/'; statep->ls_adelim.num = 1; statep->nparen = 0; break; } /* * If this is a trim operation, * treat (,|,) specially in STBRACE. */ if (ctype(c, C_SUBOP2)) { ungetsc(c); if (Flag(FSH)) PUSH_STATE(STBRACEBOURNE); else PUSH_STATE(STBRACEKORN); } else { ungetsc(c); if (state == SDQUOTE || state == SQBRACE) PUSH_STATE(SQBRACE); else PUSH_STATE(SBRACE); } } else if (ksh_isalphx(c)) { *wp++ = OSUBST; *wp++ = 'X'; do { Xcheck(ws, wp); *wp++ = c; c = getsc(); } while (ksh_isalnux(c)); *wp++ = '\0'; *wp++ = CSUBST; *wp++ = 'X'; ungetsc(c); } else if (ctype(c, C_VAR1 | C_DIGIT)) { Xcheck(ws, wp); *wp++ = OSUBST; *wp++ = 'X'; *wp++ = c; *wp++ = '\0'; *wp++ = CSUBST; *wp++ = 'X'; } else { *wp++ = CHAR; *wp++ = '$'; ungetsc(c); } break; case '`': subst_gravis: PUSH_STATE(SBQUOTE); *wp++ = COMSUB; /* * We need to know whether we are within double * quotes, since most shells translate \" to " * within "…`…\"…`…". 
This is not done in POSIX * mode (§2.2.3 Double-Quotes: “The backquote * shall retain its special meaning introducing * the other form of command substitution (see * Command Substitution). The portion of the * quoted string from the initial backquote and * the characters up to the next backquote that * is not preceded by a <backslash>, having * escape characters removed, defines that * command whose output replaces "`...`" when * the word is expanded.”; §2.6.3 Command * Substitution: “Within the backquoted style * of command substitution, <backslash> shall * retain its literal meaning, except when * followed by: '$', '`', or <backslash>. The * search for the matching backquote shall be * satisfied by the first unquoted non-escaped * backquote; during this search, if a * non-escaped backquote is encountered[…], * undefined results occur.”). */ statep->ls_bool = false; if (Flag(FPOSIX)) break; s2 = statep; base = state_info.base; while (/* CONSTCOND */ 1) { for (; s2 != base; s2--) { if (s2->type == SDQUOTE) { statep->ls_bool = true; break; } } if (s2 != base) break; if (!(s2 = s2->ls_base)) break; base = s2-- - STATE_BSIZE; } break; case QCHAR: if (cf & LQCHAR) { *wp++ = QCHAR; *wp++ = getsc(); break; } /* FALLTHROUGH */ default: store_char: *wp++ = CHAR; *wp++ = c; } break; case SEQUOTE: if (c == '\'') { POP_STATE(); *wp++ = CQUOTE; ignore_backslash_newline--; } else if (c == '\\') { if ((c2 = unbksl(true, s_get, s_put)) == -1) c2 = s_get(); if (c2 == 0) statep->ls_bool = true; if (!statep->ls_bool) { char ts[4]; if ((unsigned int)c2 < 0x100) { *wp++ = QCHAR; *wp++ = c2; } else { cz = utf_wctomb(ts, c2 - 0x100); ts[cz] = 0; cz = 0; do { *wp++ = QCHAR; *wp++ = ts[cz]; } while (ts[++cz]); } } } else if (!statep->ls_bool) { *wp++ = QCHAR; *wp++ = c; } break; case SSQUOTE: if (c == '\'') { POP_STATE(); if ((cf & HEREDOC) || state == SQBRACE) goto store_char; *wp++ = CQUOTE; ignore_backslash_newline--; } else { *wp++ = QCHAR; *wp++ = c; } break; case SDQUOTE: if (c == 
'"') { POP_STATE(); *wp++ = CQUOTE; } else goto Subst; break; /* $(( ... )) */ case SASPAREN: if (c == '(') statep->nparen++; else if (c == ')') { statep->nparen--; if (statep->nparen == 1) { /* end of EXPRSUB */ POP_SRETRACE(); if ((c2 = getsc()) == /*(*/ ')') { cz = strlen(sp) - 2; XcheckN(ws, wp, cz); memcpy(wp, sp + 1, cz); wp += cz; afree(sp, ATEMP); *wp++ = '\0'; break; } else { Source *s; ungetsc(c2); /* * mismatched parenthesis - * assume we were really * parsing a $(...) expression */ --wp; s = pushs(SREREAD, source->areap); s->start = s->str = s->u.freeme = sp; s->next = source; source = s; goto subst_command; } } } /* reuse existing state machine */ goto Sbase2; case SQBRACE: if (c == '\\') { /* * perform POSIX "quote removal" if the back- * slash is "special", i.e. same cases as the * {case '\\':} in Subst: plus closing brace; * in mksh code "quote removal" on '\c' means * write QCHAR+c, otherwise CHAR+\+CHAR+c are * emitted (in heredocquote:) */ if ((c = getsc()) == '"' || c == '\\' || c == '$' || c == '`' || c == /*{*/'}') goto store_qchar; goto heredocquote; } goto common_SQBRACE; case SBRACE: if (c == '\'') goto open_ssquote_unless_heredoc; else if (c == '\\') goto getsc_qchar; common_SQBRACE: if (c == '"') goto open_sdquote; else if (c == '$') goto subst_dollar_ex; else if (c == '`') goto subst_gravis; else if (c != /*{*/ '}') goto store_char; POP_STATE(); *wp++ = CSUBST; *wp++ = /*{*/ '}'; break; /* Same as SBASE, except (,|,) treated specially */ case STBRACEKORN: if (c == '|') *wp++ = SPAT; else if (c == '(') { *wp++ = OPAT; /* simile for @ */ *wp++ = ' '; PUSH_STATE(SPATTERN); } else /* FALLTHROUGH */ case STBRACEBOURNE: if (c == /*{*/ '}') { POP_STATE(); *wp++ = CSUBST; *wp++ = /*{*/ '}'; } else goto Sbase1; break; case SBQUOTE: if (c == '`') { *wp++ = 0; POP_STATE(); } else if (c == '\\') { switch (c = getsc()) { case 0: /* trailing \ is lost */ break; case '$': case '`': case '\\': *wp++ = c; break; case '"': if (statep->ls_bool) { *wp++ = 
c; break; } /* FALLTHROUGH */ default: *wp++ = '\\'; *wp++ = c; break; } } else *wp++ = c; break; /* ONEWORD */ case SWORD: goto Subst; /* LETEXPR: (( ... )) */ case SLETPAREN: if (c == /*(*/ ')') { if (statep->nparen > 0) --statep->nparen; else if ((c2 = getsc()) == /*(*/ ')') { c = 0; *wp++ = CQUOTE; goto Done; } else { Source *s; ungetsc(c2); /* * mismatched parenthesis - * assume we were really * parsing a (...) expression */ *wp = EOS; sp = Xstring(ws, wp); dp = wdstrip(sp + 1, WDS_TPUTS); s = pushs(SREREAD, source->areap); s->start = s->str = s->u.freeme = dp; s->next = source; source = s; return ('('/*)*/); } } else if (c == '(') /* * parentheses inside quotes and * backslashes are lost, but AT&T ksh * doesn't count them either */ ++statep->nparen; goto Sbase2; /* << or <<- delimiter */ case SHEREDELIM: /* * here delimiters need a special case since * $ and `...` are not to be treated specially */ switch (c) { case '\\': if ((c = getsc())) { /* trailing \ is lost */ *wp++ = QCHAR; *wp++ = c; } break; case '\'': goto open_ssquote_unless_heredoc; case '$': if ((c2 = getsc()) == '\'') { open_sequote: *wp++ = OQUOTE; ignore_backslash_newline++; PUSH_STATE(SEQUOTE); statep->ls_bool = false; break; } else if (c2 == '"') { /* FALLTHROUGH */ case '"': PUSH_SRETRACE(SHEREDQUOTE); break; } ungetsc(c2); /* FALLTHROUGH */ default: *wp++ = CHAR; *wp++ = c; } break; /* " in << or <<- delimiter */ case SHEREDQUOTE: if (c != '"') goto Subst; POP_SRETRACE(); dp = strnul(sp) - 1; /* remove the trailing double quote */ *dp = '\0'; /* store the quoted string */ *wp++ = OQUOTE; XcheckN(ws, wp, (dp - sp) * 2); dp = sp; while ((c = *dp++)) { if (c == '\\') { switch ((c = *dp++)) { case '\\': case '"': case '$': case '`': break; default: *wp++ = CHAR; *wp++ = '\\'; break; } } *wp++ = CHAR; *wp++ = c; } afree(sp, ATEMP); *wp++ = CQUOTE; state = statep->type = SHEREDELIM; break; /* in *(...|...) pattern (*+?@!) 
*/ case SPATTERN: if (c == /*(*/ ')') { *wp++ = CPAT; POP_STATE(); } else if (c == '|') { *wp++ = SPAT; } else if (c == '(') { *wp++ = OPAT; /* simile for @ */ *wp++ = ' '; PUSH_STATE(SPATTERN); } else goto Sbase1; break; } } Done: Xcheck(ws, wp); if (statep != &states[1]) /* XXX figure out what is missing */ yyerror("no closing quote\n"); /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ if (state == SHEREDELIM) state = SBASE; dp = Xstring(ws, wp); if (state == SBASE && ( #ifndef MKSH_LEGACY_MODE (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) || #endif c == '<' || c == '>')) { struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); if (Xlength(ws, wp) == 0) iop->unit = c == '<' ? 0 : 1; else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { if (dp[c2] != CHAR) goto no_iop; if (!ksh_isdigit(dp[c2 + 1])) goto no_iop; iop->unit = iop->unit * 10 + ksh_numdig(dp[c2 + 1]); if (iop->unit >= FDBASE) goto no_iop; } if (c == '&') { if ((c2 = getsc()) != '>') { ungetsc(c2); goto no_iop; } c = c2; iop->ioflag = IOBASH; } else iop->ioflag = 0; c2 = getsc(); /* <<, >>, <> are ok, >< is not */ if (c == c2 || (c == '<' && c2 == '>')) { iop->ioflag |= c == c2 ? (c == '>' ? IOCAT : IOHERE) : IORDWR; if (iop->ioflag == IOHERE) { if ((c2 = getsc()) == '-') iop->ioflag |= IOSKIP; else if (c2 == '<') iop->ioflag |= IOHERESTR; else ungetsc(c2); } } else if (c2 == '&') iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0); else { iop->ioflag |= c == '>' ? IOWRITE : IOREAD; if (c == '>' && c2 == '|') iop->ioflag |= IOCLOB; else ungetsc(c2); } iop->ioname = NULL; iop->delim = NULL; iop->heredoc = NULL; /* free word */ Xfree(ws, wp); yylval.iop = iop; return (REDIR); no_iop: afree(iop, ATEMP); } if (wp == dp && state == SBASE) { /* free word */ Xfree(ws, wp); /* no word, process LEX1 character */ if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { if ((c2 = getsc()) == c) c = (c == ';') ? BREAK : (c == '|') ? LOGOR : (c == '&') ? 
LOGAND : /* c == '(' ) */ MDPAREN; else if (c == '|' && c2 == '&') c = COPROC; else if (c == ';' && c2 == '|') c = BRKEV; else if (c == ';' && c2 == '&') c = BRKFT; else ungetsc(c2); #ifndef MKSH_SMALL if (c == BREAK) { if ((c2 = getsc()) == '&') c = BRKEV; else ungetsc(c2); } #endif } else if (c == '\n') { if (cf & HEREDELIM) ungetsc(c); else { gethere(); if (cf & CONTIN) goto Again; } } return (c); } /* terminate word */ *wp++ = EOS; yylval.cp = Xclose(ws, wp); if (state == SWORD || state == SLETPAREN /* XXX ONEWORD? */) return (LWORD); /* unget terminator */ ungetsc(c); /* * note: the alias-vs-function code below depends on several * interna: starting from here, source->str is not modified; * the way getsc() and ungetsc() operate; etc. */ /* copy word to unprefixed string ident */ sp = yylval.cp; dp = ident; while ((dp - ident) < IDENT && (c = *sp++) == CHAR) *dp++ = *sp++; if (c != EOS) /* word is not unquoted */ dp = ident; /* make sure the ident array stays NUL padded */ memset(dp, 0, (ident + IDENT) - dp + 1); if (!(cf & (KEYWORD | ALIAS))) return (LWORD); if (*ident != '\0') { struct tbl *p; uint32_t h = hash(ident); if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == /*{*/ '}')) { afree(yylval.cp, ATEMP); return (p->val.i); } if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && (p->flag & ISSET)) { /* * this still points to the same character as the * ungetsc'd terminator from above */ const char *cp = source->str; /* prefer POSIX but not Korn functions over aliases */ while (*cp == ' ' || *cp == '\t') /* * this is like getsc() without skipping * over Source boundaries (including not * parsing ungetsc'd characters that got * pushed into an SREREAD) which is what * we want here anyway: find out whether * the alias name is followed by a POSIX * function definition */ ++cp; /* prefer functions over aliases */ if (cp[0] != '(' || cp[1] != ')') { Source *s = source; while (s && (s->flags & 
SF_HASALIAS)) if (s->u.tblp == p) return (LWORD); else s = s->next; /* push alias expansion */ s = pushs(SALIAS, source->areap); s->start = s->str = p->val.s; s->u.tblp = p; s->flags |= SF_HASALIAS; s->next = source; if (source->type == SEOF) { /* prevent infinite recursion at EOS */ source->u.tblp = p; source->flags |= SF_HASALIAS; } source = s; afree(yylval.cp, ATEMP); goto Again; } } } else if (cf & ALIAS) { /* retain typeset et al. even when quoted */ if (assign_command((dp = wdstrip(yylval.cp, 0)))) strlcpy(ident, dp, sizeof(ident)); afree(dp, ATEMP); } return (LWORD); }
/*
 * Read the variable part of a ${...} expression (i.e. up to but not
 * including the :[-+?=#%] or close-brace).
 *
 * The characters are appended at wp in the expandable string *wsp and
 * terminated with a NUL; the character that ended the variable part is
 * pushed back onto the input. Returns the advanced write pointer.
 */
static char *
get_brace_var(XString *wsp, char *wp)
{
	char c;
	enum parse_state {
		PS_INITIAL, PS_SAW_HASH, PS_IDENT, PS_NUMBER, PS_VAR1
	} state = PS_INITIAL;

	/* small state machine deciding where the variable part ends */
	for (;;) {
		c = getsc();

		if (state == PS_INITIAL &&
		    (c == '#' || c == '!' || c == '%')) {
			/* prefix operator; the name follows */
			state = PS_SAW_HASH;
		} else if (state == PS_INITIAL || state == PS_SAW_HASH) {
			/* first character of the name proper */
			if (ksh_isalphx(c))
				state = PS_IDENT;
			else if (ksh_isdigit(c))
				state = PS_NUMBER;
			else if (c == '#') {
				if (state == PS_SAW_HASH) {
					char peek;

					/* look one character ahead */
					peek = getsc();
					ungetsc(peek);
					if (peek != /*{*/ '}') {
						/*
						 * NOTE(review): c is pushed
						 * back here and once more
						 * after the loop; kept as
						 * is -- confirm the double
						 * unget is intended
						 */
						ungetsc(c);
						break;
					}
				}
				state = PS_VAR1;
			} else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				break;
		} else if (state == PS_IDENT) {
			if (!ksh_isalnux(c)) {
				if (c == '[') {
					char *sub, *q;

					/* copy the whole array subscript */
					if (!arraysub(&sub))
						yyerror("missing ]\n");
					*wp++ = c;
					for (q = sub; *q; ) {
						Xcheck(*wsp, wp);
						*wp++ = *q++;
					}
					afree(sub, ATEMP);
					/* the ] */
					c = getsc();
				}
				break;
			}
		} else if (state == PS_NUMBER) {
			if (!ksh_isdigit(c))
				break;
		} else {
			/* PS_VAR1: single-character name is complete */
			break;
		}

		Xcheck(*wsp, wp);
		*wp++ = c;
	}

	/* end of variable part */
	*wp++ = '\0';
	ungetsc(c);
	return (wp);
}
/*
 * Fetch the integer value of a variable.
 *
 * vp:    variable to read; getspec() is called first if it is SPECIAL
 * nump:  receives the value
 * arith: if true, a leading "0x"/"0X" selects base 16 (and, when
 *        MKSH_LEGACY_MODE is defined, a leading "0" followed by a digit
 *        selects base 8 unless the variable has ZEROFIL set)
 *
 * A string value may start with one '-' sign and may use the
 * "base#digits" notation with a base from 1 to 36; "1#c" yields the
 * code of character c. An empty digit string counts as 0.
 *
 * Returns the base of the number, or -1 if the variable is unset or
 * its value is not a number.
 */
static int
getint(struct tbl *vp, mksh_ari_t *nump, bool arith)
{
	int c, base = 10;
	mksh_uari_t num = 0;
	const char *s;
	bool have_base = false, neg = false;

	if (vp->flag&SPECIAL)
		getspec(vp);
	/* XXX is it possible for ISSET to be set and val.s to be 0? */
	if (!(vp->flag&ISSET) || (!(vp->flag&INTEGER) && vp->val.s == NULL))
		return (-1);
	if (vp->flag&INTEGER) {
		*nump = vp->val.i;
		return (vp->type);
	}
	s = vp->val.s + vp->type;

	/*
	 * A minus sign is only a sign in front of the number. Anywhere
	 * else it makes the string a non-number, so that e.g. "1-2" is
	 * rejected instead of being read as -12.
	 */
	if (*s == '-') {
		neg = true;
		++s;
	}

	/* the two C-style prefixes are mutually exclusive */
	if (arith && s[0] == '0') {
		if ((s[1] | 0x20) == 'x') {
			s += 2;
			base = 16;
			have_base = true;
		}
#ifdef MKSH_LEGACY_MODE
		else if (ksh_isdigit(s[1]) && !(vp->flag & ZEROFIL)) {
			/* interpret as octal (deprecated) */
			base = 8;
			have_base = true;
		}
#endif
	}

	while ((c = *s++)) {
		if (c == '#') {
			/* the digits read so far are the base */
			if (have_base || num < 1 || num > 36)
				return (-1);
			base = (int)num;
			if (base == 1) {
				unsigned int wc;

				if (!UTFMODE)
					wc = *(const unsigned char *)s;
				else if (utf_mbtowc(&wc, s) == (size_t)-1)
					/* OPTU-8 -> OPTU-16 */
					/*
					 * (with a twist: 1#\uEF80 converts
					 * the same as 1#\x80 does, thus is
					 * not round-tripping correctly XXX)
					 */
					wc = 0xEF00 + *(const unsigned char *)s;
				*nump = (mksh_ari_t)wc;
				return (1);
			}
			num = 0;
			have_base = true;
			continue;
		}
		/* map the character onto its digit value */
		if (ksh_isdigit(c))
			c -= '0';
		else if (ksh_islower(c))
			c -= 'a' - 10;
		else if (ksh_isupper(c))
			c -= 'A' - 10;
		else
			return (-1);
		if (c < 0 || c >= base)
			return (-1);
		num = num * base + c;
	}

	/*
	 * Negate while still unsigned: this wraps around in a defined
	 * way, whereas negating the most negative signed value would
	 * overflow.
	 */
	if (neg)
		num = -num;
	*nump = (mksh_ari_t)num;
	return (base);
}