parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, bool allow_incomplete) { parse_node_tree_t node_tree; parse_error_list_t parse_errors; parser_test_error_bits_t res = 0; // Whether we encountered a parse error bool errored = false; // Whether we encountered an unclosed block // We detect this via an 'end_command' block without source bool has_unclosed_block = false; // Whether there's an unclosed quote, and therefore unfinished // This is only set if allow_incomplete is set bool has_unclosed_quote = false; // Parse the input string into a parse tree // Some errors are detected here bool parsed = parse_tree_from_string(buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree, &parse_errors); if (allow_incomplete) { for (size_t i=0; i < parse_errors.size(); i++) { if (parse_errors.at(i).code == parse_error_tokenizer_unterminated_quote) { // Remove this error, since we don't consider it a real error has_unclosed_quote = true; parse_errors.erase(parse_errors.begin() + i); i--; } } } // #1238: If the only error was unterminated quote, then consider this to have parsed successfully. A better fix would be to have parse_tree_from_string return this information directly (but it would be a shame to munge up its nice bool return). if (parse_errors.empty() && has_unclosed_quote) { parsed = true; } if (! parsed) { errored = true; } // has_unclosed_quote may only be set if allow_incomplete is true assert(! has_unclosed_quote || allow_incomplete); // Expand all commands // Verify 'or' and 'and' not used inside pipelines // Verify pipes via parser_is_pipe_forbidden // Verify return only within a function // Verify no variable expansions if (! errored) { const size_t node_tree_size = node_tree.size(); for (size_t i=0; i < node_tree_size; i++) { const parse_node_t &node = node_tree.at(i); if (node.type == symbol_end_command && ! node.has_source()) { // an 'end' without source is an unclosed block has_unclosed_block = true; } else if (node.type == symbol_boolean_statement) { // 'or' and 'and' can be in a pipeline, as long as they're first parse_bool_statement_type_t type = parse_node_tree_t::statement_boolean_type(node); if ((type == parse_bool_and || type == parse_bool_or) && node_tree.statement_is_in_pipeline(node, false /* don't count first */)) { errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, (type == parse_bool_and) ? L"and" : L"or"); } } else if (node.type == symbol_argument) { const wcstring arg_src = node.get_source(buff_src); res |= parse_util_detect_errors_in_argument(node, arg_src, &parse_errors); } else if (node.type == symbol_job) { if (node_tree.job_should_be_backgrounded(node)) { /* Disallow background in the following cases: foo & ; and bar foo & ; or bar if foo & ; end while foo & ; end */ const parse_node_t *job_parent = node_tree.get_parent(node); assert(job_parent != NULL); switch (job_parent->type) { case symbol_if_clause: case symbol_while_header: { assert(node_tree.get_child(*job_parent, 1) == &node); errored = append_syntax_error(&parse_errors, node, BACKGROUND_IN_CONDITIONAL_ERROR_MSG); break; } case symbol_job_list: { // This isn't very complete, e.g. we don't catch 'foo & ; not and bar' assert(node_tree.get_child(*job_parent, 0) == &node); const parse_node_t *next_job_list = node_tree.get_child(*job_parent, 1, symbol_job_list); assert(next_job_list != NULL); const parse_node_t *next_job = node_tree.next_node_in_node_list(*next_job_list, symbol_job, NULL); if (next_job != NULL) { const parse_node_t *next_statement = node_tree.get_child(*next_job, 0, symbol_statement); if (next_statement != NULL) { const parse_node_t *spec_statement = node_tree.get_child(*next_statement, 0); if (spec_statement && spec_statement->type == symbol_boolean_statement) { switch (parse_node_tree_t::statement_boolean_type(*spec_statement)) { // These are not allowed case parse_bool_and: errored = append_syntax_error(&parse_errors, *spec_statement, BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and"); break; case parse_bool_or: errored = append_syntax_error(&parse_errors, *spec_statement, BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or"); break; case parse_bool_not: // This one is OK break; } } } } break; } default: break; } } } else if (node.type == symbol_plain_statement) { // In a few places below, we want to know if we are in a pipeline const bool is_in_pipeline = node_tree.statement_is_in_pipeline(node, true /* count first */); // We need to know the decoration const enum parse_statement_decoration_t decoration = node_tree.decoration_for_plain_statement(node); // Check that we don't try to pipe through exec if (is_in_pipeline && decoration == parse_statement_decoration_exec) { errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, L"exec"); } wcstring command; if (node_tree.command_for_plain_statement(node, buff_src, &command)) { // Check that we can expand the command if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS, NULL)) { // TODO: leverage the resulting errors errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str()); } // Check that pipes are sound if (! errored && parser_is_pipe_forbidden(command) && is_in_pipeline) { errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, command.c_str()); } // Check that we don't return from outside a function // But we allow it if it's 'return --help' if (! errored && command == L"return") { const parse_node_t *ancestor = &node; bool found_function = false; while (ancestor != NULL) { const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor); if (possible_function_header != NULL && possible_function_header->type == symbol_function_header) { found_function = true; break; } ancestor = node_tree.get_parent(*ancestor); } if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) { errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG); } } // Check that we don't break or continue from outside a loop if (! errored && (command == L"break" || command == L"continue")) { // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function. // This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header. bool found_loop = false, end_search = false; const parse_node_t *ancestor = &node; while (ancestor != NULL && ! end_search) { const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); if (loop_or_function_header != NULL) { switch (loop_or_function_header->type) { case symbol_while_header: case symbol_for_header: // this is a loop header, so we can break or continue found_loop = true; end_search = true; break; case symbol_function_header: // this is a function header, so we cannot break or continue. We stop our search here. found_loop = false; end_search = true; break; default: // most likely begin / end style block, which makes no difference break; } } ancestor = node_tree.get_parent(*ancestor); } if (! found_loop && ! first_argument_is_help(node_tree, node, buff_src)) { errored = append_syntax_error(&parse_errors, node, (command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG)); } } // Check that we don't do an invalid builtin (#1252) if (! errored && decoration == parse_statement_decoration_builtin && ! builtin_exists(command)) { errored = append_syntax_error(&parse_errors, node, UNKNOWN_BUILTIN_ERR_MSG, command.c_str()); } } } } } if (errored) res |= PARSER_TEST_ERROR; if (has_unclosed_block || has_unclosed_quote) res |= PARSER_TEST_INCOMPLETE; if (out_errors) { out_errors->swap(parse_errors); } return res; }
/** Test if this argument contains any errors. Detected errors include syntax errors in command substitutions, improperly escaped characters and improper use of the variable expansion operator. */ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t &node, const wcstring &arg_src, parse_error_list_t *out_errors) { assert(node.type == symbol_argument); int err=0; wchar_t *paran_begin, *paran_end; int do_loop = 1; wcstring working_copy = arg_src; while (do_loop) { const wchar_t *working_copy_cstr = working_copy.c_str(); switch (parse_util_locate_cmdsubst(working_copy_cstr, ¶n_begin, ¶n_end, false)) { case -1: { err=1; if (out_errors) { append_syntax_error(out_errors, node, L"Mismatched parenthesis"); } return err; } case 0: { do_loop = 0; break; } case 1: { const wcstring subst(paran_begin + 1, paran_end); // Replace the command substitution with just INTERNAL_SEPARATOR size_t cmd_sub_start = paran_begin - working_copy_cstr; size_t cmd_sub_len = paran_end + 1 - paran_begin; working_copy.replace(cmd_sub_start, cmd_sub_len, wcstring(1, INTERNAL_SEPARATOR)); parse_error_list_t subst_errors; err |= parse_util_detect_errors(subst, &subst_errors, false /* do not accept incomplete */); /* Our command substitution produced error offsets relative to its source. Tweak the offsets of the errors in the command substitution to account for both its offset within the string, and the offset of the node */ size_t error_offset = cmd_sub_start + 1 + node.source_start; parse_error_offset_source_start(&subst_errors, error_offset); if (out_errors != NULL) { out_errors->insert(out_errors->end(), subst_errors.begin(), subst_errors.end()); } break; } } } wcstring unesc; if (! unescape_string(working_copy, &unesc, UNESCAPE_SPECIAL)) { if (out_errors) { append_syntax_error(out_errors, node, L"Invalid token '%ls'", working_copy.c_str()); } return 1; } else { /* Check for invalid variable expansions */ const size_t unesc_size = unesc.size(); for (size_t idx = 0; idx < unesc_size; idx++) { switch (unesc.at(idx)) { case VARIABLE_EXPAND: case VARIABLE_EXPAND_SINGLE: { wchar_t next_char = (idx + 1 < unesc_size ? unesc.at(idx + 1) : L'\0'); if (next_char != VARIABLE_EXPAND && next_char != VARIABLE_EXPAND_SINGLE && ! wcsvarchr(next_char)) { err=1; if (out_errors) { parse_util_expand_variable_error(node, unesc, idx, node.source_start, out_errors); } } break; } } } } return err; }
/// Perform brace expansion. static expand_error_t expand_braces(const wcstring &instr, expand_flags_t flags, std::vector<completion_t> *out, parse_error_list_t *errors) { bool syntax_error = false; int brace_count = 0; const wchar_t *brace_begin = NULL, *brace_end = NULL; const wchar_t *last_sep = NULL; const wchar_t *item_begin; size_t length_preceding_braces, length_following_braces, tot_len; const wchar_t *const in = instr.c_str(); // Locate the first non-nested brace pair. for (const wchar_t *pos = in; (*pos) && !syntax_error; pos++) { switch (*pos) { case BRACE_BEGIN: { if (brace_count == 0) brace_begin = pos; brace_count++; break; } case BRACE_END: { brace_count--; if (brace_count < 0) { syntax_error = true; } else if (brace_count == 0) { brace_end = pos; } break; } case BRACE_SEP: { if (brace_count == 1) last_sep = pos; break; } default: { break; // we ignore all other characters here } } } if (brace_count > 0) { if (!(flags & EXPAND_FOR_COMPLETIONS)) { syntax_error = true; } else { // The user hasn't typed an end brace yet; make one up and append it, then expand // that. wcstring mod; if (last_sep) { mod.append(in, brace_begin - in + 1); mod.append(last_sep + 1); mod.push_back(BRACE_END); } else { mod.append(in); mod.push_back(BRACE_END); } // Note: this code looks very fishy, apparently it has never worked. return expand_braces(mod, 1, out, errors); } } // Expand a literal "{}" to itself because it is useless otherwise, // and this eases e.g. `find -exec {}`. See #1109. if (brace_begin + 1 == brace_end) { wcstring newstr = instr; newstr.at(brace_begin - in) = L'{'; newstr.at(brace_end - in) = L'}'; return expand_braces(newstr, flags, out, errors); } if (syntax_error) { append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, _(L"Mismatched braces")); return EXPAND_ERROR; } if (brace_begin == NULL) { append_completion(out, instr); return EXPAND_OK; } length_preceding_braces = (brace_begin - in); length_following_braces = wcslen(brace_end) - 1; tot_len = length_preceding_braces + length_following_braces; item_begin = brace_begin + 1; for (const wchar_t *pos = (brace_begin + 1); true; pos++) { if (brace_count == 0 && ((*pos == BRACE_SEP) || (pos == brace_end))) { assert(pos >= item_begin); size_t item_len = pos - item_begin; wcstring item = wcstring(item_begin, item_len); item = trim(item, (const wchar_t[]) { BRACE_SPACE, L'\0' }); for (auto &c : item) { if (c == BRACE_SPACE) { c = ' '; } } wcstring whole_item; whole_item.reserve(tot_len + item_len + 2); whole_item.append(in, length_preceding_braces); whole_item.append(item.begin(), item.end()); whole_item.append(brace_end + 1); expand_braces(whole_item, flags, out, errors); item_begin = pos + 1; if (pos == brace_end) break; }
void parse_util_expand_variable_error(const parse_node_t &node, const wcstring &token, size_t token_pos, size_t error_pos, parse_error_list_t *out_errors) { size_t stop_pos = token_pos+1; switch (token[stop_pos]) { case BRACKET_BEGIN: { wchar_t *cpy = wcsdup(token.c_str()); *(cpy+token_pos)=0; wchar_t *name = &cpy[stop_pos+1]; wchar_t *end = wcschr(name, BRACKET_END); wchar_t *post = NULL; int is_var=0; if (end) { post = end+1; *end = 0; if (!wcsvarname(name)) { is_var = 1; } } if (is_var) { append_syntax_error(out_errors, node, COMPLETE_VAR_BRACKET_DESC, cpy, name, post); } else { append_syntax_error(out_errors, node, COMPLETE_VAR_BRACKET_DESC, L"", L"VARIABLE", L""); } free(cpy); break; } case INTERNAL_SEPARATOR: { append_syntax_error(out_errors, node, COMPLETE_VAR_PARAN_DESC); break; } case 0: { append_syntax_error(out_errors, node, COMPLETE_VAR_NULL_DESC); break; } default: { wchar_t token_stop_char = token[stop_pos]; // Unescape (see http://github.com/fish-shell/fish-shell/issues/50) if (token_stop_char == ANY_CHAR) token_stop_char = L'?'; else if (token_stop_char == ANY_STRING || token_stop_char == ANY_STRING_RECURSIVE) token_stop_char = L'*'; append_syntax_error(out_errors, node, (token_stop_char == L'?' ? COMPLETE_YOU_WANT_STATUS : COMPLETE_VAR_DESC), token_stop_char); break; } } }
/// Expand all environment variables in the string *ptr. /// /// This function is slow, fragile and complicated. There are lots of little corner cases, like /// $$foo should do a double expansion, $foo$bar should not double expand bar, etc. Also, it's easy /// to accidentally leak memory on array out of bounds errors an various other situations. All in /// all, this function should be rewritten, split out into multiple logical units and carefully /// tested. After that, it can probably be optimized to do fewer memory allocations, fewer string /// scans and overall just less work. But until that happens, don't edit it unless you know exactly /// what you are doing, and do proper testing afterwards. /// /// This function operates on strings backwards, starting at last_idx. /// /// Note: last_idx is considered to be where it previously finished procesisng. This means it /// actually starts operating on last_idx-1. As such, to process a string fully, pass string.size() /// as last_idx instead of string.size()-1. static bool expand_variables(const wcstring &instr, std::vector<completion_t> *out, size_t last_idx, parse_error_list_t *errors) { const size_t insize = instr.size(); // last_idx may be 1 past the end of the string, but no further. assert(last_idx <= insize && "Invalid last_idx"); if (last_idx == 0) { append_completion(out, instr); return true; } // Locate the last VARIABLE_EXPAND or VARIABLE_EXPAND_SINGLE bool is_single = false; size_t varexp_char_idx = last_idx; while (varexp_char_idx--) { const wchar_t c = instr.at(varexp_char_idx); if (c == VARIABLE_EXPAND || c == VARIABLE_EXPAND_SINGLE) { is_single = (c == VARIABLE_EXPAND_SINGLE); break; } } if (varexp_char_idx >= instr.size()) { // No variable expand char, we're done. append_completion(out, instr); return true; } // Get the variable name. const size_t var_name_start = varexp_char_idx + 1; size_t var_name_stop = var_name_start; while (var_name_stop < insize) { const wchar_t nc = instr.at(var_name_stop); if (nc == VARIABLE_EXPAND_EMPTY) { var_name_stop++; break; } if (!valid_var_name_char(nc)) break; var_name_stop++; } assert(var_name_stop >= var_name_start && "Bogus variable name indexes"); const size_t var_name_len = var_name_stop - var_name_start; // It's an error if the name is empty. if (var_name_len == 0) { if (errors) { parse_util_expand_variable_error(instr, 0 /* global_token_pos */, varexp_char_idx, errors); } return false; } // Get the variable name as a string, then try to get the variable from env. const wcstring var_name(instr, var_name_start, var_name_len); // Do a dirty hack to make sliced history fast (#4650). We expand from either a variable, or a // history_t. Note that "history" is read only in env.cpp so it's safe to special-case it in // this way (it cannot be shadowed, etc). history_t *history = nullptr; maybe_t<env_var_t> var{}; if (var_name == L"history") { // We do this only on the main thread, matching env.cpp. if (is_main_thread()) { history = reader_get_history(); } } else if (var_name != wcstring{VARIABLE_EXPAND_EMPTY}) { var = env_get(var_name); } // Parse out any following slice. // Record the end of the variable name and any following slice. size_t var_name_and_slice_stop = var_name_stop; bool all_values = true; const size_t slice_start = var_name_stop; // List of indexes, and parallel array of source positions of each index in the variable list. std::vector<long> var_idx_list; std::vector<size_t> var_pos_list; if (slice_start < insize && instr.at(slice_start) == L'[') { all_values = false; const wchar_t *in = instr.c_str(); wchar_t *slice_end; // If a variable is missing, behave as though we have one value, so that $var[1] always // works. size_t effective_val_count = 1; if (var) { effective_val_count = var->as_list().size(); } else if (history) { effective_val_count = history->size(); } size_t bad_pos = parse_slice(in + slice_start, &slice_end, var_idx_list, var_pos_list, effective_val_count); if (bad_pos != 0) { append_syntax_error(errors, slice_start + bad_pos, L"Invalid index value"); return false; } var_name_and_slice_stop = (slice_end - in); } if (!var && !history) { // Expanding a non-existent variable. if (!is_single) { // Normal expansions of missing variables successfully expand to nothing. return true; } else { // Expansion to single argument. // Replace the variable name and slice with VARIABLE_EXPAND_EMPTY. wcstring res(instr, 0, varexp_char_idx); if (!res.empty() && res.back() == VARIABLE_EXPAND_SINGLE) { res.push_back(VARIABLE_EXPAND_EMPTY); } res.append(instr, var_name_and_slice_stop, wcstring::npos); return expand_variables(res, out, varexp_char_idx, errors); } } // Ok, we have a variable or a history. Let's expand it. // Start by respecting the sliced elements. assert((var || history) && "Should have variable or history here"); wcstring_list_t var_item_list; if (all_values) { if (history) { history->get_history(var_item_list); } else { var->to_list(var_item_list); } } else { // We have to respect the slice. if (history) { // Ask history to map indexes to item strings. // Note this may have missing entries for out-of-bounds. auto item_map = history->items_at_indexes(var_idx_list); for (long item_index : var_idx_list) { auto iter = item_map.find(item_index); if (iter != item_map.end()) { var_item_list.push_back(iter->second); } } } else { const wcstring_list_t &all_var_items = var->as_list(); for (long item_index : var_idx_list) { // Check that we are within array bounds. If not, skip the element. Note: // Negative indices (`echo $foo[-1]`) are already converted to positive ones // here, So tmp < 1 means it's definitely not in. // Note we are 1-based. if (item_index >= 1 && size_t(item_index) <= all_var_items.size()) { var_item_list.push_back(all_var_items.at(item_index - 1)); } } } } if (is_single) { wcstring res(instr, 0, varexp_char_idx); if (!res.empty()) { if (res.back() != VARIABLE_EXPAND_SINGLE) { res.push_back(INTERNAL_SEPARATOR); } else if (var_item_list.empty() || var_item_list.front().empty()) { // First expansion is empty, but we need to recursively expand. res.push_back(VARIABLE_EXPAND_EMPTY); } } // Append all entries in var_item_list, separated by spaces. // Remove the last space. if (!var_item_list.empty()) { for (const wcstring &item : var_item_list) { res.append(item); res.push_back(L' '); } res.pop_back(); } res.append(instr, var_name_and_slice_stop, wcstring::npos); return expand_variables(res, out, varexp_char_idx, errors); } else { // Normal cartesian-product expansion. for (const wcstring &item : var_item_list) { if (varexp_char_idx == 0 && var_name_and_slice_stop == insize) { append_completion(out, item); } else { wcstring new_in(instr, 0, varexp_char_idx); if (!new_in.empty()) { if (new_in.back() != VARIABLE_EXPAND) { new_in.push_back(INTERNAL_SEPARATOR); } else if (item.empty()) { new_in.push_back(VARIABLE_EXPAND_EMPTY); } } new_in.append(item); new_in.append(instr, var_name_and_slice_stop, wcstring::npos); if (!expand_variables(new_in, out, varexp_char_idx, errors)) { return false; } } } } return true; }