bool parser_t::detect_errors_in_argument_list(const wcstring &arg_list_src, wcstring *out, const wchar_t *prefix) const { bool errored = false; parse_error_list_t errors; // Use empty string for the prefix if it's NULL. if (!prefix) prefix = L""; //!OCLINT(parameter reassignment) // Parse the string as an argument list. parse_node_tree_t tree; if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors, symbol_freestanding_argument_list)) { // Failed to parse. errored = true; } if (!errored) { // Get the root argument list and extract arguments from it. assert(!tree.empty()); //!OCLINT(multiple unary operator) tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0)); while (auto arg = arg_list.next_in_list<grammar::argument>()) { const wcstring arg_src = arg.get_source(arg_list_src); if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) { errored = true; } } } if (!errors.empty() && out != NULL) { out->assign(errors.at(0).describe_with_prefix( arg_list_src, prefix, false /* not interactive */, false /* don't skip caret */)); } return errored; }
void parser_t::expand_argument_list(const wcstring &arg_list_src, expand_flags_t eflags, std::vector<completion_t> *output_arg_list) { assert(output_arg_list != NULL); /* Parse the string as an argument list */ parse_node_tree_t tree; if (! parse_tree_from_string(arg_list_src, parse_flag_none, &tree, NULL /* errors */, symbol_freestanding_argument_list)) { /* Failed to parse. Here we expect to have reported any errors in test_args */ return; } /* Get the root argument list */ assert(! tree.empty()); const parse_node_t *arg_list = &tree.at(0); assert(arg_list->type == symbol_freestanding_argument_list); /* Extract arguments from it */ while (arg_list != NULL) { const parse_node_t *arg_node = tree.next_node_in_node_list(*arg_list, symbol_argument, &arg_list); if (arg_node != NULL) { const wcstring arg_src = arg_node->get_source(arg_list_src); if (expand_string(arg_src, output_arg_list, eflags, NULL) == EXPAND_ERROR) { /* Failed to expand a string */ break; } } } }
// Entry point for prettification. static wcstring prettify(const wcstring &src, bool do_indent) { parse_node_tree_t parse_tree; int parse_flags = (parse_flag_continue_after_error | parse_flag_include_comments | parse_flag_leave_unterminated | parse_flag_show_blank_lines); if (!parse_tree_from_string(src, parse_flags, &parse_tree, NULL)) { return src; // we return the original string on failure } if (dump_parse_tree) { const wcstring dump = parse_dump_tree(parse_tree, src); fwprintf(stderr, L"%ls\n", dump.c_str()); } // We may have a forest of disconnected trees on a parse failure. We have to handle all nodes // that have no parent, and all parse errors. bool has_new_line = true; wcstring result; for (node_offset_t i = 0; i < parse_tree.size(); i++) { const parse_node_t &node = parse_tree.at(i); if (node.parent == NODE_OFFSET_INVALID || node.type == parse_special_type_parse_error) { // A root node. prettify_node_recursive(src, parse_tree, i, 0, symbol_job_list, &has_new_line, &result, do_indent); } } return result; }
int parser_t::eval(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type) { CHECK_BLOCK(1); if (block_type != TOP && block_type != SUBST) { debug(1, INVALID_SCOPE_ERR_MSG, parser_t::get_block_desc(block_type)); bugreport(); return 1; } /* Parse the source into a tree, if we can */ parse_node_tree_t tree; parse_error_list_t error_list; if (! parse_tree_from_string(cmd, parse_flag_none, &tree, this->show_errors ? &error_list : NULL)) { if (this->show_errors) { /* Get a backtrace */ wcstring backtrace_and_desc; this->get_backtrace(cmd, error_list, &backtrace_and_desc); /* Print it */ fprintf(stderr, "%ls", backtrace_and_desc.c_str()); } return 1; } //print_stderr(block_stack_description()); /* Determine the initial eval level. If this is the first context, it's -1; otherwise it's the eval level of the top context. This is sort of wonky because we're stitching together a global notion of eval level from these separate objects. A better approach would be some profile object that all contexts share, and that tracks the eval levels on its own. */ int exec_eval_level = (execution_contexts.empty() ? -1 : execution_contexts.back()->current_eval_level()); /* Append to the execution context stack */ parse_execution_context_t *ctx = new parse_execution_context_t(tree, cmd, this, exec_eval_level); execution_contexts.push_back(ctx); /* Execute the first node */ if (! tree.empty()) { this->eval_block_node(0, io, block_type); } /* Clean up the execution context stack */ assert(! execution_contexts.empty() && execution_contexts.back() == ctx); execution_contexts.pop_back(); delete ctx; return 0; }
int parser_t::eval(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type) { // Parse the source into a tree, if we can. parse_node_tree_t tree; parse_error_list_t error_list; if (!parse_tree_from_string(cmd, parse_flag_none, &tree, &error_list)) { // Get a backtrace. This includes the message. wcstring backtrace_and_desc; this->get_backtrace(cmd, error_list, backtrace_and_desc); // Print it. fwprintf(stderr, L"%ls\n", backtrace_and_desc.c_str()); return 1; } return this->eval(cmd, io, block_type, std::move(tree)); }
void parser_t::expand_argument_list(const wcstring &arg_list_src, std::vector<completion_t> *output_arg_list) { assert(output_arg_list != NULL); expand_flags_t eflags = 0; if (! show_errors) eflags |= EXPAND_NO_DESCRIPTIONS; if (this->parser_type != PARSER_TYPE_GENERAL) eflags |= EXPAND_SKIP_CMDSUBST; /* Suppress calling proc_push_interactive off of the main thread. */ if (this->parser_type == PARSER_TYPE_GENERAL) { proc_push_interactive(0); } /* Parse the string as an argument list */ parse_node_tree_t tree; if (! parse_tree_from_string(arg_list_src, parse_flag_none, &tree, NULL /* errors */, symbol_freestanding_argument_list)) { /* Failed to parse. Here we expect to have reported any errors in test_args */ return; } /* Get the root argument list */ assert(! tree.empty()); const parse_node_t *arg_list = &tree.at(0); assert(arg_list->type == symbol_freestanding_argument_list); /* Extract arguments from it */ while (arg_list != NULL) { const parse_node_t *arg_node = tree.next_node_in_node_list(*arg_list, symbol_argument, &arg_list); if (arg_node != NULL) { const wcstring arg_src = arg_node->get_source(arg_list_src); if (expand_string(arg_src, output_arg_list, eflags, NULL) == EXPAND_ERROR) { /* Failed to expand a string */ break; } } } if (this->parser_type == PARSER_TYPE_GENERAL) { proc_pop_interactive(); } }
bool parser_t::detect_errors_in_argument_list(const wcstring &arg_list_src, wcstring *out, const wchar_t *prefix) { bool errored = false; parse_error_list_t errors; /* Use empty string for the prefix if it's NULL */ if (prefix == NULL) { prefix = L""; } /* Parse the string as an argument list */ parse_node_tree_t tree; if (! parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors, symbol_freestanding_argument_list)) { /* Failed to parse. */ errored = true; } if (! errored) { /* Get the root argument list */ assert(! tree.empty()); const parse_node_t *arg_list = &tree.at(0); assert(arg_list->type == symbol_freestanding_argument_list); /* Extract arguments from it */ while (arg_list != NULL && ! errored) { const parse_node_t *arg_node = tree.next_node_in_node_list(*arg_list, symbol_argument, &arg_list); if (arg_node != NULL) { const wcstring arg_src = arg_node->get_source(arg_list_src); if (parse_util_detect_errors_in_argument(*arg_node, arg_src, &errors)) { errored = true; } } } } if (! errors.empty() && out != NULL) { out->assign(errors.at(0).describe_with_prefix(arg_list_src, prefix, false /* not interactive */, false /* don't skip caret */)); } return errored; }
int parser_t::eval(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type) { /* Parse the source into a tree, if we can */ parse_node_tree_t tree; parse_error_list_t error_list; if (! parse_tree_from_string(cmd, parse_flag_none, &tree, &error_list)) { /* Get a backtrace. This includes the message. */ wcstring backtrace_and_desc; this->get_backtrace(cmd, error_list, &backtrace_and_desc); /* Print it */ fprintf(stderr, "%ls", backtrace_and_desc.c_str()); return 1; } return this->eval_acquiring_tree(cmd, io, block_type, moved_ref<parse_node_tree_t>(tree)); }
void parser_t::expand_argument_list(const wcstring &arg_list_src, expand_flags_t eflags, std::vector<completion_t> *output_arg_list) { assert(output_arg_list != NULL); // Parse the string as an argument list. parse_node_tree_t tree; if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, NULL /* errors */, symbol_freestanding_argument_list)) { // Failed to parse. Here we expect to have reported any errors in test_args. return; } // Get the root argument list and extract arguments from it. assert(!tree.empty()); //!OCLINT(multiple unary operator) tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0)); while (auto arg = arg_list.next_in_list<grammar::argument>()) { const wcstring arg_src = arg.get_source(arg_list_src); if (expand_string(arg_src, output_arg_list, eflags, NULL) == EXPAND_ERROR) { break; // failed to expand a string } } }
/// Entry point for prettification.
///
/// \param src source text to prettify
/// \param do_indent forwarded to prettify_node_recursive
/// \return the prettified text, or \p src unchanged if it could not be parsed
static wcstring prettify(const wcstring &src, bool do_indent) {
    // Keep going after errors and retain comments, unterminated constructs and blank lines.
    const int parse_flags = parse_flag_continue_after_error | parse_flag_include_comments |
                            parse_flag_leave_unterminated | parse_flag_show_blank_lines;

    parse_node_tree_t tree;
    const bool parsed = parse_tree_from_string(src, parse_flags, &tree, NULL /* errors */);
    if (!parsed) {
        // Hand back the initial string on failure.
        return src;
    }

    // A parse failure may leave a forest of disconnected trees, so every parentless node and
    // every parse error node is treated as a root.
    wcstring result;
    bool has_new_line = true;
    for (node_offset_t idx = 0; idx < tree.size(); idx++) {
        const parse_node_t &node = tree.at(idx);
        const bool is_root = (node.parent == NODE_OFFSET_INVALID);
        const bool is_parse_error = (node.type == parse_special_type_parse_error);
        if (!is_root && !is_parse_error) {
            continue;
        }
        prettify_node_recursive(src, tree, idx, 0, symbol_job_list, &has_new_line, &result,
                                do_indent);
    }
    return result;
}
std::vector<int> parse_util_compute_indents(const wcstring &src) { /* Make a vector the same size as the input string, which contains the indents. Initialize them to -1. */ const size_t src_size = src.size(); std::vector<int> indents(src_size, -1); /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */ parse_node_tree_t tree; parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_include_comments | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */ node_offset_t start_node_idx = 0; int last_trailing_indent = 0; while (start_node_idx < tree.size()) { /* The indent that we'll get for the last line */ int trailing_indent = 0; /* Biggest offset we visited */ node_offset_t max_visited_node_idx = 0; /* Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which will prevent the really-root job list from indenting */ compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list, &indents, &trailing_indent, &max_visited_node_idx); /* We may have more to indent. The trailing indent becomes our current indent. Start at the node after the last we visited. */ last_trailing_indent = trailing_indent; start_node_idx = max_visited_node_idx + 1; } /* Handle comments. Each comment node has a parent (which is whatever the top of the symbol stack was when the comment was encountered). So the source range of the comment has the same indent as its parent. 
*/ const size_t tree_size = tree.size(); for (node_offset_t i=0; i < tree_size; i++) { const parse_node_t &node = tree.at(i); if (node.type == parse_special_type_comment && node.has_source() && node.parent < tree_size) { const parse_node_t &parent = tree.at(node.parent); if (parent.source_start != SOURCE_OFFSET_INVALID) { indents.at(node.source_start) = indents.at(parent.source_start); } } } /* Now apply the indents. The indents array has -1 for places where the indent does not change, so start at each value and extend it along the run of -1s */ int last_indent = 0; for (size_t i=0; i<src_size; i++) { int this_indent = indents.at(i); if (this_indent < 0) { indents.at(i) = last_indent; } else { /* New indent level */ last_indent = this_indent; /* Make all whitespace before a token have the new level. This avoid using the wrong indentation level if a new line starts with whitespace. */ size_t prev_char_idx = i; while (prev_char_idx--) { if (!wcschr(L" \n\t\r", src.at(prev_char_idx))) break; indents.at(prev_char_idx) = last_indent; } } } /* Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly indented even if it is empty. */ size_t suffix_idx = src_size; while (suffix_idx--) { if (!wcschr(L" \n\t\r", src.at(suffix_idx))) break; indents.at(suffix_idx) = last_trailing_indent; } return indents; }
/// Parses \p buff_src and checks the resulting tree for errors beyond plain parse failures.
///
/// \param buff_src source text to check
/// \param out_errors if non-NULL, swapped with the list of errors collected here
/// \param allow_incomplete if true, the source is parsed with parse_flag_leave_unterminated, and
///        unterminated-quote errors are dropped and reported as "incomplete" instead
/// \return a bitmask containing PARSER_TEST_ERROR if an error was found, PARSER_TEST_INCOMPLETE
///         if there is an unclosed block or quote, and any bits returned by
///         parse_util_detect_errors_in_argument for individual arguments
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                                                  parse_error_list_t *out_errors,
                                                  bool allow_incomplete) {
    parse_node_tree_t node_tree;
    parse_error_list_t parse_errors;
    parser_test_error_bits_t res = 0;

    // An unclosed block shows up as an 'end_command' node without source.
    bool has_unclosed_block = false;

    // An unclosed quote means the input is unfinished. Only ever set when allow_incomplete is.
    bool has_unclosed_quote = false;

    // Parse the input string into a parse tree. Some errors are detected here.
    bool parsed = parse_tree_from_string(
        buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree,
        &parse_errors);

    if (allow_incomplete) {
        // Drop unterminated-quote errors, since we don't consider them real errors.
        size_t idx = 0;
        while (idx < parse_errors.size()) {
            if (parse_errors.at(idx).code == parse_error_tokenizer_unterminated_quote) {
                has_unclosed_quote = true;
                parse_errors.erase(parse_errors.begin() + idx);
            } else {
                idx++;
            }
        }
    }

    // #1238: If the only error was an unterminated quote, consider this to have parsed
    // successfully. A better fix would be to have parse_tree_from_string return this information
    // directly (but it would be a shame to munge up its nice bool return).
    if (has_unclosed_quote && parse_errors.empty()) {
        parsed = true;
    }

    // Whether we encountered an error, either from parsing or from the checks below.
    bool errored = !parsed;

    // has_unclosed_quote may only be set if allow_incomplete is true.
    assert(allow_incomplete || !has_unclosed_quote);

    // The tree is only walked if parsing succeeded. Once inside the walk, every node is visited
    // even after an error has been recorded. The walk:
    //  - verifies 'or' and 'and' are not used inside pipelines
    //  - verifies arguments
    //  - verifies backgrounded jobs are in allowed positions
    //  - expands all commands and verifies pipes, return, break/continue and builtins
    const size_t node_count = errored ? 0 : node_tree.size();
    for (size_t i = 0; i < node_count; i++) {
        const parse_node_t &node = node_tree.at(i);

        switch (node.type) {
            case symbol_end_command: {
                // An 'end' without source is an unclosed block.
                if (!node.has_source()) {
                    has_unclosed_block = true;
                }
                break;
            }

            case symbol_boolean_statement: {
                // 'or' and 'and' can be in a pipeline, as long as they're first.
                const parse_bool_statement_type_t type =
                    parse_node_tree_t::statement_boolean_type(node);
                const bool is_and_or = (type == parse_bool_and || type == parse_bool_or);
                if (is_and_or &&
                    node_tree.statement_is_in_pipeline(node, false /* don't count first */)) {
                    errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG,
                                                  (type == parse_bool_and) ? L"and" : L"or");
                }
                break;
            }

            case symbol_argument: {
                const wcstring arg_src = node.get_source(buff_src);
                res |= parse_util_detect_errors_in_argument(node, arg_src, &parse_errors);
                break;
            }

            case symbol_job: {
                if (!node_tree.job_should_be_backgrounded(node)) {
                    break;
                }

                // Disallow background in the following cases:
                //   foo & ; and bar
                //   foo & ; or bar
                //   if foo & ; end
                //   while foo & ; end
                const parse_node_t *job_parent = node_tree.get_parent(node);
                assert(job_parent != NULL);
                switch (job_parent->type) {
                    case symbol_if_clause:
                    case symbol_while_header: {
                        assert(node_tree.get_child(*job_parent, 1) == &node);
                        errored = append_syntax_error(&parse_errors, node,
                                                      BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
                        break;
                    }

                    case symbol_job_list: {
                        // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
                        assert(node_tree.get_child(*job_parent, 0) == &node);
                        const parse_node_t *next_job_list =
                            node_tree.get_child(*job_parent, 1, symbol_job_list);
                        assert(next_job_list != NULL);

                        const parse_node_t *next_job = node_tree.next_node_in_node_list(
                            *next_job_list, symbol_job, NULL);
                        if (next_job == NULL) {
                            break;
                        }

                        const parse_node_t *next_statement =
                            node_tree.get_child(*next_job, 0, symbol_statement);
                        if (next_statement == NULL) {
                            break;
                        }

                        const parse_node_t *spec_statement =
                            node_tree.get_child(*next_statement, 0);
                        if (spec_statement == NULL ||
                            spec_statement->type != symbol_boolean_statement) {
                            break;
                        }

                        // 'and' and 'or' are not allowed after a backgrounded job; 'not' is OK.
                        const parse_bool_statement_type_t following =
                            parse_node_tree_t::statement_boolean_type(*spec_statement);
                        if (following == parse_bool_and) {
                            errored = append_syntax_error(&parse_errors, *spec_statement,
                                                          BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and");
                        } else if (following == parse_bool_or) {
                            errored = append_syntax_error(&parse_errors, *spec_statement,
                                                          BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or");
                        }
                        break;
                    }

                    default:
                        break;
                }
                break;
            }

            case symbol_plain_statement: {
                // In a few places below, we want to know if we are in a pipeline.
                const bool is_in_pipeline =
                    node_tree.statement_is_in_pipeline(node, true /* count first */);

                // We need to know the decoration.
                const enum parse_statement_decoration_t decoration =
                    node_tree.decoration_for_plain_statement(node);

                // Check that we don't try to pipe through exec.
                if (is_in_pipeline && decoration == parse_statement_decoration_exec) {
                    errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, L"exec");
                }

                wcstring command;
                if (!node_tree.command_for_plain_statement(node, buff_src, &command)) {
                    break;
                }

                // Check that we can expand the command. expand_one is handed 'command' itself,
                // and the checks below use whatever it leaves there.
                if (!expand_one(command,
                                EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS,
                                NULL)) {
                    // TODO: leverage the resulting errors
                    errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG,
                                                  command.c_str());
                }

                // Check that pipes are sound.
                if (!errored && parser_is_pipe_forbidden(command) && is_in_pipeline) {
                    errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG,
                                                  command.c_str());
                }

                // Check that we don't return from outside a function.
                // But we allow it if it's 'return --help'.
                if (!errored && command == L"return") {
                    bool found_function = false;
                    for (const parse_node_t *ancestor = &node; ancestor != NULL;
                         ancestor = node_tree.get_parent(*ancestor)) {
                        const parse_node_t *possible_function_header =
                            node_tree.header_node_for_block_statement(*ancestor);
                        if (possible_function_header != NULL &&
                            possible_function_header->type == symbol_function_header) {
                            found_function = true;
                            break;
                        }
                    }
                    if (!found_function && !first_argument_is_help(node_tree, node, buff_src)) {
                        errored =
                            append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG);
                    }
                }

                // Check that we don't break or continue from outside a loop.
                if (!errored && (command == L"break" || command == L"continue")) {
                    // Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
                    // stop the search; we can't break an outer loop from inside a function.
                    // This is a little funny because we can't tell if it's a 'for' or 'while'
                    // loop from the ancestor alone; we need the header. That is, we hit a
                    // block_statement, and have to check its header.
                    bool found_loop = false;
                    for (const parse_node_t *ancestor = &node; ancestor != NULL;
                         ancestor = node_tree.get_parent(*ancestor)) {
                        const parse_node_t *loop_or_function_header =
                            node_tree.header_node_for_block_statement(*ancestor);
                        if (loop_or_function_header == NULL) {
                            continue;
                        }

                        const bool is_loop_header =
                            (loop_or_function_header->type == symbol_while_header ||
                             loop_or_function_header->type == symbol_for_header);
                        if (is_loop_header) {
                            // This is a loop header, so we can break or continue.
                            found_loop = true;
                            break;
                        }
                        if (loop_or_function_header->type == symbol_function_header) {
                            // This is a function header, so we cannot break or continue. We stop
                            // our search here, leaving found_loop false.
                            break;
                        }
                        // Most likely a begin / end style block, which makes no difference.
                    }

                    if (!found_loop && !first_argument_is_help(node_tree, node, buff_src)) {
                        errored = append_syntax_error(
                            &parse_errors, node,
                            (command == L"break" ? INVALID_BREAK_ERR_MSG
                                                 : INVALID_CONTINUE_ERR_MSG));
                    }
                }

                // Check that we don't do an invalid builtin (#1252).
                if (!errored && decoration == parse_statement_decoration_builtin &&
                    !builtin_exists(command)) {
                    errored = append_syntax_error(&parse_errors, node, UNKNOWN_BUILTIN_ERR_MSG,
                                                  command.c_str());
                }
                break;
            }

            default:
                break;
        }
    }

    if (errored) {
        res |= PARSER_TEST_ERROR;
    }
    if (has_unclosed_block || has_unclosed_quote) {
        res |= PARSER_TEST_INCOMPLETE;
    }

    // Hand the collected errors to the caller, if it asked for them.
    if (out_errors) {
        out_errors->swap(parse_errors);
    }

    return res;
}