void history_t::add_with_file_detection(const wcstring &str) { ASSERT_IS_MAIN_THREAD(); path_list_t potential_paths; tokenizer tokenizer; for( tok_init( &tokenizer, str.c_str(), TOK_SQUASH_ERRORS ); tok_has_next( &tokenizer ); tok_next( &tokenizer ) ) { int type = tok_last_type( &tokenizer ); if (type == TOK_STRING) { const wchar_t *token_cstr = tok_last(&tokenizer); if (token_cstr) { wcstring potential_path = token_cstr; if (unescape_string(potential_path, false) && string_could_be_path(potential_path)) { potential_paths.push_front(potential_path); } } } } tok_destroy(&tokenizer); if (! potential_paths.empty()) { /* We have some paths. Make a context. */ file_detection_context_t *context = new file_detection_context_t(this, str); /* Store the potential paths. Reverse them to put them in the same order as in the command. */ potential_paths.reverse(); context->potential_paths.swap(potential_paths); iothread_perform(threaded_perform_file_detection, perform_file_detection_done, context); } }
void parse_util_token_extent( const wchar_t *buff, int cursor_pos, const wchar_t **tok_begin, const wchar_t **tok_end, const wchar_t **prev_begin, const wchar_t **prev_end ) { const wchar_t *begin, *end; int pos; wchar_t *buffcpy; tokenizer tok; const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL; CHECK( buff, ); assert( cursor_pos >= 0 ); parse_util_cmdsubst_extent( buff, cursor_pos, &begin, &end ); if( !end || !begin ) { return; } pos = cursor_pos - (begin - buff); a = buff + pos; b = a; pa = buff + pos; pb = pa; assert( begin >= buff ); assert( begin <= (buff+wcslen(buff) ) ); assert( end >= begin ); assert( end <= (buff+wcslen(buff) ) ); buffcpy = wcsndup( begin, end-begin ); if( !buffcpy ) { DIE_MEM(); } for( tok_init( &tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS ); tok_has_next( &tok ); tok_next( &tok ) ) { int tok_begin = tok_get_pos( &tok ); int tok_end=tok_begin; /* Calculate end of token */ if( tok_last_type( &tok ) == TOK_STRING ) { tok_end +=wcslen(tok_last(&tok)); } /* Cursor was before beginning of this token, means that the cursor is between two tokens, so we set it to a zero element string and break */ if( tok_begin > pos ) { a = b = (wchar_t *)buff + pos; break; } /* If cursor is inside the token, this is the token we are looking for. If so, set a and b and break */ if( (tok_last_type( &tok ) == TOK_STRING) && (tok_end >= pos ) ) { a = begin + tok_get_pos( &tok ); b = a + wcslen(tok_last(&tok)); break; } /* Remember previous string token */ if( tok_last_type( &tok ) == TOK_STRING ) { pa = begin + tok_get_pos( &tok ); pb = pa + wcslen(tok_last(&tok)); } } free( buffcpy); tok_destroy( &tok ); if( tok_begin ) { *tok_begin = a; } if( tok_end ) { *tok_end = b; } if( prev_begin ) { *prev_begin = pa; } if( prev_end ) { *prev_end = pb; } assert( pa >= buff ); assert( pa <= (buff+wcslen(buff) ) ); assert( pb >= pa ); assert( pb <= (buff+wcslen(buff) ) ); }
/** Get the beginning and end of the job or process definition under the cursor */ static void job_or_process_extent( const wchar_t *buff, int cursor_pos, const wchar_t **a, const wchar_t **b, int process ) { const wchar_t *begin, *end; int pos; wchar_t *buffcpy; int finished=0; tokenizer tok; CHECK( buff, ); if( a ) { *a=0; } if( b ) { *b = 0; } parse_util_cmdsubst_extent( buff, cursor_pos, &begin, &end ); if( !end || !begin ) { return; } pos = cursor_pos - (begin - buff); if( a ) { *a = begin; } if( b ) { *b = end; } buffcpy = wcsndup( begin, end-begin ); if( !buffcpy ) { DIE_MEM(); } for( tok_init( &tok, buffcpy, TOK_ACCEPT_UNFINISHED ); tok_has_next( &tok ) && !finished; tok_next( &tok ) ) { int tok_begin = tok_get_pos( &tok ); switch( tok_last_type( &tok ) ) { case TOK_PIPE: { if( !process ) { break; } } case TOK_END: case TOK_BACKGROUND: { if( tok_begin >= pos ) { finished=1; if( b ) { *b = (wchar_t *)buff + tok_begin; } } else { if( a ) { *a = (wchar_t *)buff + tok_begin+1; } } break; } } } free( buffcpy); tok_destroy( &tok ); }
void parse_util_get_parameter_info( const wcstring &cmd, const size_t pos, wchar_t *quote, size_t *offset, int *type ) { size_t prev_pos=0; wchar_t last_quote = '\0'; int unfinished; tokenizer tok; tok_init( &tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS ); for( ; tok_has_next( &tok ); tok_next( &tok ) ) { if( tok_get_pos( &tok ) > pos ) break; if( tok_last_type( &tok ) == TOK_STRING ) last_quote = get_quote( tok_last( &tok ), pos - tok_get_pos( &tok ) ); if( type != NULL ) *type = tok_last_type( &tok ); prev_pos = tok_get_pos( &tok ); } tok_destroy( &tok ); wchar_t *cmd_tmp = wcsdup(cmd.c_str()); cmd_tmp[pos]=0; size_t cmdlen = wcslen( cmd_tmp ); unfinished = (cmdlen==0); if( !unfinished ) { unfinished = (quote != 0); if( !unfinished ) { if( wcschr( L" \t\n\r", cmd_tmp[cmdlen-1] ) != 0 ) { if( ( cmdlen == 1) || (cmd_tmp[cmdlen-2] != L'\\') ) { unfinished=1; } } } } if( quote ) *quote = last_quote; if( offset != 0 ) { if( !unfinished ) { while( (cmd_tmp[prev_pos] != 0) && (wcschr( L";|",cmd_tmp[prev_pos])!= 0) ) prev_pos++; *offset = prev_pos; } else { *offset = pos; } } free(cmd_tmp); }
/**
   Compute highlight colors for the command line \c buff.

   This function does I/O (it stats redirection targets, and the command and
   cd checks are handed the working directory and variable snapshot), and it
   asserts that it runs on a background thread.

   \param buff   the command line to highlight. A zero length string is
                 ignored; CHECK presumably returns early on null - confirm
   \param color  receives the colors. Every element is first reset to -1;
                 afterwards entries are written at token start positions, and
                 for parameters the range covered by the token is filled in
                 via highlight_param()
   \param pos    passed on to highlight_param() relative to the start of each
                 parameter (presumably the cursor position - confirm)
   \param error  if non-null, human readable error descriptions are appended
   \param working_directory  directory used for the cd and implicit cd checks
   \param vars   variable snapshot handed to the function and path lookups
*/
static void tokenize( const wchar_t * const buff, std::vector<int> &color, const int pos, wcstring_list_t *error, const wcstring &working_directory, const env_vars_snapshot_t &vars)
{
    ASSERT_IS_BACKGROUND_THREAD();

    wcstring cmd;
    /* Non-zero once the command of the current job/process has been seen */
    int had_cmd=0;
    wcstring last_cmd;
    int len;

    /* Cleared by a literal "--" argument; reset at every command separator */
    int accept_switches = 1;

    /* Which kinds of command are acceptable; narrowed by 'builtin' and 'command' */
    int use_function = 1;
    int use_command = 1;
    int use_builtin = 1;

    CHECK( buff, );

    len = wcslen(buff);

    if( !len )
        return;

    std::fill(color.begin(), color.end(), -1);

    tokenizer tok;
    for( tok_init( &tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS );
         tok_has_next( &tok );
         tok_next( &tok ) )
    {
        int last_type = tok_last_type( &tok );

        switch( last_type )
        {
            case TOK_STRING:
            {
                if( had_cmd )
                {
                    /* Parameter */
                    wchar_t *param = tok_last( &tok );
                    if( param[0] == L'-' )
                    {
                        if (wcscmp( param, L"--" ) == 0 )
                        {
                            /* Everything after "--" is no longer checked as a switch */
                            accept_switches = 0;
                            color.at(tok_get_pos( &tok )) = HIGHLIGHT_PARAM;
                        }
                        else if( accept_switches )
                        {
                            if( complete_is_valid_option( last_cmd.c_str(), param, error, false /* no autoload */ ) )
                                color.at(tok_get_pos( &tok )) = HIGHLIGHT_PARAM;
                            else
                                color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                        }
                        else
                        {
                            color.at(tok_get_pos( &tok )) = HIGHLIGHT_PARAM;
                        }
                    }
                    else
                    {
                        color.at(tok_get_pos( &tok )) = HIGHLIGHT_PARAM;
                    }

                    /* Arguments to cd are additionally checked as potential directories */
                    if( cmd == L"cd" )
                    {
                        wcstring dir = tok_last( &tok );
                        if (expand_one(dir, EXPAND_SKIP_CMDSUBST))
                        {
                            int is_help = string_prefixes_string(dir, L"--help") || string_prefixes_string(dir, L"-h");
                            if( !is_help && ! is_potential_cd_path(dir, working_directory, PATH_EXPAND_TILDE, NULL))
                            {
                                color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                            }
                        }
                    }

                    /*
                      Highlight the parameter. highlight_param wants to write
                      one more color than we have characters (historical
                      reasons), so allocate one more in the vector. But don't
                      copy it back.
                    */
                    const wcstring param_str = param;
                    int tok_pos = tok_get_pos(&tok);

                    std::vector<int>::const_iterator where = color.begin() + tok_pos;
                    std::vector<int> subcolors(where, where + param_str.size());
                    subcolors.push_back(-1);
                    highlight_param(param_str, subcolors, pos-tok_pos, error);

                    /* Copy the subcolors back into our colors array */
                    std::copy(subcolors.begin(), subcolors.begin() + param_str.size(), color.begin() + tok_pos);
                }
                else
                {
                    /*
                      Command. First check that the command actually exists.
                    */
                    cmd = tok_last( &tok );
                    bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES);
                    if (! expanded || has_expand_reserved(cmd.c_str()))
                    {
                        color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                    }
                    else
                    {
                        bool is_cmd = false;
                        int is_subcommand = 0;
                        /* Tokenizer position to return to after peeking at the next token */
                        int mark = tok_get_pos( &tok );
                        color.at(tok_get_pos( &tok )) = HIGHLIGHT_COMMAND;

                        if( parser_keywords_is_subcommand( cmd ) )
                        {

                            int sw;

                            if( cmd == L"builtin")
                            {
                                use_function = 0;
                                use_command  = 0;
                                use_builtin  = 1;
                            }
                            else if( cmd == L"command")
                            {
                                use_command  = 1;
                                use_function = 0;
                                use_builtin  = 0;
                            }

                            /* Peek at the token following the keyword */
                            tok_next( &tok );

                            sw = parser_keywords_is_switch( tok_last( &tok ) );

                            if( !parser_keywords_is_block( cmd ) &&
                                sw == ARG_SWITCH )
                            {
                                /*
                                  The 'builtin' and 'command' builtins
                                  are normally followed by another
                                  command, but if they are invoked
                                  with a switch, they aren't.

                                  Note that use_builtin is set to 2 here; it
                                  is only ever tested for being non-zero.
                                */
                                use_command  = 1;
                                use_function = 1;
                                use_builtin  = 2;
                            }
                            else
                            {
                                if( sw == ARG_SKIP )
                                {
                                    color.at(tok_get_pos( &tok )) = HIGHLIGHT_PARAM;
                                    mark = tok_get_pos( &tok );
                                }

                                is_subcommand = 1;
                            }
                            /* Undo the peek (or stay on the skipped argument) */
                            tok_set_pos( &tok, mark );
                        }

                        if( !is_subcommand )
                        {
                            /*
                              OK, this is a command, it has been
                              successfully expanded and everything
                              looks ok. Lets check if the command
                              exists.
                            */

                            /*
                              First check if it is a builtin or
                              function, since we don't have to stat
                              any files for that
                            */
                            if (! is_cmd && use_builtin )
                                is_cmd = builtin_exists( cmd );

                            if (! is_cmd && use_function )
                                is_cmd = function_exists_no_autoload( cmd, vars );

                            /*
                              Moving on to expensive tests
                            */

                            /*
                              Check if this is a regular command
                            */
                            if (! is_cmd && use_command )
                            {
                                is_cmd = path_get_path( cmd, NULL, vars );
                            }

                            /* Maybe it is a path for a implicit cd command. */
                            if (! is_cmd)
                            {
                                if (use_builtin || (use_function && function_exists_no_autoload( L"cd", vars)))
                                    is_cmd = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars);
                            }

                            if( is_cmd )
                            {
                                color.at(tok_get_pos( &tok )) = HIGHLIGHT_COMMAND;
                            }
                            else
                            {
                                if( error ) {
                                    error->push_back(format_string(L"Unknown command \'%ls\'", cmd.c_str()));
                                }
                                color.at(tok_get_pos( &tok )) = (HIGHLIGHT_ERROR);
                            }
                            /* Set even when the command is unknown, so what follows is treated as parameters */
                            had_cmd = 1;
                        }

                        if( had_cmd )
                        {
                            last_cmd = tok_last( &tok );
                        }
                    }

                }
                break;
            }

            case TOK_REDIRECT_NOCLOB:
            case TOK_REDIRECT_OUT:
            case TOK_REDIRECT_IN:
            case TOK_REDIRECT_APPEND:
            case TOK_REDIRECT_FD:
            {
                if( !had_cmd )
                {
                    color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                    if( error )
                        error->push_back(L"Redirection without a command");
                    break;
                }

                wcstring target_str;
                const wchar_t *target=NULL;

                color.at(tok_get_pos( &tok )) = HIGHLIGHT_REDIRECTION;
                /* Consume the redirection target; last_type still holds the redirection kind */
                tok_next( &tok );

                /*
                  Check that we are redirecting into a file
                */

                switch( tok_last_type( &tok ) )
                {
                    case TOK_STRING:
                    {
                        target_str = tok_last( &tok );
                        if (expand_one(target_str, EXPAND_SKIP_CMDSUBST)) {
                            target = target_str.c_str();
                        }
                        /*
                          Redirect filename may contain a cmdsubst.
                          If so, it will be ignored/not flagged.
                        */
                    }
                    break;
                    default:
                    {
                        /* Note: this local shadows the 'pos' parameter. The position is bounds checked before use. */
                        size_t pos = tok_get_pos(&tok);
                        if (pos < color.size()) {
                            color.at(pos) = HIGHLIGHT_ERROR;
                        }
                        if( error )
                            error->push_back(L"Invalid redirection");
                    }

                }

                if( target != 0 )
                {
                    wcstring dir = target;
                    size_t slash_idx = dir.find_last_of(L'/');
                    /* Note: this local shadows the 'buff' parameter */
                    struct stat buff;
                    /*
                      If file is in directory other than '.', check
                      that the directory exists.
                    */
                    if( slash_idx != wcstring::npos )
                    {
                        dir.resize(slash_idx);
                        if( wstat( dir, &buff ) == -1 )
                        {
                            color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                            if( error )
                                error->push_back(format_string(L"Directory \'%ls\' does not exist", dir.c_str()));

                        }
                    }

                    /*
                      If the file is read from or appended to, check
                      if it exists.
                    */
                    if( last_type == TOK_REDIRECT_IN ||
                        last_type == TOK_REDIRECT_APPEND )
                    {
                        if( wstat( target, &buff ) == -1 )
                        {
                            color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                            if( error )
                                error->push_back(format_string(L"File \'%ls\' does not exist", target));
                        }
                    }
                    /* A no-clobber redirection is flagged when the target already exists */
                    if( last_type == TOK_REDIRECT_NOCLOB )
                    {
                        if( wstat( target, &buff ) != -1 )
                        {
                            color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                            if( error )
                                error->push_back(format_string(L"File \'%ls\' exists", target));
                        }
                    }
                }
                break;
            }

            case TOK_PIPE:
            case TOK_BACKGROUND:
            {
                if( had_cmd )
                {
                    /* A new command follows: reset all per-command state */
                    color.at(tok_get_pos( &tok )) = HIGHLIGHT_END;
                    had_cmd = 0;
                    use_command  = 1;
                    use_function = 1;
                    use_builtin  = 1;
                    accept_switches = 1;
                }
                else
                {
                    /* Note: the same message is used for a pipe without a preceding command */
                    color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                    if( error )
                        error->push_back(L"No job to put in background" );
                }

                break;
            }

            case TOK_END:
            {
                /* End of statement: reset all per-command state */
                color.at(tok_get_pos( &tok )) = HIGHLIGHT_END;
                had_cmd = 0;
                use_command  = 1;
                use_function = 1;
                use_builtin  = 1;
                accept_switches = 1;
                break;
            }

            case TOK_COMMENT:
            {
                color.at(tok_get_pos( &tok )) = HIGHLIGHT_COMMENT;
                break;
            }

            case TOK_ERROR:
            default:
            {
                /*
                  If the tokenizer reports an error, highlight it as such.
                */
                if( error )
                    error->push_back(tok_last( &tok));
                color.at(tok_get_pos( &tok )) = HIGHLIGHT_ERROR;
                break;
            }
        }
    }
    tok_destroy( &tok );
}
/* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command, wcstring_list_t *out_arguments, int *out_last_arg_pos) { if (str.empty()) return false; wcstring cmd; wcstring_list_t args; int arg_pos = -1; bool had_cmd = false; tokenizer tok; for (tok_init( &tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok)) { int last_type = tok_last_type(&tok); switch( last_type ) { case TOK_STRING: { if( had_cmd ) { /* Parameter to the command. We store these escaped. */ args.push_back(tok_last(&tok)); arg_pos = tok_get_pos(&tok); } else { /* Command. First check that the command actually exists. */ wcstring local_cmd = tok_last( &tok ); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); if (! expanded || has_expand_reserved(cmd.c_str())) { /* We can't expand this cmd, ignore it */ } else { bool is_subcommand = false; int mark = tok_get_pos(&tok); if (parser_keywords_is_subcommand(cmd)) { int sw; tok_next( &tok ); sw = parser_keywords_is_switch( tok_last( &tok ) ); if( !parser_keywords_is_block( cmd ) && sw == ARG_SWITCH ) { /* It's an argument to the subcommand itself */ } else { if( sw == ARG_SKIP ) mark = tok_get_pos( &tok ); is_subcommand = true; } tok_set_pos( &tok, mark ); } if (!is_subcommand) { /* It's really a command */ had_cmd = true; cmd = local_cmd; } } } break; } case TOK_REDIRECT_NOCLOB: case TOK_REDIRECT_OUT: case TOK_REDIRECT_IN: case TOK_REDIRECT_APPEND: case TOK_REDIRECT_FD: { if( !had_cmd ) { break; } tok_next( &tok ); break; } case TOK_PIPE: case TOK_BACKGROUND: case TOK_END: { had_cmd = false; cmd.empty(); args.empty(); arg_pos = -1; break; } case TOK_COMMENT: case TOK_ERROR: default: { break; } } } tok_destroy( &tok ); /* Remember our command if we have one */ if (had_cmd) { if 
(out_command) out_command->swap(cmd); if (out_arguments) out_arguments->swap(args); if (out_last_arg_pos) *out_last_arg_pos = arg_pos; } return had_cmd; }
/** Output the specified selection. \param begin start of selection \param end end of selection \param cut_at_cursor whether printing should stop at the surrent cursor position \param tokenize whether the string should be tokenized, printing one string token on every line and skipping non-string tokens */ static void write_part( const wchar_t *begin, const wchar_t *end, int cut_at_cursor, int tokenize ) { tokenizer tok; string_buffer_t out; wchar_t *buff; int pos; pos = get_cursor_pos()-(begin-get_buffer()); if( tokenize ) { buff = wcsndup( begin, end-begin ); // fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end ); sb_init( &out ); for( tok_init( &tok, buff, TOK_ACCEPT_UNFINISHED ); tok_has_next( &tok ); tok_next( &tok ) ) { if( (cut_at_cursor) && (tok_get_pos( &tok)+wcslen(tok_last( &tok)) >= pos) ) break; switch( tok_last_type( &tok ) ) { case TOK_STRING: { wchar_t *tmp = unescape( tok_last( &tok ), UNESCAPE_INCOMPLETE ); sb_append( &out, tmp, L"\n", (void *)0 ); free( tmp ); break; } } } sb_append( sb_out, (wchar_t *)out.buff ); free( buff ); tok_destroy( &tok ); sb_destroy( &out ); } else { wchar_t *buff, *esc; if( cut_at_cursor ) { end = begin+pos; } buff = wcsndup( begin, end-begin ); esc = unescape( buff, UNESCAPE_INCOMPLETE ); // debug( 0, L"woot2 %ls -> %ls", buff, esc ); sb_append( sb_out, esc ); sb_append( sb_out, L"\n" ); free( esc ); free( buff ); } }
/** Test the tokenizer */ static void test_tok() { tokenizer t; say( L"Testing tokenizer" ); say( L"Testing invalid input" ); tok_init( &t, 0, 0 ); if( tok_last_type( &t ) != TOK_ERROR ) { err(L"Invalid input to tokenizer was undetected" ); } say( L"Testing use of broken tokenizer" ); if( !tok_has_next( &t ) ) { err( L"tok_has_next() should return 1 once on broken tokenizer" ); } tok_next( &t ); if( tok_last_type( &t ) != TOK_ERROR ) { err(L"Invalid input to tokenizer was undetected" ); } /* This should crash if there is a bug. No reliable way to detect otherwise. */ say( L"Test destruction of broken tokenizer" ); tok_destroy( &t ); { wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)]"; const int types[] = { TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_END } ; int i; say( L"Test correct tokenization" ); for( i=0, tok_init( &t, str, 0 ); i<(sizeof(types)/sizeof(int)); i++,tok_next( &t ) ) { if( types[i] != tok_last_type( &t ) ) { err( L"Tokenization error:"); wprintf( L"Token number %d of string \n'%ls'\n, expected token type %ls, got token '%ls' of type %ls\n", i+1, str, tok_get_desc(types[i]), tok_last(&t), tok_get_desc(tok_last_type( &t )) ); } } } }