struct token *lexer_read_literal_in_expandable_until(struct lexer_book *bk, char end_marker) { char end_markers[7] = { end_marker, '$', '\\', '"', '\'', '#', CHAR_EOF }; int count = 0; do { count += lexer_read_until(bk, end_markers); if(bk->eof) break; char c = lexer_next_peek(bk); if(c == '\\') { lexer_next_char(bk); /* Jump the slash */ char n = lexer_next_char(bk); count += 2; if(lexer_special_escape(n)) lexer_add_to_lexeme(bk, lexer_special_to_code(n)); else lexer_add_to_lexeme(bk, n); } else if(c == '#') { if(end_marker == '\n') { lexer_discard_comments(bk); break; } } else break; } while(!bk->eof); if(bk->eof && strchr(")\"'", end_marker)) lexer_report_error(bk, "Missing closing %c.\n", end_marker); return lexer_pack_token(bk, LITERAL); }
/*
 * Same contract as lexer_read_until, except that a backslash escapes the
 * character after it, so an escaped member of char_set does not stop the
 * read. An escaped newline is stored in the lexeme as a single space.
 *
 * Returns the number of characters consumed. Hitting end of file is
 * reported as an error unless CHAR_EOF is itself listed in char_set.
 */
int lexer_read_escaped_until(struct lexer *lx, char *char_set)
{
	/* Stop set extended with the backslash so escapes can be intercepted. */
	char *stops = string_format("\\%s", char_set);
	int total = 0;

	for(;;) {
		total += lexer_read_until(lx, stops);

		/* Anything other than a backslash is a genuine stop. */
		if(lx->eof || lexer_next_peek(lx) != '\\')
			break;

		lexer_next_char(lx);	/* Jump the slash */
		char escaped = lexer_next_char(lx);
		total += 2;

		/* The escaped character is kept only when more input follows it. */
		if(lexer_next_peek(lx) != CHAR_EOF) {
			if(escaped == '\n')
				lexer_add_to_lexeme(lx, ' ');
			else
				lexer_add_to_lexeme(lx, escaped);
		}

		if(lx->eof)
			break;
	}

	free(stops);

	if(lx->eof && !strchr(char_set, CHAR_EOF))
		lexer_report_error(lx, "Missing %s\n", char_set);

	return total;
}
accept_t lexer_read_variable(struct lexer_book * bk, struct token * name) { lexer_discard_white_space(bk); if(lexer_next_peek(bk) == '=') { lexer_next_char(bk); lexer_add_to_lexeme(bk, '='); } else { int c = lexer_next_char(bk); if(lexer_next_peek(bk) != '=') return NO; lexer_add_to_lexeme(bk, c); lexer_next_char(bk); /* Jump = */ } lexer_push_token(bk, lexer_pack_token(bk, VARIABLE)); lexer_push_token(bk, name); lexer_discard_white_space(bk); lexer_read_expandable(bk, '\n'); lexer_roll_back(bk, 1); //Recover '\n' lexer_discard_white_space(bk); if(lexer_next_char(bk) != '\n') return NO; return YES; }
int lexer_read_variable(struct lexer *lx, struct token *name) { lexer_discard_white_space(lx); if(lexer_next_peek(lx) == '=') { lexer_next_char(lx); lexer_add_to_lexeme(lx, '='); } else { int c = lexer_next_char(lx); if(lexer_next_peek(lx) != '=') lexer_report_error(lx, "Missing = in variable definition."); lexer_add_to_lexeme(lx, c); lexer_next_char(lx); /* Jump = */ } lexer_push_token(lx, lexer_pack_token(lx, TOKEN_VARIABLE)); lexer_push_token(lx, name); lexer_discard_white_space(lx); //Read variable value lexer_push_token(lx, lexer_read_expandable(lx, '\n')); lexer_roll_back(lx, 1); //Recover '\n' lexer_discard_white_space(lx); if(lexer_next_char(lx) != '\n') lexer_report_error(lx, "Missing newline at end of variable definition."); return 1; }
struct token *lexer_read_command_argument(struct lexer_book *bk) { int c = lexer_next_peek(bk); switch(c) { case CHAR_EOF: /* Found end of file while completing command */ bk->lexeme_end++; bk->eof = 1; if( bk->stream ) return lexer_pack_token(bk, NEWLINE); else return NULL; break; case '\n' : lexer_next_char(bk); /* Jump \n */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, NEWLINE); break; case '#' : lexer_discard_comments(bk); lexer_add_to_lexeme(bk, '\n'); return lexer_pack_token(bk, NEWLINE); case ' ' : case '\t': return lexer_read_white_space(bk); break; case '$' : return lexer_read_substitution(bk); break; case '"' : return lexer_read_expandable(bk, '"'); break; case '<' : case '>' : lexer_next_char(bk); /* Jump <, > */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, IO_REDIRECT); break; case '\'': lexer_add_to_lexeme(bk, '\''); lexer_read_literal(bk); lexer_add_to_lexeme(bk, '\''); return lexer_pack_token(bk, LITERAL); break; default: lexer_read_literal(bk); return lexer_pack_token(bk, LITERAL); break; } }
struct token *lexer_read_file(struct lexer *lx) { int c = lexer_next_peek(lx); switch (c) { case CHAR_EOF: lx->lexeme_end++; lx->eof = 1; if(lx->depth == 0) lexer_report_error(lx, "Found end of file while completing file list.\n"); return NULL; break; case '\n': lexer_next_char(lx); /* Jump \n */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_NEWLINE); break; case '#': lexer_discard_comments(lx); lexer_add_to_lexeme(lx, '\n'); return lexer_pack_token(lx, TOKEN_NEWLINE); case ':': lexer_next_char(lx); /* Jump : */ return lexer_pack_token(lx, TOKEN_COLON); break; case ' ': case '\t': /* Discard white-space and add space token. */ lexer_discard_white_space(lx); return lexer_pack_token(lx, TOKEN_SPACE); break; case '$': return lexer_read_substitution(lx); break; case '\'': lexer_add_to_lexeme(lx, '\''); lexer_read_literal_quoted(lx); lexer_add_to_lexeme(lx, '\''); return lexer_pack_token(lx, TOKEN_LITERAL); break; case '-': if(lexer_peek_remote_rename_syntax(lx)) { lexer_next_char(lx); /* Jump -> */ lexer_next_char(lx); return lexer_pack_token(lx, TOKEN_REMOTE_RENAME); } /* Else fall through */ default: return lexer_read_filename(lx); break; } }
struct token *lexer_read_command_argument(struct lexer *lx) { int c = lexer_next_peek(lx); switch (c) { case CHAR_EOF: /* Found end of file while completing command */ lx->lexeme_end++; lx->eof = 1; if(lx->depth == 0) lexer_report_error(lx, "Found end of file while completing command.\n"); return NULL; break; case '\n': lexer_next_char(lx); /* Jump \n */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_NEWLINE); break; case '#': lexer_discard_comments(lx); lexer_add_to_lexeme(lx, '\n'); return lexer_pack_token(lx, TOKEN_NEWLINE); case ' ': case '\t': return lexer_read_white_space(lx); break; case '$': return lexer_read_substitution(lx); break; case '"': return lexer_read_expandable(lx, '"'); break; case '<': case '>': lexer_next_char(lx); /* Jump <, > */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_IO_REDIRECT); break; case '\'': lexer_add_to_lexeme(lx, '\''); lexer_read_literal(lx); lexer_add_to_lexeme(lx, '\''); return lexer_pack_token(lx, TOKEN_LITERAL); break; default: lexer_read_literal(lx); return lexer_pack_token(lx, TOKEN_LITERAL); break; } }
struct token *lexer_read_file(struct lexer_book *bk) { int c = lexer_next_peek(bk); switch(c) { case CHAR_EOF: lexer_report_error(bk, "Found end of file while completing file list.\n"); return NULL; break; case '\n' : lexer_next_char(bk); /* Jump \n */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, NEWLINE); break; case '#' : lexer_discard_comments(bk); lexer_add_to_lexeme(bk, '\n'); return lexer_pack_token(bk, NEWLINE); case ':' : lexer_next_char(bk); /* Jump : */ return lexer_pack_token(bk, COLON); break; case ' ' : case '\t': /* Discard white-space and try again */ lexer_discard_white_space(bk); return lexer_read_file(bk); break; case '$' : return lexer_read_substitution(bk); break; case '\'': lexer_add_to_lexeme(bk, '\''); lexer_read_literal_quoted(bk); lexer_add_to_lexeme(bk, '\''); return lexer_pack_token(bk, LITERAL); break; case '-' : if( lexer_peek_remote_rename_syntax(bk) ) { lexer_next_char(bk); /* Jump -> */ lexer_next_char(bk); return lexer_pack_token(bk, REMOTE_RENAME); } /* Else fall through */ default: return lexer_read_filename(bk); break; } }
/*
 * Read a single-quoted literal into the lexeme, both quote characters
 * included. An error is reported when the input does not start with a
 * single quote. Returns the count given by lexer_read_escaped_until for
 * the text between the quotes (the quotes themselves are not counted).
 */
int lexer_read_literal_quoted(struct lexer * lx)
{
	if(lexer_next_peek(lx) != '\'')
		lexer_report_error(lx, "Missing opening quote.\n");

	/* Opening quote */
	lexer_add_to_lexeme(lx, lexer_next_char(lx));

	int inner_length = lexer_read_escaped_until(lx, "'");

	/* Closing quote */
	lexer_add_to_lexeme(lx, lexer_next_char(lx));

	return inner_length;
}
/*
 * Copy input into the lexeme until a character listed in char_set is next.
 * The stop character itself is left unread. When end of file is reached
 * without meeting a stop character, the end-of-file character is consumed
 * (but not stored) and bk->eof is set.
 *
 * Returns how many characters were consumed, i.e. how far a caller would
 * have to roll back to undo the read.
 */
int lexer_read_until(struct lexer_book *bk, char *char_set)
{
	int consumed = 0;

	for(;;) {
		char next = lexer_next_peek(bk);

		if(strchr(char_set, next) != NULL)
			return consumed;

		if(next == CHAR_EOF) {
			/* Step over the end-of-file character without storing it. */
			lexer_next_char(bk);
			consumed++;
			break;
		}

		lexer_add_to_lexeme(bk, next);
		lexer_next_char(bk);
		consumed++;
	}

	bk->eof = 1;
	return consumed;
}
struct token *lexer_read_literal_in_expandable_until(struct lexer *lx, char end_marker) { const char end_markers[8] = { end_marker, '$', '\\', '"', '\'', '#', CHAR_EOF ,0}; int count = 0; do { count += lexer_read_until(lx, end_markers); if(lx->eof) break; char c = lexer_next_peek(lx); if(c == '\\') { lexer_next_char(lx); /* Jump the slash */ char n = lexer_next_char(lx); count += 2; if(lexer_special_escape(n)) { lexer_add_to_lexeme(lx, lexer_special_to_code(n)); } else if(n == '\n') { lexer_add_to_lexeme(lx, ' '); } else { lexer_add_to_lexeme(lx, n); } } else if(c == '#') { if(end_marker == '\n') { lexer_discard_comments(lx); break; } } else break; } while(!lx->eof); if(lx->eof && strchr(")\"'", end_marker)) lexer_report_error(lx, "Missing closing %c.\n", end_marker); return lexer_pack_token(lx, TOKEN_LITERAL); }
//opened tracks whether it is the opening (opened = 0) or closing (opened = 1) double quote we encounter. struct list *lexer_read_expandable_recursive(struct lexer *lx, char end_marker, int opened) { lexer_discard_white_space(lx); struct list *tokens = list_create(); while(!lx->eof) { int c = lexer_next_peek(lx); if(c == '$') { list_push_tail(tokens, lexer_read_substitution(lx)); } if(c == '\'') { lexer_read_literal(lx); list_push_tail(tokens, lexer_pack_token(lx, TOKEN_LITERAL)); } else if(c == '"' && opened == 0) { lexer_add_to_lexeme(lx, lexer_next_char(lx)); list_push_tail(tokens, lexer_pack_token(lx, TOKEN_LITERAL)); // Add first " tokens = list_splice(tokens, lexer_read_expandable_recursive(lx, '"', 1)); lexer_add_to_lexeme(lx, '"'); list_push_tail(tokens, lexer_pack_token(lx, TOKEN_LITERAL)); // Add closing " if(end_marker == '"') return tokens; } else if(c == '#' && end_marker != '"') { lexer_discard_comments(lx); } else if(c == end_marker) { lexer_next_char(lx); /* Jump end_marker */ return tokens; } else { list_push_tail(tokens, lexer_read_literal_in_expandable_until(lx, end_marker)); } } lexer_report_error(lx, "Found EOF before end marker: %c.\n", end_marker); return NULL; }
/*
 * Read a filename into the lexeme, keeping any '-' that is part of the name
 * (that is, a '-' that does not begin the remote-rename syntax "->").
 *
 * The read stops at the first FILENAME_LIMITS character. If that character
 * is such a '-', it is appended to the lexeme, consumed from the input, and
 * the function calls itself to read the rest of the name.
 *
 * Returns the number of characters read; a value below 1 means nothing
 * was read at the current position.
 */
int lexer_read_filename_recursive(struct lexer_book *bk)
{
	int count = lexer_read_escaped_until(bk, FILENAME_LIMITS);

	if(count < 1)
		return count;

	if(lexer_next_peek(bk) == '-' && !lexer_peek_remote_rename_syntax(bk)) {
		lexer_add_to_lexeme(bk, '-');
		/* Consume the '-' that was just copied. Otherwise it stays in the
		 * input and is seen again by the recursive call and by the caller. */
		lexer_next_char(bk);
		count++;
		count += lexer_read_filename_recursive(bk);
	}

	return count;
}
/*
 * Read a filename into the lexeme, keeping any '-' that is part of the name
 * (that is, a '-' that does not begin the remote-rename syntax "->").
 *
 * Each call reads one run of characters up to a FILENAME_LIMITS character.
 * When the run ends on such a '-', the dash is stored and consumed, and the
 * function recurses to read what follows it.
 *
 * Returns the number of characters read; a value below 1 means nothing
 * was read at the current position.
 */
int lexer_read_filename_recursive(struct lexer *lx)
{
	int length = lexer_read_escaped_until(lx, FILENAME_LIMITS);

	if(length < 1)
		return length;

	/* Done unless the name continues through a plain '-'. */
	if(lexer_next_peek(lx) != '-' || lexer_peek_remote_rename_syntax(lx))
		return length;

	lexer_add_to_lexeme(lx, '-');
	lexer_next_char(lx);

	return length + 1 + lexer_read_filename_recursive(lx);
}
/*
 * Collapse a run of WHITE_SPACE characters into one SPACE token whose
 * lexeme is a single blank. If the input does not start with white space,
 * an error is reported and NULL is returned.
 */
struct token *lexer_read_white_space(struct lexer_book *bk)
{
	int skipped = 0;

	for(; strchr(WHITE_SPACE, lexer_next_peek(bk)); skipped++)
		lexer_next_char(bk);

	if(skipped <= 0) {
		lexer_report_error(bk, "Expecting white space.");
		return NULL;
	}

	lexer_add_to_lexeme(bk, ' ');
	return lexer_pack_token(bk, SPACE);
}
/*
 * Collapse a run of white space into one TOKEN_SPACE whose lexeme is a
 * single blank. The run is skipped with lexer_discard_white_space, followed
 * by any WHITE_SPACE characters still pending. If nothing was skipped, an
 * error is reported and NULL is returned.
 */
struct token *lexer_read_white_space(struct lexer *lx)
{
	int skipped = lexer_discard_white_space(lx);

	for(; strchr(WHITE_SPACE, lexer_next_peek(lx)); skipped++)
		lexer_next_char(lx);

	if(skipped <= 0) {
		lexer_report_error(lx, "Expecting white space.");
		return NULL;
	}

	lexer_add_to_lexeme(lx, ' ');
	return lexer_pack_token(lx, TOKEN_SPACE);
}
int lexer_read_variable_list(struct lexer * lx) { int c; while((c = lexer_next_peek(lx)) != '\n') { lexer_discard_white_space(lx); if(c == '#') { lexer_discard_comments(lx); lexer_roll_back(lx, 1); //Recover the newline break; } lexer_push_token(lx, lexer_read_syntax_name(lx)); } lexer_add_to_lexeme(lx, lexer_next_char(lx)); //Drop the newline lexer_push_token(lx, lexer_pack_token(lx, TOKEN_NEWLINE)); return 1; }
accept_t lexer_read_variable_list(struct lexer_book * bk) { int c; while((c = lexer_next_peek(bk)) != '\n') { lexer_discard_white_space(bk); if(c == '#') { lexer_discard_comments(bk); lexer_roll_back(bk, 1); //Recover the newline break; } lexer_push_token(bk, lexer_read_syntax_name(bk)); } lexer_add_to_lexeme(bk, lexer_next_char(bk)); //Drop the newline lexer_push_token(bk, lexer_pack_token(bk, NEWLINE)); return YES; }
/*
 * Copy input into the lexeme until a character listed in char_set is next.
 * The stop character itself is left unread. When end of file is reached
 * without meeting a stop character, the end-of-file character is consumed
 * (but not stored) and lx->eof is set.
 *
 * Returns how many characters were consumed, i.e. how far a caller would
 * have to roll back to undo the read.
 */
int lexer_read_until(struct lexer *lx, const char *char_set )
{
	int consumed = 0;

	for(;;) {
		char next = lexer_next_peek(lx);

		if(strchr(char_set, next) != NULL)
			return consumed;

		if(next == CHAR_EOF) {
			/* Step over the end-of-file character without storing it. */
			lexer_next_char(lx);
			consumed++;
			break;
		}

		lexer_add_to_lexeme(lx, next);
		lexer_next_char(lx);
		consumed++;
	}

	lx->eof = 1;
	return consumed;
}