/*
 * Like lexer_read_until, except that a member of char_set that directly
 * follows a backslash does not stop the scan.
 *
 * The backslash itself is dropped; the character after it is added to the
 * lexeme unless the input that follows it is CHAR_EOF. The return value is
 * the sum of the lexer_read_until counts plus two for every escape pair.
 * If end of input is reached and CHAR_EOF is not a member of char_set, an
 * error is reported.
 */
int lexer_read_escaped_until(struct lexer_book *bk, char *char_set)
{
    /* Add the backslash to the stop set so that escapes can be intercepted. */
    char *stops_with_slash = string_format("\\%s", char_set);
    int consumed = 0;

    for(;;) {
        consumed += lexer_read_until(bk, stops_with_slash);

        /* Stopped on a real delimiter, or ran out of input. */
        if(bk->eof || lexer_next_peek(bk) != '\\')
            break;

        lexer_next_char(bk);    /* Jump the slash */
        char escaped = lexer_next_char(bk);
        consumed += 2;

        if(lexer_next_peek(bk) != CHAR_EOF)
            lexer_add_to_lexeme(bk, escaped);

        if(bk->eof)
            break;
    }

    free(stops_with_slash);

    if(bk->eof && !strchr(char_set, CHAR_EOF))
        lexer_report_error(bk, "Missing %s\n", char_set);

    return consumed;
}
struct token *lexer_read_substitution(struct lexer *lx) { char closer = 0; //closer is either 0 (no closer), ) or }. char c = lexer_next_peek(lx); if(c != '$') lexer_report_error(lx, "Expecting $ for variable substitution."); lexer_next_char(lx); /* Jump $ */ if(lexer_next_peek(lx) == '(') { lexer_next_char(lx); /* Jump ( */ closer = ')'; } else if(lexer_next_peek(lx) == '{') { lexer_next_char(lx); /* Jump { */ closer = '}'; } struct token *name = lexer_read_syntax_name(lx); name->type = TOKEN_SUBSTITUTION; if(closer) { if(lexer_next_peek(lx) == closer) lexer_next_char(lx); /* Jump ) */ else lexer_report_error(lx, "Expecting %c for closing variable substitution.", closer); } return name; }
accept_t lexer_read_variable(struct lexer_book * bk, struct token * name) { lexer_discard_white_space(bk); if(lexer_next_peek(bk) == '=') { lexer_next_char(bk); lexer_add_to_lexeme(bk, '='); } else { int c = lexer_next_char(bk); if(lexer_next_peek(bk) != '=') return NO; lexer_add_to_lexeme(bk, c); lexer_next_char(bk); /* Jump = */ } lexer_push_token(bk, lexer_pack_token(bk, VARIABLE)); lexer_push_token(bk, name); lexer_discard_white_space(bk); lexer_read_expandable(bk, '\n'); lexer_roll_back(bk, 1); //Recover '\n' lexer_discard_white_space(bk); if(lexer_next_char(bk) != '\n') return NO; return YES; }
/*
 * Read a variable substitution of the form $name or $(name).
 *
 * An error is reported when the input does not start with '$', or when an
 * opening parenthesis is not closed right after the name. Returns the name
 * token with its type set to SUBSTITUTION.
 */
struct token *lexer_read_substitution(struct lexer_book *bk)
{
    char first = lexer_next_peek(bk);
    if(first != '$')
        lexer_report_error(bk, "Expecting $ for variable substitution.");
    lexer_next_char(bk);    /* Jump $ */

    int parenthesized = (lexer_next_peek(bk) == '(');
    if(parenthesized)
        lexer_next_char(bk);    /* Jump ( */

    struct token *name = lexer_read_syntax_name(bk);
    name->type = SUBSTITUTION;

    if(parenthesized) {
        if(lexer_next_peek(bk) != ')')
            lexer_report_error(bk, "Expecting ) for closing variable substitution.");
        else
            lexer_next_char(bk);    /* Jump ) */
    }

    return name;
}
int lexer_read_variable(struct lexer *lx, struct token *name) { lexer_discard_white_space(lx); if(lexer_next_peek(lx) == '=') { lexer_next_char(lx); lexer_add_to_lexeme(lx, '='); } else { int c = lexer_next_char(lx); if(lexer_next_peek(lx) != '=') lexer_report_error(lx, "Missing = in variable definition."); lexer_add_to_lexeme(lx, c); lexer_next_char(lx); /* Jump = */ } lexer_push_token(lx, lexer_pack_token(lx, TOKEN_VARIABLE)); lexer_push_token(lx, name); lexer_discard_white_space(lx); //Read variable value lexer_push_token(lx, lexer_read_expandable(lx, '\n')); lexer_roll_back(lx, 1); //Recover '\n' lexer_discard_white_space(lx); if(lexer_next_char(lx) != '\n') lexer_report_error(lx, "Missing newline at end of variable definition."); return 1; }
/*
 * Report whether the upcoming input is the two-character sequence "->".
 * The input position is left unchanged: the '-' that is read to look at the
 * following character is rolled back. Returns 1 on a match, 0 otherwise.
 */
int lexer_peek_remote_rename_syntax(struct lexer_book *bk)
{
    int found = 0;

    if(lexer_next_peek(bk) == '-') {
        lexer_next_char(bk);    /* Step over '-' to look at what follows */
        found = (lexer_next_peek(bk) == '>');
        lexer_roll_back(bk, 1);    /* Put the '-' back */
    }

    return found;
}
/*
 * Report whether the upcoming input is the two-character sequence "->".
 * The input position is left unchanged: the '-' that is read to look at the
 * following character is rolled back. Returns 1 on a match, 0 otherwise.
 */
int lexer_peek_remote_rename_syntax(struct lexer *lx)
{
    int found = 0;

    if(lexer_next_peek(lx) == '-') {
        lexer_next_char(lx);    /* Step over '-' to look at what follows */
        found = (lexer_next_peek(lx) == '>');
        lexer_roll_back(lx, 1);    /* Put the '-' back */
    }

    return found;
}
struct token *lexer_read_literal_in_expandable_until(struct lexer_book *bk, char end_marker) { char end_markers[7] = { end_marker, '$', '\\', '"', '\'', '#', CHAR_EOF }; int count = 0; do { count += lexer_read_until(bk, end_markers); if(bk->eof) break; char c = lexer_next_peek(bk); if(c == '\\') { lexer_next_char(bk); /* Jump the slash */ char n = lexer_next_char(bk); count += 2; if(lexer_special_escape(n)) lexer_add_to_lexeme(bk, lexer_special_to_code(n)); else lexer_add_to_lexeme(bk, n); } else if(c == '#') { if(end_marker == '\n') { lexer_discard_comments(bk); break; } } else break; } while(!bk->eof); if(bk->eof && strchr(")\"'", end_marker)) lexer_report_error(bk, "Missing closing %c.\n", end_marker); return lexer_pack_token(bk, LITERAL); }
int lexer_read_expandable_recursive(struct lexer_book *bk, char end_marker) { int count = 0; lexer_discard_white_space(bk); while(!bk->eof) { int c = lexer_next_peek(bk); if(c == '$') { count++; lexer_push_token(bk, lexer_read_substitution(bk)); } if(c == '\'') { lexer_read_literal(bk); lexer_push_token(bk, lexer_pack_token(bk, LITERAL)); } else if(c == end_marker) { lexer_next_char(bk); /* Jump end_marker */ return count; } else if(c == '"') count += lexer_read_expandable_recursive(bk, '"'); else if(c == '#' && end_marker != '"') { lexer_discard_comments(bk); return count; } else { count++; lexer_push_token(bk, lexer_read_literal_in_expandable_until(bk, end_marker)); } } /* Found eof before end_marker */ abort(); }
accept_t lexer_read_command(struct lexer_book *bk) { if(lexer_next_peek(bk) != '\t') return NO; int count = 0; lexer_discard_white_space(bk); struct token *t; do { t = lexer_read_command_argument(bk); if(!t) break; if(t->type == NEWLINE && count == 0) { lexer_report_error(bk, "Missing command line.\n"); } else if(t->type != NEWLINE && count == 0) { /* Add command start marker */ lexer_push_token(bk, lexer_pack_token(bk, COMMAND)); } lexer_push_token(bk, t); count++; } while(t->type != NEWLINE); if(count > 1) return YES; else { return NO; } }
/*
 * Append input characters to the current lexeme until the next character is
 * a member of char_set; that character is left unread.
 *
 * If CHAR_EOF is met first it is consumed without being added to the lexeme
 * and bk->eof is set. Returns the count of characters that we would have to
 * roll-back to undo the read.
 */
int lexer_read_until(struct lexer_book *bk, char *char_set)
{
    int count = 0;

    for(;;) {
        char c = lexer_next_peek(bk);

        if(strchr(char_set, c))
            return count;

        if(c != CHAR_EOF)
            lexer_add_to_lexeme(bk, c);

        lexer_next_char(bk);
        count++;

        if(c == CHAR_EOF)
            break;
    }

    bk->eof = 1;

    return count;
}
accept_t lexer_read_line(struct lexer_book * bk) { char c = lexer_next_peek(bk); int colon, equal; bk->substitution_mode = ROOT; switch (c) { case CHAR_EOF: /* Found end of file */ lexer_next_char(bk); return YES; break; case '#': lexer_discard_comments(bk); return YES; break; case '\t': bk->substitution_mode = COMMAND; return lexer_read_command(bk); break; case ' ': /* Eat whitespace and try again */ lexer_discard_white_space(bk); return lexer_read_line(bk); break; case '\n': /* Ignore empty lines and try again */ lexer_next_char(bk); return lexer_read_line(bk); break; case '@': /* Jump @ */ bk->substitution_mode = SYNTAX; lexer_next_char(bk); return lexer_read_syntax(bk); break; default: /* Either makeflow keyword (e.g. export), a file list, or variable assignment */ lexer_discard_white_space(bk); colon = lexer_unquoted_look_ahead_count(bk, ":"); equal = lexer_unquoted_look_ahead_count(bk, "="); fprintf(stderr, "%d %d %c\n", colon, equal, c); if((colon != -1) && (equal == -1 || colon < equal)) { bk->substitution_mode = FILES; return lexer_read_file_list(bk); } else { bk->substitution_mode = SYNTAX; return lexer_read_syntax(bk); } break; } }
/*
 * Read a literal: a single-quoted one when the next character is a single
 * quote, an unquoted one otherwise. Returns the selected reader's result.
 */
accept_t lexer_read_literal(struct lexer_book * bk)
{
    int starts_quoted = (lexer_next_peek(bk) == '\'');

    return starts_quoted ? lexer_read_literal_quoted(bk) : lexer_read_literal_unquoted(bk);
}
/*
 * Read a literal: a single-quoted one when the next character is a single
 * quote, an unquoted one otherwise. Returns the selected reader's result.
 */
int lexer_read_literal(struct lexer * lx)
{
    int starts_quoted = (lexer_next_peek(lx) == '\'');

    return starts_quoted ? lexer_read_literal_quoted(lx) : lexer_read_literal_unquoted(lx);
}
/*
 * Skip a comment. A comment starts with # and runs up to and including the
 * next newline, or to end-of-file. An error is reported when the input is
 * not positioned on '#'.
 */
void lexer_discard_comments(struct lexer_book *bk)
{
    if(lexer_next_peek(bk) != '#')
        lexer_report_error(bk, "Expecting a comment.");

    for(;;) {
        char c = lexer_next_char(bk);
        if(c == '\n' || c == CHAR_EOF)
            return;
    }
}
/*
 * Consume every upcoming character that is a member of WHITE_SPACE.
 * Returns the number of characters consumed.
 */
int lexer_discard_white_space(struct lexer *lx)
{
    int skipped;

    for(skipped = 0; strchr(WHITE_SPACE, lexer_next_peek(lx)); skipped++)
        lexer_next_char(lx);

    return skipped;
}
struct token *lexer_read_command_argument(struct lexer_book *bk) { int c = lexer_next_peek(bk); switch(c) { case CHAR_EOF: /* Found end of file while completing command */ bk->lexeme_end++; bk->eof = 1; if( bk->stream ) return lexer_pack_token(bk, NEWLINE); else return NULL; break; case '\n' : lexer_next_char(bk); /* Jump \n */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, NEWLINE); break; case '#' : lexer_discard_comments(bk); lexer_add_to_lexeme(bk, '\n'); return lexer_pack_token(bk, NEWLINE); case ' ' : case '\t': return lexer_read_white_space(bk); break; case '$' : return lexer_read_substitution(bk); break; case '"' : return lexer_read_expandable(bk, '"'); break; case '<' : case '>' : lexer_next_char(bk); /* Jump <, > */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, IO_REDIRECT); break; case '\'': lexer_add_to_lexeme(bk, '\''); lexer_read_literal(bk); lexer_add_to_lexeme(bk, '\''); return lexer_pack_token(bk, LITERAL); break; default: lexer_read_literal(bk); return lexer_pack_token(bk, LITERAL); break; } }
struct token *lexer_read_file(struct lexer *lx) { int c = lexer_next_peek(lx); switch (c) { case CHAR_EOF: lx->lexeme_end++; lx->eof = 1; if(lx->depth == 0) lexer_report_error(lx, "Found end of file while completing file list.\n"); return NULL; break; case '\n': lexer_next_char(lx); /* Jump \n */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_NEWLINE); break; case '#': lexer_discard_comments(lx); lexer_add_to_lexeme(lx, '\n'); return lexer_pack_token(lx, TOKEN_NEWLINE); case ':': lexer_next_char(lx); /* Jump : */ return lexer_pack_token(lx, TOKEN_COLON); break; case ' ': case '\t': /* Discard white-space and add space token. */ lexer_discard_white_space(lx); return lexer_pack_token(lx, TOKEN_SPACE); break; case '$': return lexer_read_substitution(lx); break; case '\'': lexer_add_to_lexeme(lx, '\''); lexer_read_literal_quoted(lx); lexer_add_to_lexeme(lx, '\''); return lexer_pack_token(lx, TOKEN_LITERAL); break; case '-': if(lexer_peek_remote_rename_syntax(lx)) { lexer_next_char(lx); /* Jump -> */ lexer_next_char(lx); return lexer_pack_token(lx, TOKEN_REMOTE_RENAME); } /* Else fall through */ default: return lexer_read_filename(lx); break; } }
/*
 * Read one logical line and dispatch on its first character:
 *   CHAR_EOF   consumed; the value of lexer_next_char is returned;
 *   '#'        the comment is discarded, returns 1;
 *   tab        read as a command;
 *   '\n'       the empty line is skipped and the next one is read;
 *   '@'        skipped, then the rest is read as syntax or a variable;
 *   otherwise  after skipping leading white space: a file list when an
 *              unquoted ':' appears and no unquoted '=' precedes it; a
 *              command when the line began with a space and has no unquoted
 *              '='; else syntax or a variable. Only the command branch
 *              returns the reader's result; the other two return 1.
 */
int lexer_read_line(struct lexer * lx)
{
    char c = lexer_next_peek(lx);

    if(c == CHAR_EOF) {
        /* Found end of file */
        return lexer_next_char(lx);
    }

    if(c == '#') {
        lexer_discard_comments(lx);
        return 1;
    }

    if(c == '\t')
        return lexer_read_command(lx);

    if(c == '\n') {
        /* Ignore empty lines and try again */
        lexer_next_char(lx);
        return lexer_read_line(lx);
    }

    if(c == '@') {
        lexer_next_char(lx);    /* Jump @ */
        return lexer_read_syntax_or_variable(lx);
    }

    /* Either makeflow keyword (e.g. export), a file list, or variable assignment */
    lexer_discard_white_space(lx);

    int colon = lexer_unquoted_look_ahead_count(lx, ":");
    int equal = lexer_unquoted_look_ahead_count(lx, "=");

    int colon_comes_first = (colon != -1) && (equal == -1 || colon < equal);

    if(colon_comes_first) {
        /* A colon ahead of any equal sign: read the line as a file list. */
        lexer_read_file_list(lx);
        return 1;
    }

    if(c == ' ' && equal == -1) {
        /* A command starting with a space.. for backwards compatibility. */
        return lexer_read_command(lx);
    }

    lexer_read_syntax_or_variable(lx);
    return 1;
}
struct token *lexer_read_file(struct lexer_book *bk) { int c = lexer_next_peek(bk); switch(c) { case CHAR_EOF: lexer_report_error(bk, "Found end of file while completing file list.\n"); return NULL; break; case '\n' : lexer_next_char(bk); /* Jump \n */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, NEWLINE); break; case '#' : lexer_discard_comments(bk); lexer_add_to_lexeme(bk, '\n'); return lexer_pack_token(bk, NEWLINE); case ':' : lexer_next_char(bk); /* Jump : */ return lexer_pack_token(bk, COLON); break; case ' ' : case '\t': /* Discard white-space and try again */ lexer_discard_white_space(bk); return lexer_read_file(bk); break; case '$' : return lexer_read_substitution(bk); break; case '\'': lexer_add_to_lexeme(bk, '\''); lexer_read_literal_quoted(bk); lexer_add_to_lexeme(bk, '\''); return lexer_pack_token(bk, LITERAL); break; case '-' : if( lexer_peek_remote_rename_syntax(bk) ) { lexer_next_char(bk); /* Jump -> */ lexer_next_char(bk); return lexer_pack_token(bk, REMOTE_RENAME); } /* Else fall through */ default: return lexer_read_filename(bk); break; } }
struct token *lexer_read_command_argument(struct lexer *lx) { int c = lexer_next_peek(lx); switch (c) { case CHAR_EOF: /* Found end of file while completing command */ lx->lexeme_end++; lx->eof = 1; if(lx->depth == 0) lexer_report_error(lx, "Found end of file while completing command.\n"); return NULL; break; case '\n': lexer_next_char(lx); /* Jump \n */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_NEWLINE); break; case '#': lexer_discard_comments(lx); lexer_add_to_lexeme(lx, '\n'); return lexer_pack_token(lx, TOKEN_NEWLINE); case ' ': case '\t': return lexer_read_white_space(lx); break; case '$': return lexer_read_substitution(lx); break; case '"': return lexer_read_expandable(lx, '"'); break; case '<': case '>': lexer_next_char(lx); /* Jump <, > */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_IO_REDIRECT); break; case '\'': lexer_add_to_lexeme(lx, '\''); lexer_read_literal(lx); lexer_add_to_lexeme(lx, '\''); return lexer_pack_token(lx, TOKEN_LITERAL); break; default: lexer_read_literal(lx); return lexer_pack_token(lx, TOKEN_LITERAL); break; } }
/*
 * Read everything between single quotes, keeping both quote characters in
 * the lexeme. An error is reported when the input does not start with a
 * single quote. Returns the count produced by lexer_read_escaped_until for
 * the quoted body.
 */
int lexer_read_literal_quoted(struct lexer * lx)
{
    if(lexer_next_peek(lx) != '\'')
        lexer_report_error(lx, "Missing opening quote.\n");

    int opening = lexer_next_char(lx);
    lexer_add_to_lexeme(lx, opening);    /* Add first ' */

    int count = lexer_read_escaped_until(lx, "'");

    int closing = lexer_next_char(lx);
    lexer_add_to_lexeme(lx, closing);    /* Add second ' */

    return count;
}
/*
 * Read a filename, adding '-' to names when - is not followed by >.
 * The 'recursive' comes because the function calls itself when completing a
 * name when it added a -.
 *
 * Returns the number of characters read for the name; a value below 1 means
 * that nothing was read before a delimiter.
 */
int lexer_read_filename_recursive(struct lexer_book *bk)
{
    int count = lexer_read_escaped_until(bk, FILENAME_LIMITS);

    if(count < 1)
        return count;

    if(lexer_next_peek(bk) == '-' && !lexer_peek_remote_rename_syntax(bk)) {
        lexer_add_to_lexeme(bk, '-');
        /* Consume the dash that was just copied into the lexeme. Otherwise
         * the recursive call stops on the same '-' immediately and the dash
         * is left in the input to be lexed a second time. */
        lexer_next_char(bk);
        count++;
        count += lexer_read_filename_recursive(bk);
    }

    return count;
}
/*
 * Read a filename into the lexeme. A '-' that does not begin "->" is kept
 * as part of the name, and the function then calls itself to read whatever
 * follows the dash. Returns the number of characters read; a value below 1
 * means that nothing was read before a delimiter.
 */
int lexer_read_filename_recursive(struct lexer *lx)
{
    int length = lexer_read_escaped_until(lx, FILENAME_LIMITS);

    /* Only a non-empty piece can be extended across a dash. */
    if(length >= 1 && lexer_next_peek(lx) == '-' && !lexer_peek_remote_rename_syntax(lx)) {
        lexer_add_to_lexeme(lx, '-');
        lexer_next_char(lx);
        length += 1;
        length += lexer_read_filename_recursive(lx);
    }

    return length;
}
/*
 * Read everything between single quotes; the quotes themselves are skipped
 * and not added to the lexeme. Returns NO when the input does not start
 * with a single quote or when lexer_read_escaped_until yields a negative
 * count, YES otherwise.
 */
accept_t lexer_read_literal_quoted(struct lexer_book * bk)
{
    if(lexer_next_peek(bk) != '\'')
        return NO;

    lexer_next_char(bk);    /* Jump first ' */

    if(lexer_read_escaped_until(bk, "'") < 0)
        return NO;

    lexer_next_char(bk);    /* Jump second ' */

    return YES;
}
/*
 * Consolidate a run of white space into a single SPACE token whose lexeme
 * is one blank. When the input is not positioned on white space an error is
 * reported and NULL returned.
 */
struct token *lexer_read_white_space(struct lexer_book *bk)
{
    int seen;

    for(seen = 0; strchr(WHITE_SPACE, lexer_next_peek(bk)); seen++)
        lexer_next_char(bk);

    if(seen <= 0) {
        lexer_report_error(bk, "Expecting white space.");
        return NULL;
    }

    lexer_add_to_lexeme(bk, ' ');
    return lexer_pack_token(bk, SPACE);
}
/*
 * Consolidates a sequence of white space into a single TOKEN_SPACE token
 * whose lexeme is one blank. When the input is not positioned on white
 * space an error is reported and NULL returned.
 */
struct token *lexer_read_white_space(struct lexer *lx)
{
    /* lexer_discard_white_space already consumes the whole run, so no second
     * scanning loop is needed after it. */
    int count = lexer_discard_white_space(lx);

    if(count > 0) {
        lexer_add_to_lexeme(lx, ' ');
        return lexer_pack_token(lx, TOKEN_SPACE);
    }

    lexer_report_error(lx, "Expecting white space.");

    return NULL;
}
void lexer_append_all_tokens(struct lexer_book *bk, struct lexer_book *bk_s) { struct token *head_s; bk_s->substitution_mode = bk->substitution_mode; while( !bk_s->eof ) { if(lexer_next_peek(bk_s) == CHAR_EOF) { /* Found end of string while completing command */ bk_s->lexeme_end++; bk_s->eof = 1; } else { switch(bk_s->substitution_mode) { case CHAR_EOF: case COMMAND: head_s = lexer_read_command_argument(bk_s); break; case FILES: head_s = lexer_read_file(bk_s); break; case SYNTAX: lexer_read_expandable(bk_s, CHAR_EOF); head_s = lexer_pack_token(bk_s, LITERAL); break; default: lexer_read_line(bk_s); continue; break; } if(head_s) lexer_push_token(bk_s, head_s); } } while( (head_s = list_pop_tail(bk_s->token_queue)) != NULL ) list_push_head(bk->token_queue, head_s); }
int lexer_read_variable_list(struct lexer * lx) { int c; while((c = lexer_next_peek(lx)) != '\n') { lexer_discard_white_space(lx); if(c == '#') { lexer_discard_comments(lx); lexer_roll_back(lx, 1); //Recover the newline break; } lexer_push_token(lx, lexer_read_syntax_name(lx)); } lexer_add_to_lexeme(lx, lexer_next_char(lx)); //Drop the newline lexer_push_token(lx, lexer_pack_token(lx, TOKEN_NEWLINE)); return 1; }
accept_t lexer_read_variable_list(struct lexer_book * bk) { int c; while((c = lexer_next_peek(bk)) != '\n') { lexer_discard_white_space(bk); if(c == '#') { lexer_discard_comments(bk); lexer_roll_back(bk, 1); //Recover the newline break; } lexer_push_token(bk, lexer_read_syntax_name(bk)); } lexer_add_to_lexeme(bk, lexer_next_char(bk)); //Drop the newline lexer_push_token(bk, lexer_pack_token(bk, NEWLINE)); return YES; }