/* As lexer_read_until, but a backslash escapes the character that follows it,
 * so a member of char_set preceded by \ does not stop the read.
 *
 * Returns the number of characters consumed; each escape sequence contributes
 * two (the backslash and the escaped character). When the end of input was
 * reached (bk->eof) and CHAR_EOF is not itself in char_set, an error naming
 * the missing stop characters is reported. */
int lexer_read_escaped_until(struct lexer_book *bk, char *char_set)
{
	/* Stop on the backslash as well, so escapes can be handled here. */
	char *stops = string_format("\\%s", char_set);
	int total = 0;

	for(;;) {
		total += lexer_read_until(bk, stops);

		/* Anything other than a backslash is a genuine stop. */
		if(bk->eof || lexer_next_peek(bk) != '\\')
			break;

		lexer_next_char(bk);	/* Jump the slash */
		char escaped = lexer_next_char(bk);
		total += 2;

		/* The escaped character is kept only when the character that
		 * follows it is not the end of input. */
		if(lexer_next_peek(bk) != CHAR_EOF)
			lexer_add_to_lexeme(bk, escaped);

		if(bk->eof)
			break;
	}

	free(stops);

	if(bk->eof && !strchr(char_set, CHAR_EOF))
		lexer_report_error(bk, "Missing %s\n", char_set);

	return total;
}
/* Looks ahead, without moving the read position, for the first member of
 * char_set that is neither inside single/double quotes nor preceded by a
 * backslash. The scan gives up at the first newline or CHAR_EOF.
 *
 * Returns the number of characters scanned (the matching character included),
 * or -1 when the scan ended on a newline or CHAR_EOF that is not itself in
 * char_set. Everything that was read is rolled back before returning. */
int lexer_unquoted_look_ahead_count(struct lexer_book *bk, char *char_set)
{
	char c;
	char open_quote = 0;	/* 0 outside quotes, else the quote character awaiting its match */
	int count = 0;

	for(;;) {
		c = lexer_next_char(bk);
		count++;

		if(open_quote) {
			/* Inside quotes only the matching quote is significant. */
			if(c == open_quote)
				open_quote = 0;
		} else if(strchr(char_set, c)) {
			break;
		} else if(c == '\\') {
			/* Skip the escaped character. */
			lexer_next_char(bk);
			count++;
		} else if(c == '"' || c == '\'') {
			open_quote = c;
		}

		if(c == '\n' || c == CHAR_EOF)
			break;
	}

	lexer_roll_back(bk, count);

	/* A newline or end of input only counts as a hit if it was asked for. */
	if(c == CHAR_EOF || c == '\n')
		return strchr(char_set, c) ? count : -1;

	return count;
}
struct token *lexer_read_literal_in_expandable_until(struct lexer_book *bk, char end_marker) { char end_markers[7] = { end_marker, '$', '\\', '"', '\'', '#', CHAR_EOF }; int count = 0; do { count += lexer_read_until(bk, end_markers); if(bk->eof) break; char c = lexer_next_peek(bk); if(c == '\\') { lexer_next_char(bk); /* Jump the slash */ char n = lexer_next_char(bk); count += 2; if(lexer_special_escape(n)) lexer_add_to_lexeme(bk, lexer_special_to_code(n)); else lexer_add_to_lexeme(bk, n); } else if(c == '#') { if(end_marker == '\n') { lexer_discard_comments(bk); break; } } else break; } while(!bk->eof); if(bk->eof && strchr(")\"'", end_marker)) lexer_report_error(bk, "Missing closing %c.\n", end_marker); return lexer_pack_token(bk, LITERAL); }
/* Reads a variable substitution of the form $name or $(name).
 *
 * Reports an error if the input is not positioned on '$', or if an opening
 * '(' is not matched by ')' right after the name. Returns the token produced
 * by lexer_read_syntax_name with its type set to SUBSTITUTION. */
struct token *lexer_read_substitution(struct lexer_book *bk)
{
	char first = lexer_next_peek(bk);
	if(first != '$')
		lexer_report_error(bk, "Expecting $ for variable substitution.");

	lexer_next_char(bk);	/* Jump $ */

	int wrapped = (lexer_next_peek(bk) == '(');
	if(wrapped)
		lexer_next_char(bk);	/* Jump ( */

	struct token *name = lexer_read_syntax_name(bk);
	name->type = SUBSTITUTION;

	if(!wrapped)
		return name;

	if(lexer_next_peek(bk) != ')')
		lexer_report_error(bk, "Expecting ) for closing variable substitution.");
	else
		lexer_next_char(bk);	/* Jump ) */

	return name;
}
accept_t lexer_read_variable(struct lexer_book * bk, struct token * name) { lexer_discard_white_space(bk); if(lexer_next_peek(bk) == '=') { lexer_next_char(bk); lexer_add_to_lexeme(bk, '='); } else { int c = lexer_next_char(bk); if(lexer_next_peek(bk) != '=') return NO; lexer_add_to_lexeme(bk, c); lexer_next_char(bk); /* Jump = */ } lexer_push_token(bk, lexer_pack_token(bk, VARIABLE)); lexer_push_token(bk, name); lexer_discard_white_space(bk); lexer_read_expandable(bk, '\n'); lexer_roll_back(bk, 1); //Recover '\n' lexer_discard_white_space(bk); if(lexer_next_char(bk) != '\n') return NO; return YES; }
struct token *lexer_read_substitution(struct lexer *lx) { char closer = 0; //closer is either 0 (no closer), ) or }. char c = lexer_next_peek(lx); if(c != '$') lexer_report_error(lx, "Expecting $ for variable substitution."); lexer_next_char(lx); /* Jump $ */ if(lexer_next_peek(lx) == '(') { lexer_next_char(lx); /* Jump ( */ closer = ')'; } else if(lexer_next_peek(lx) == '{') { lexer_next_char(lx); /* Jump { */ closer = '}'; } struct token *name = lexer_read_syntax_name(lx); name->type = TOKEN_SUBSTITUTION; if(closer) { if(lexer_next_peek(lx) == closer) lexer_next_char(lx); /* Jump ) */ else lexer_report_error(lx, "Expecting %c for closing variable substitution.", closer); } return name; }
int lexer_read_variable(struct lexer *lx, struct token *name) { lexer_discard_white_space(lx); if(lexer_next_peek(lx) == '=') { lexer_next_char(lx); lexer_add_to_lexeme(lx, '='); } else { int c = lexer_next_char(lx); if(lexer_next_peek(lx) != '=') lexer_report_error(lx, "Missing = in variable definition."); lexer_add_to_lexeme(lx, c); lexer_next_char(lx); /* Jump = */ } lexer_push_token(lx, lexer_pack_token(lx, TOKEN_VARIABLE)); lexer_push_token(lx, name); lexer_discard_white_space(lx); //Read variable value lexer_push_token(lx, lexer_read_expandable(lx, '\n')); lexer_roll_back(lx, 1); //Recover '\n' lexer_discard_white_space(lx); if(lexer_next_char(lx) != '\n') lexer_report_error(lx, "Missing newline at end of variable definition."); return 1; }
accept_t lexer_read_line(struct lexer_book * bk) { char c = lexer_next_peek(bk); int colon, equal; bk->substitution_mode = ROOT; switch (c) { case CHAR_EOF: /* Found end of file */ lexer_next_char(bk); return YES; break; case '#': lexer_discard_comments(bk); return YES; break; case '\t': bk->substitution_mode = COMMAND; return lexer_read_command(bk); break; case ' ': /* Eat whitespace and try again */ lexer_discard_white_space(bk); return lexer_read_line(bk); break; case '\n': /* Ignore empty lines and try again */ lexer_next_char(bk); return lexer_read_line(bk); break; case '@': /* Jump @ */ bk->substitution_mode = SYNTAX; lexer_next_char(bk); return lexer_read_syntax(bk); break; default: /* Either makeflow keyword (e.g. export), a file list, or variable assignment */ lexer_discard_white_space(bk); colon = lexer_unquoted_look_ahead_count(bk, ":"); equal = lexer_unquoted_look_ahead_count(bk, "="); fprintf(stderr, "%d %d %c\n", colon, equal, c); if((colon != -1) && (equal == -1 || colon < equal)) { bk->substitution_mode = FILES; return lexer_read_file_list(bk); } else { bk->substitution_mode = SYNTAX; return lexer_read_syntax(bk); } break; } }
struct token *lexer_read_command_argument(struct lexer_book *bk) { int c = lexer_next_peek(bk); switch(c) { case CHAR_EOF: /* Found end of file while completing command */ bk->lexeme_end++; bk->eof = 1; if( bk->stream ) return lexer_pack_token(bk, NEWLINE); else return NULL; break; case '\n' : lexer_next_char(bk); /* Jump \n */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, NEWLINE); break; case '#' : lexer_discard_comments(bk); lexer_add_to_lexeme(bk, '\n'); return lexer_pack_token(bk, NEWLINE); case ' ' : case '\t': return lexer_read_white_space(bk); break; case '$' : return lexer_read_substitution(bk); break; case '"' : return lexer_read_expandable(bk, '"'); break; case '<' : case '>' : lexer_next_char(bk); /* Jump <, > */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, IO_REDIRECT); break; case '\'': lexer_add_to_lexeme(bk, '\''); lexer_read_literal(bk); lexer_add_to_lexeme(bk, '\''); return lexer_pack_token(bk, LITERAL); break; default: lexer_read_literal(bk); return lexer_pack_token(bk, LITERAL); break; } }
struct token *lexer_read_file(struct lexer *lx) { int c = lexer_next_peek(lx); switch (c) { case CHAR_EOF: lx->lexeme_end++; lx->eof = 1; if(lx->depth == 0) lexer_report_error(lx, "Found end of file while completing file list.\n"); return NULL; break; case '\n': lexer_next_char(lx); /* Jump \n */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_NEWLINE); break; case '#': lexer_discard_comments(lx); lexer_add_to_lexeme(lx, '\n'); return lexer_pack_token(lx, TOKEN_NEWLINE); case ':': lexer_next_char(lx); /* Jump : */ return lexer_pack_token(lx, TOKEN_COLON); break; case ' ': case '\t': /* Discard white-space and add space token. */ lexer_discard_white_space(lx); return lexer_pack_token(lx, TOKEN_SPACE); break; case '$': return lexer_read_substitution(lx); break; case '\'': lexer_add_to_lexeme(lx, '\''); lexer_read_literal_quoted(lx); lexer_add_to_lexeme(lx, '\''); return lexer_pack_token(lx, TOKEN_LITERAL); break; case '-': if(lexer_peek_remote_rename_syntax(lx)) { lexer_next_char(lx); /* Jump -> */ lexer_next_char(lx); return lexer_pack_token(lx, TOKEN_REMOTE_RENAME); } /* Else fall through */ default: return lexer_read_filename(lx); break; } }
/* Reads one logical line, choosing the reader from its first character:
 *
 *   CHAR_EOF  consumed; the value of lexer_next_char is returned
 *   '#'       comment discarded, returns 1
 *   tab       delegates to lexer_read_command
 *   newline   empty line skipped, then the next line is read
 *   '@'       skipped, delegates to lexer_read_syntax_or_variable
 *   other     after skipping white space: a file list when an unquoted ':'
 *             appears before any unquoted '='; a command when the line began
 *             with a space and holds no unquoted '='; otherwise a syntax
 *             keyword or variable definition. Returns 1 except for the
 *             command case, which returns what lexer_read_command returns. */
int lexer_read_line(struct lexer * lx)
{
	char c = lexer_next_peek(lx);

	if(c == CHAR_EOF) {
		/* Found end of file */
		return lexer_next_char(lx);
	}

	if(c == '#') {
		lexer_discard_comments(lx);
		return 1;
	}

	if(c == '\t')
		return lexer_read_command(lx);

	if(c == '\n') {
		/* Ignore empty lines and try again */
		lexer_next_char(lx);
		return lexer_read_line(lx);
	}

	if(c == '@') {
		lexer_next_char(lx);	/* Jump @ */
		return lexer_read_syntax_or_variable(lx);
	}

	/* Either makeflow keyword (e.g. export), a file list, or variable assignment */
	lexer_discard_white_space(lx);

	/* Both look-aheads return -1 when the character is absent. */
	int colon = lexer_unquoted_look_ahead_count(lx, ":");
	int equal = lexer_unquoted_look_ahead_count(lx, "=");

	/* A colon that precedes any equal sign marks a file list. */
	if((colon != -1) && (equal == -1 || colon < equal)) {
		lexer_read_file_list(lx);
		return 1;
	}

	/* A command starting with a space.. for backwards compatibility. */
	if(c == ' ' && equal == -1)
		return lexer_read_command(lx);

	lexer_read_syntax_or_variable(lx);
	return 1;
}
struct token *lexer_read_command_argument(struct lexer *lx) { int c = lexer_next_peek(lx); switch (c) { case CHAR_EOF: /* Found end of file while completing command */ lx->lexeme_end++; lx->eof = 1; if(lx->depth == 0) lexer_report_error(lx, "Found end of file while completing command.\n"); return NULL; break; case '\n': lexer_next_char(lx); /* Jump \n */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_NEWLINE); break; case '#': lexer_discard_comments(lx); lexer_add_to_lexeme(lx, '\n'); return lexer_pack_token(lx, TOKEN_NEWLINE); case ' ': case '\t': return lexer_read_white_space(lx); break; case '$': return lexer_read_substitution(lx); break; case '"': return lexer_read_expandable(lx, '"'); break; case '<': case '>': lexer_next_char(lx); /* Jump <, > */ lexer_add_to_lexeme(lx, c); return lexer_pack_token(lx, TOKEN_IO_REDIRECT); break; case '\'': lexer_add_to_lexeme(lx, '\''); lexer_read_literal(lx); lexer_add_to_lexeme(lx, '\''); return lexer_pack_token(lx, TOKEN_LITERAL); break; default: lexer_read_literal(lx); return lexer_pack_token(lx, TOKEN_LITERAL); break; } }
struct token *lexer_read_file(struct lexer_book *bk) { int c = lexer_next_peek(bk); switch(c) { case CHAR_EOF: lexer_report_error(bk, "Found end of file while completing file list.\n"); return NULL; break; case '\n' : lexer_next_char(bk); /* Jump \n */ lexer_add_to_lexeme(bk, c); return lexer_pack_token(bk, NEWLINE); break; case '#' : lexer_discard_comments(bk); lexer_add_to_lexeme(bk, '\n'); return lexer_pack_token(bk, NEWLINE); case ':' : lexer_next_char(bk); /* Jump : */ return lexer_pack_token(bk, COLON); break; case ' ' : case '\t': /* Discard white-space and try again */ lexer_discard_white_space(bk); return lexer_read_file(bk); break; case '$' : return lexer_read_substitution(bk); break; case '\'': lexer_add_to_lexeme(bk, '\''); lexer_read_literal_quoted(bk); lexer_add_to_lexeme(bk, '\''); return lexer_pack_token(bk, LITERAL); break; case '-' : if( lexer_peek_remote_rename_syntax(bk) ) { lexer_next_char(bk); /* Jump -> */ lexer_next_char(bk); return lexer_pack_token(bk, REMOTE_RENAME); } /* Else fall through */ default: return lexer_read_filename(bk); break; } }
/* Reads a single-quoted literal, keeping both quote characters in the lexeme.
 *
 * An error is reported when the input is not positioned on a single quote.
 * Returns the count given by lexer_read_escaped_until for the text between
 * the quotes. */
int lexer_read_literal_quoted(struct lexer * lx)
{
	if(lexer_next_peek(lx) != '\'')
		lexer_report_error(lx, "Missing opening quote.\n");

	/* Opening quote */
	int opening = lexer_next_char(lx);
	lexer_add_to_lexeme(lx, opening);

	int count = lexer_read_escaped_until(lx, "'");

	/* Closing quote */
	int closing = lexer_next_char(lx);
	lexer_add_to_lexeme(lx, closing);

	return count;
}
/* Reads the text between single quotes into the lexeme; the quotes themselves
 * are skipped.
 *
 * Returns NO when the input is not positioned on a single quote or when
 * lexer_read_escaped_until returns a negative count, YES otherwise. */
accept_t lexer_read_literal_quoted(struct lexer_book * bk)
{
	if(lexer_next_peek(bk) != '\'')
		return NO;

	lexer_next_char(bk);	/* Jump first ' */

	if(lexer_read_escaped_until(bk, "'") < 0)
		return NO;

	lexer_next_char(bk);	/* Jump second ' */
	return YES;
}
/* Appends characters to the lexeme until the upcoming character is a member
 * of char_set; that character is left unread.
 *
 * If CHAR_EOF is met first (and is not in char_set) it is consumed without
 * being added to the lexeme, and bk->eof is set.
 *
 * Returns the count of characters that we would have to roll-back to undo the
 * read. */
int lexer_read_until(struct lexer_book *bk, char *char_set)
{
	int consumed = 0;

	for(;;) {
		char next = lexer_next_peek(bk);

		if(strchr(char_set, next))
			return consumed;

		if(next == CHAR_EOF) {
			/* The end marker is consumed and counted, but never stored. */
			lexer_next_char(bk);
			consumed++;
			bk->eof = 1;
			return consumed;
		}

		lexer_add_to_lexeme(bk, next);
		lexer_next_char(bk);
		consumed++;
	}
}
int lexer_read_expandable_recursive(struct lexer_book *bk, char end_marker) { int count = 0; lexer_discard_white_space(bk); while(!bk->eof) { int c = lexer_next_peek(bk); if(c == '$') { count++; lexer_push_token(bk, lexer_read_substitution(bk)); } if(c == '\'') { lexer_read_literal(bk); lexer_push_token(bk, lexer_pack_token(bk, LITERAL)); } else if(c == end_marker) { lexer_next_char(bk); /* Jump end_marker */ return count; } else if(c == '"') count += lexer_read_expandable_recursive(bk, '"'); else if(c == '#' && end_marker != '"') { lexer_discard_comments(bk); return count; } else { count++; lexer_push_token(bk, lexer_read_literal_in_expandable_until(bk, end_marker)); } } /* Found eof before end_marker */ abort(); }
/* Tells whether the next character of the input belongs to the set 'ch'.
 *
 * The character is read with lexer_next_char(input, ch), i.e. it is consumed.
 * The answer is true when the lexer is still on after that read and the item
 * it then hands back (lexer_item) is non-empty; it is false otherwise,
 * including when the lexer was already off on entry.
 *
 * Precondition: input != NULL. */
static bool is_the_next_char (Lexer *input, const char *ch){
    /* Pre: */
    assert (input != NULL);

    /* Nothing left to read. */
    if (lexer_is_off (input)){
        return false;
    }

    /* Read the next character to see whether it belongs to 'ch'. */
    lexer_next_char (input, ch);

    if (lexer_is_off (input)){
        return false;
    }

    bstring taken = lexer_item (input);

    /* A non-empty item means a character belonging to 'ch' was read. */
    bool result = (blength (taken) > 0);

    /* Release the item once it has been inspected. */
    bdestroy (taken);

    return result;
}
/* Returns the upcoming character without advancing: one character is read
 * with lexer_next_char and the read is then rolled back by one. */
int lexer_next_peek(struct lexer_book *bk)
{
	const int upcoming = lexer_next_char(bk);

	lexer_roll_back(bk, 1);

	return upcoming;
}
/* Returns the upcoming character without advancing: one character is read
 * with lexer_next_char and the read is then rolled back by one. */
int lexer_next_peek(struct lexer *lx)
{
	const int upcoming = lexer_next_char(lx);

	lexer_roll_back(lx, 1);

	return upcoming;
}
/* Skips a comment: it starts with '#' and runs up to and including the next
 * newline, or up to the end of input. An error is reported when the input is
 * not positioned on '#'. */
void lexer_discard_comments(struct lexer_book *bk)
{
	if(lexer_next_peek(bk) != '#')
		lexer_report_error(bk, "Expecting a comment.");

	for(;;) {
		char c = lexer_next_char(bk);

		/* The terminating character itself is consumed too. */
		if(c == '\n' || c == CHAR_EOF)
			break;
	}
}
/* Skips characters for as long as the upcoming one is a member of
 * WHITE_SPACE. Returns how many characters were skipped. */
int lexer_discard_white_space(struct lexer *lx)
{
	int skipped = 0;

	for(; strchr(WHITE_SPACE, lexer_next_peek(lx)); skipped++)
		lexer_next_char(lx);

	return skipped;
}
/* Tells whether the input is positioned on "->". The read position is left
 * where it was. Returns 1 for "->", 0 otherwise. */
int lexer_peek_remote_rename_syntax(struct lexer *lx)
{
	int found = 0;

	if(lexer_next_peek(lx) == '-') {
		/* Step over the '-' to look at what follows, then step back. */
		lexer_next_char(lx);
		found = (lexer_next_peek(lx) == '>');
		lexer_roll_back(lx, 1);
	}

	return found;
}
/* Tells whether the input is positioned on "->". The read position is left
 * where it was. Returns 1 for "->", 0 otherwise. */
int lexer_peek_remote_rename_syntax(struct lexer_book *bk)
{
	int found = 0;

	if(lexer_next_peek(bk) == '-') {
		/* Step over the '-' to look at what follows, then step back. */
		lexer_next_char(bk);
		found = (lexer_next_peek(bk) == '>');
		lexer_roll_back(bk, 1);
	}

	return found;
}
//opened tracks whether it is the opening (opened = 0) or closing (opened = 1) double quote we encounter. struct list *lexer_read_expandable_recursive(struct lexer *lx, char end_marker, int opened) { lexer_discard_white_space(lx); struct list *tokens = list_create(); while(!lx->eof) { int c = lexer_next_peek(lx); if(c == '$') { list_push_tail(tokens, lexer_read_substitution(lx)); } if(c == '\'') { lexer_read_literal(lx); list_push_tail(tokens, lexer_pack_token(lx, TOKEN_LITERAL)); } else if(c == '"' && opened == 0) { lexer_add_to_lexeme(lx, lexer_next_char(lx)); list_push_tail(tokens, lexer_pack_token(lx, TOKEN_LITERAL)); // Add first " tokens = list_splice(tokens, lexer_read_expandable_recursive(lx, '"', 1)); lexer_add_to_lexeme(lx, '"'); list_push_tail(tokens, lexer_pack_token(lx, TOKEN_LITERAL)); // Add closing " if(end_marker == '"') return tokens; } else if(c == '#' && end_marker != '"') { lexer_discard_comments(lx); } else if(c == end_marker) { lexer_next_char(lx); /* Jump end_marker */ return tokens; } else { list_push_tail(tokens, lexer_read_literal_in_expandable_until(lx, end_marker)); } } lexer_report_error(lx, "Found EOF before end marker: %c.\n", end_marker); return NULL; }
struct token *lexer_read_literal_in_expandable_until(struct lexer *lx, char end_marker) { const char end_markers[8] = { end_marker, '$', '\\', '"', '\'', '#', CHAR_EOF ,0}; int count = 0; do { count += lexer_read_until(lx, end_markers); if(lx->eof) break; char c = lexer_next_peek(lx); if(c == '\\') { lexer_next_char(lx); /* Jump the slash */ char n = lexer_next_char(lx); count += 2; if(lexer_special_escape(n)) { lexer_add_to_lexeme(lx, lexer_special_to_code(n)); } else if(n == '\n') { lexer_add_to_lexeme(lx, ' '); } else { lexer_add_to_lexeme(lx, n); } } else if(c == '#') { if(end_marker == '\n') { lexer_discard_comments(lx); break; } } else break; } while(!lx->eof); if(lx->eof && strchr(")\"'", end_marker)) lexer_report_error(lx, "Missing closing %c.\n", end_marker); return lexer_pack_token(lx, TOKEN_LITERAL); }
/* Read a filename, adding '-' to the name whenever a '-' is not followed by
 * '>'. The name is read in segments delimited by FILENAME_LIMITS; after each
 * segment a plain '-' is appended and the next segment is read. (The
 * 'recursive' in the name is historical: the segments are handled by a loop
 * here.)
 *
 * Returns the number of characters consumed. Reading stops at the first
 * segment for which lexer_read_escaped_until returns less than 1. */
int lexer_read_filename_recursive(struct lexer *lx)
{
	int total = 0;

	for(;;) {
		int segment = lexer_read_escaped_until(lx, FILENAME_LIMITS);
		total += segment;

		if(segment < 1)
			break;

		/* Only a '-' that does not begin "->" belongs to the name. */
		if(lexer_next_peek(lx) != '-' || lexer_peek_remote_rename_syntax(lx))
			break;

		lexer_add_to_lexeme(lx, '-');
		lexer_next_char(lx);
		total++;
	}

	return total;
}
/* Consolidates a sequence of white space into a single TOKEN_SPACE token
 * whose lexeme is one ' '. When no white space is found an error is reported
 * and NULL is returned. */
struct token *lexer_read_white_space(struct lexer *lx)
{
	int seen = lexer_discard_white_space(lx);

	/* Pick up any white space still pending after the call above. */
	for(; strchr(WHITE_SPACE, lexer_next_peek(lx)); lexer_next_char(lx))
		seen++;

	if(seen <= 0) {
		lexer_report_error(lx, "Expecting white space.");
		return NULL;
	}

	lexer_add_to_lexeme(lx, ' ');
	return lexer_pack_token(lx, TOKEN_SPACE);
}
/* Consolidates a sequence of white space into a single SPACE token whose
 * lexeme is one ' '. When no white space is found an error is reported and
 * NULL is returned. */
struct token *lexer_read_white_space(struct lexer_book *bk)
{
	int seen = 0;

	for(; strchr(WHITE_SPACE, lexer_next_peek(bk)); lexer_next_char(bk))
		seen++;

	if(seen <= 0) {
		lexer_report_error(bk, "Expecting white space.");
		return NULL;
	}

	lexer_add_to_lexeme(bk, ' ');
	return lexer_pack_token(bk, SPACE);
}
int lexer_read_variable_list(struct lexer * lx) { int c; while((c = lexer_next_peek(lx)) != '\n') { lexer_discard_white_space(lx); if(c == '#') { lexer_discard_comments(lx); lexer_roll_back(lx, 1); //Recover the newline break; } lexer_push_token(lx, lexer_read_syntax_name(lx)); } lexer_add_to_lexeme(lx, lexer_next_char(lx)); //Drop the newline lexer_push_token(lx, lexer_pack_token(lx, TOKEN_NEWLINE)); return 1; }