static int dag_parse_variable(struct lexer *bk, struct dag_node *n) { struct token *t = lexer_next_token(bk); char mode = t->lexeme[0]; //=, or + (assign or append) lexer_free_token(t); t = lexer_next_token(bk); if(t->type != TOKEN_LITERAL) { lexer_report_error(bk, "Literal variable name expected."); } char *name = xxstrdup(t->lexeme); lexer_free_token(t); t = lexer_next_token(bk); if(t->type != TOKEN_LITERAL) { lexer_report_error(bk, "Expected LITERAL token, got: %s\n", lexer_print_token(t)); } char *value = xxstrdup(t->lexeme); lexer_free_token(t); struct hash_table *current_table; int nodeid; if(n) { current_table = n->variables; nodeid = n->nodeid; } else { current_table = bk->d->variables; nodeid = bk->d->nodeid_counter; } int result = 1; switch(mode) { case '=': dag_variable_add_value(name, current_table, nodeid, value); debug(D_MAKEFLOW_PARSER, "%s appending to variable name=%s, value=%s", (n ? "node" : "dag"), name, value); break; case '+': dag_parse_append_variable(bk, nodeid, n, name, value); debug(D_MAKEFLOW_PARSER, "%s variable name=%s, value=%s", (n ? "node" : "dag"), name, value); break; default: lexer_report_error(bk, "Unknown variable operator."); result = 0; } dag_parse_process_special_variable(bk, n, nodeid, name, value); free(name); free(value); return result; }
/*
 * Reads the rest of a variable definition once its name has been lexed.
 * Accepts NAME=VALUE and operator forms such as NAME+=VALUE. Pushes, in
 * order: a TOKEN_VARIABLE token whose lexeme is '=' or the operator
 * character, the name token, and the expandable value; then requires a
 * terminating newline. Returns 1 on success.
 */
int lexer_read_variable(struct lexer *lx, struct token *name)
{
	lexer_discard_white_space(lx);

	if(lexer_next_peek(lx) == '=') {
		/* Plain assignment: lexeme is just '='. */
		lexer_next_char(lx);
		lexer_add_to_lexeme(lx, '=');
	} else {
		/* Operator form (e.g. "+="): keep the operator char, require '='. */
		int c = lexer_next_char(lx);

		if(lexer_next_peek(lx) != '=')
			lexer_report_error(lx, "Missing = in variable definition.");

		lexer_add_to_lexeme(lx, c);
		lexer_next_char(lx);	/* Jump = */
	}

	lexer_push_token(lx, lexer_pack_token(lx, TOKEN_VARIABLE));
	lexer_push_token(lx, name);

	lexer_discard_white_space(lx);

	//Read variable value
	lexer_push_token(lx, lexer_read_expandable(lx, '\n'));
	lexer_roll_back(lx, 1);	//Recover '\n'

	lexer_discard_white_space(lx);

	if(lexer_next_char(lx) != '\n')
		lexer_report_error(lx, "Missing newline at end of variable definition.");

	return 1;
}
struct token *lexer_read_substitution(struct lexer *lx) { char closer = 0; //closer is either 0 (no closer), ) or }. char c = lexer_next_peek(lx); if(c != '$') lexer_report_error(lx, "Expecting $ for variable substitution."); lexer_next_char(lx); /* Jump $ */ if(lexer_next_peek(lx) == '(') { lexer_next_char(lx); /* Jump ( */ closer = ')'; } else if(lexer_next_peek(lx) == '{') { lexer_next_char(lx); /* Jump { */ closer = '}'; } struct token *name = lexer_read_syntax_name(lx); name->type = TOKEN_SUBSTITUTION; if(closer) { if(lexer_next_peek(lx) == closer) lexer_next_char(lx); /* Jump ) */ else lexer_report_error(lx, "Expecting %c for closing variable substitution.", closer); } return name; }
/*
 * Reads a variable substitution ($NAME or $(NAME)) in the old lexer_book
 * interface. Returns the name token retagged as SUBSTITUTION.
 */
struct token *lexer_read_substitution(struct lexer_book *bk)
{
	int parenthesis = 0;

	if(lexer_next_peek(bk) != '$')
		lexer_report_error(bk, "Expecting $ for variable substitution.");

	lexer_next_char(bk);	/* Jump $ */

	if(lexer_next_peek(bk) == '(') {
		lexer_next_char(bk);	/* Jump ( */
		parenthesis = 1;
	}

	struct token *name = lexer_read_syntax_name(bk);
	name->type = SUBSTITUTION;

	if(parenthesis) {
		if(lexer_next_peek(bk) == ')')
			lexer_next_char(bk);	/* Jump ) */
		else
			lexer_report_error(bk, "Expecting ) for closing variable substitution.");
	}

	return name;
}
/*
 * Parses one rule: its file list, any node-local variable definitions,
 * and its command. Creates the dag_node, links it into the dag, and
 * fills in its resource requirements. Returns 1 on success.
 */
static int dag_parse_node(struct lexer *bk)
{
	struct token *t = lexer_next_token(bk);

	if(t->type != TOKEN_FILES) {
		lexer_report_error(bk, "Error reading rule.");
	}
	lexer_free_token(t);

	struct dag_node *n;
	n = dag_node_create(bk->d, bk->line_number);

	/* Progress indicator for very large makeflows. */
	if(verbose_parsing && bk->d->nodeid_counter % parsing_rule_mod_counter == 0) {
		fprintf(stdout, "\rRules parsed: %d", bk->d->nodeid_counter + 1);
		fflush(stdout);
	}

	n->category = bk->category;
	list_push_tail(n->category->nodes, n);

	dag_parse_node_filelist(bk, n);

	/* Variable lookups now resolve against this node first. */
	bk->environment->node = n;

	/* Read variables, if any */
	while((t = lexer_peek_next_token(bk)) && t->type != TOKEN_COMMAND) {
		switch (t->type) {
		case TOKEN_VARIABLE:
			dag_parse_variable(bk, n);
			break;
		default:
			lexer_report_error(bk, "Expected COMMAND or VARIABLE, got: %s", lexer_print_token(t));
			break;
		}
	}

	/* NULL peek means the input ended before a command was seen. */
	if(!t) {
		lexer_report_error(bk, "Rule does not have a command.\n");
	}

	dag_parse_node_command(bk, n);

	/* Leave node scope. */
	bk->environment->node = NULL;

	/* Link the node into the dag's node list and id table. */
	n->next = bk->d->nodes;
	bk->d->nodes = n;
	itable_insert(bk->d->node_table, n->nodeid, n);

	debug(D_MAKEFLOW_PARSER, "Setting resource category '%s' for rule %d.\n", n->category->label, n->nodeid);
	dag_node_fill_resources(n);
	dag_node_print_debug_resources(n);

	return 1;
}
/*
 * Parses a rule's command: consumes the COMMAND token, reads any
 * LOCAL/MAKEFLOW modifiers up to COMMAND_MOD_END, then dispatches to
 * the nested-makeflow or regular command parser. Returns the dispatched
 * parser's result.
 */
static int dag_parse_node_command(struct lexer *bk, struct dag_node *n)
{
	struct token *t;

	//Jump COMMAND token.
	t = lexer_next_token(bk);
	lexer_free_token(t);

	/* BATCH_LOCAL (possibly node-scoped) forces local execution. */
	char *local = dag_variable_lookup_string("BATCH_LOCAL", bk->environment);
	if(local) {
		if(string_istrue(local))
			n->local_job = 1;
		free(local);
	}

	/* Read command modifiers. */
	while((t = lexer_peek_next_token(bk)) && t->type != TOKEN_COMMAND_MOD_END) {
		t = lexer_next_token(bk);

		if(strcmp(t->lexeme, "LOCAL") == 0) {
			n->local_job = 1;
		} else if(strcmp(t->lexeme, "MAKEFLOW") == 0) {
			n->nested_job = 1;
		} else {
			lexer_report_error(bk, "Parser does not know about modifier: %s.\n", t->lexeme);
		}

		lexer_free_token(t);
	}

	/* NULL peek means input ended before COMMAND_MOD_END. */
	if(!t) {
		lexer_report_error(bk, "Malformed command.");
	}

	//Free COMMAND_MOD_END token.
	t = lexer_next_token(bk);
	lexer_free_token(t);

	if(n->nested_job) {
		return dag_parse_node_nested_makeflow(bk, n);
	} else {
		return dag_parse_node_regular_command(bk, n);
	}
}
/*
 * Parses an "export" line: records each named variable in
 * d->export_vars. Accepts both bare names ("export A B") and inline
 * definitions ("export A=1"), which are also processed as regular
 * dag-level variable definitions. Requires at least one name.
 */
static int dag_parse_export(struct lexer *bk)
{
	struct token *t, *vtoken, *vname;
	const char *name;

	int count = 0;
	while((t = lexer_peek_next_token(bk)) && t->type != TOKEN_NEWLINE) {
		switch(t->type) {
		case TOKEN_VARIABLE:
			/* Inline definition: peek the name, then re-run the full
			 * variable parser on the restored token stream. */
			vtoken = lexer_next_token(bk);	//Save VARIABLE token.
			vname = lexer_peek_next_token(bk);
			if(vname->type == TOKEN_LITERAL) {
				name = xxstrdup(vname->lexeme);
			} else {
				lexer_report_error(bk, "Variable definition has name missing.\n");
			}
			lexer_preppend_token(bk, vtoken);	//Restore VARIABLE token.
			dag_parse_variable(bk, NULL);
			break;
		case TOKEN_LITERAL:
			/* Bare variable name. */
			t = lexer_next_token(bk);
			name = xxstrdup(t->lexeme);
			lexer_free_token(t);
			break;
		default:
			lexer_report_error(bk, "Malformed export syntax.\n");
			break;
		}
		/* NOTE(review): the strdup'd name appears to be owned by the set
		 * after insertion — confirm set_insert does not copy, else this
		 * leaks one string per export. */
		set_insert(bk->d->export_vars, name);
		count++;
		debug(D_MAKEFLOW_PARSER, "export variable: %s", name);
	}

	if(t) {
		//Free newline
		t = lexer_next_token(bk);
		lexer_free_token(t);
	}

	if(count < 1) {
		lexer_report_error(bk, "The export syntax needs the explicit name of the variables to be exported.\n");
	}

	return 1;
}
/*
 * Creates a sub-lexer scanning the expansion of variable t->lexeme, used
 * to process $VAR substitutions recursively. The sub-lexer's depth guards
 * against unbounded self-referential expansion.
 */
struct lexer *lexer_create_substitution(struct lexer *lx, struct token *t)
{
	char *substitution = NULL;

	if(lx->environment) {
		substitution = dag_variable_lookup_string(t->lexeme, lx->environment);
	}

	struct lexer *lx_s;

	if(!substitution) {
		/* NOTE(review): if fatal() aborts, the xxstrdup("") below is dead
		 * code kept as a safety net — confirm fatal's semantics. */
		fatal("Variable %s has not yet been defined at line % " PRId64 ".\n", t->lexeme, lx->line_number);
		substitution = xxstrdup("");
	}

	/* Scan the expansion as an in-memory string, one level deeper. */
	lx_s = lexer_create(STRING, substitution, lx->line_number, lx->column_number);
	lx_s->depth = lx->depth + 1;

	if(lx_s->depth > MAX_SUBSTITUTION_DEPTH)
		lexer_report_error(lx, "More than %d recursive subsitutions attempted.\n", MAX_SUBSTITUTION_DEPTH);

	/* lexer_create copies the string, so the lookup result can go. */
	free(substitution);

	return lx_s;
}
/* As lexer_read_until, but elements of char_set preceded by \ are ignored as stops. */
int lexer_read_escaped_until(struct lexer_book *bk, char *char_set)
{
	/* Also stop at backslashes so escapes can be handled here. */
	char *char_set_slash = string_format("\\%s", char_set);

	int count = 0;

	do {
		count += lexer_read_until(bk, char_set_slash);

		if(!bk->eof && lexer_next_peek(bk) == '\\') {
			lexer_next_char(bk);	/* Jump the slash */
			char c = lexer_next_char(bk);
			count += 2;	/* Slash and escaped char were both consumed. */

			/* NOTE(review): the escaped char is dropped when it is the
			 * last char before EOF — confirm this is intentional. */
			if(lexer_next_peek(bk) != CHAR_EOF)
				lexer_add_to_lexeme(bk, c);
		} else
			break;
	} while(!bk->eof);

	free(char_set_slash);

	/* Hitting EOF is only acceptable when the caller listed it as a stop. */
	if(bk->eof && !strchr(char_set, CHAR_EOF))
		lexer_report_error(bk, "Missing %s\n", char_set);

	return count;
}
/*
 * Lexes a command line (old interface). A command line must start with a
 * tab. Pushes a COMMAND start marker followed by the argument tokens.
 * Returns YES when a non-empty command was read, NO otherwise.
 */
accept_t lexer_read_command(struct lexer_book *bk)
{
	if(lexer_next_peek(bk) != '\t')
		return NO;

	int count = 0;

	lexer_discard_white_space(bk);

	struct token *t;
	do {
		t = lexer_read_command_argument(bk);
		if(!t)
			break;
		if(t->type == NEWLINE && count == 0) {
			/* Tab followed directly by a newline: no command at all. */
			lexer_report_error(bk, "Missing command line.\n");
		} else if(t->type != NEWLINE && count == 0) {
			/* Add command start marker */
			lexer_push_token(bk, lexer_pack_token(bk, COMMAND));
		}
		lexer_push_token(bk, t);
		count++;
	} while(t->type != NEWLINE);

	/* count includes the trailing NEWLINE token, hence the > 1 test. */
	if(count > 1)
		return YES;
	else {
		return NO;
	}
}
/*
 * Reads a literal chunk inside an expandable region (e.g. a double-quoted
 * string), stopping at end_marker, '$', a quote, '#', or EOF, and
 * translating backslash escapes. Returns the chunk packed as a LITERAL.
 */
struct token *lexer_read_literal_in_expandable_until(struct lexer_book *bk, char end_marker)
{
	/* NOTE(review): this 7-element array is not NUL-terminated unless
	 * CHAR_EOF is 0 — confirm lexer_read_until treats CHAR_EOF as the
	 * set terminator rather than relying on a trailing '\0'. */
	char end_markers[7] = { end_marker, '$', '\\', '"', '\'', '#', CHAR_EOF };

	int count = 0;
	do {
		count += lexer_read_until(bk, end_markers);

		if(bk->eof)
			break;

		char c = lexer_next_peek(bk);
		if(c == '\\') {
			lexer_next_char(bk);	/* Jump the slash */
			char n = lexer_next_char(bk);
			count += 2;
			/* Translate \n, \t, etc. into their control codes. */
			if(lexer_special_escape(n))
				lexer_add_to_lexeme(bk, lexer_special_to_code(n));
			else
				lexer_add_to_lexeme(bk, n);
		} else if(c == '#') {
			/* Comments only end the literal when scanning to end-of-line.
			 * NOTE(review): when end_marker != '\n' the '#' is neither
			 * consumed nor a stop for the caller — looks like it could
			 * loop here forever; confirm '#' cannot occur in that case. */
			if(end_marker == '\n') {
				lexer_discard_comments(bk);
				break;
			}
		} else
			break;
	} while(!bk->eof);

	/* Quote-like markers must be closed before EOF. */
	if(bk->eof && strchr(")\"'", end_marker))
		lexer_report_error(bk, "Missing closing %c.\n", end_marker);

	return lexer_pack_token(bk, LITERAL);
}
/*
 * Reads one filename token (old interface). At least one character must
 * be consumed, otherwise an error is reported.
 */
struct token *lexer_read_filename(struct lexer_book *bk)
{
	if(lexer_read_filename_recursive(bk) < 1)
		lexer_report_error(bk, "Expecting a filename.");

	return lexer_pack_token(bk, LITERAL);
}
int lexer_read_command(struct lexer *lx) { struct list *tokens = lexer_read_command_aux(lx); struct token *t; if(list_size(tokens) < 2) { /* If the only token in the list is a NEWLINE, then this is an empty line. */ while((t = list_pop_head(tokens))) lexer_free_token(t); list_delete(tokens); return 1; } /* Add command start marker.*/ lexer_push_token(lx, lexer_pack_token(lx, TOKEN_COMMAND)); /* Merge command tokens into main queue. */ /* First merge command modifiers, if any. */ list_first_item(tokens); while((t = list_peek_head(tokens))) { if(t->type == TOKEN_LITERAL && ((strcmp(t->lexeme, "LOCAL") == 0) || (strcmp(t->lexeme, "MAKEFLOW") == 0) )) { t = list_pop_head(tokens); lexer_push_token(lx, t); } else if(t->type == TOKEN_SPACE) { //Discard spaces between modifiers. t = list_pop_head(tokens); lexer_free_token(t); } else { break; } } /* Mark end of modifiers. */ lexer_push_token(lx, lexer_pack_token(lx, TOKEN_COMMAND_MOD_END)); /* Now merge tha actual command tokens */ /* Gives the number of actual command tokens, not taking into account command modifiers. */ int count = 0; while((t = list_pop_head(tokens))) { count++; lexer_push_token(lx, t); } list_delete(tokens); if(count < 1) lexer_report_error(lx, "Command is empty.\n"); return count; }
/*
 * Reads one filename token. At least one character must be consumed,
 * otherwise an error is reported.
 */
struct token *lexer_read_filename(struct lexer *lx)
{
	if(lexer_read_filename_recursive(lx) < 1)
		lexer_report_error(lx, "Expecting a filename.");

	return lexer_pack_token(lx, TOKEN_LITERAL);
}
/* A comment starts with # and ends with a newline, or end-of-file */
void lexer_discard_comments(struct lexer_book *bk)
{
	if(lexer_next_peek(bk) != '#')
		lexer_report_error(bk, "Expecting a comment.");

	/* Consume through (and including) the newline or EOF. */
	char c = lexer_next_char(bk);
	while(c != '\n' && c != CHAR_EOF)
		c = lexer_next_char(bk);
}
/*
 * Reads an identifier (keyword or variable name), stopping at any of the
 * SYNTAX_LIMITS characters. The name must be at least one char long.
 */
struct token *lexer_read_syntax_name(struct lexer_book *bk)
{
	if(lexer_read_until(bk, SYNTAX_LIMITS) < 1)
		lexer_report_error(bk, "Expecting a keyword or a variable name.");

	return lexer_pack_token(bk, LITERAL);
}
static int dag_parse_node_nested_makeflow(struct lexer *bk, struct dag_node *n) { struct token *t, *start; dag_parse_drop_spaces(bk); //Get the dag's file name. t = lexer_next_token(bk); if(t->type == TOKEN_LITERAL) { n->makeflow_dag = xxstrdup(t->lexeme); start = t; } else { lexer_report_error(bk, "At least the name of the Makeflow file should be specified in a recursive call.\n"); return 0; // not reached, silences warning } dag_parse_drop_spaces(bk); //Get dag's working directory. t = lexer_peek_next_token(bk); if(t->type == TOKEN_LITERAL) { t = lexer_next_token(bk); n->makeflow_cwd = xxstrdup(t->lexeme); lexer_free_token(t); } else { n->makeflow_cwd = xxstrdup("."); } dag_parse_drop_spaces(bk); //Get wrapper's name char *wrapper = NULL; t = lexer_peek_next_token(bk); if(t->type == TOKEN_LITERAL) { wrapper = xxstrdup(t->lexeme); lexer_free_token(t); } else { wrapper = xxstrdup(""); } free(start->lexeme); start->lexeme = string_format("cd %s && %s %s %s", n->makeflow_cwd, wrapper, "makeflow", n->makeflow_dag); free(wrapper); dag_parse_drop_spaces(bk); lexer_preppend_token(bk, start); return dag_parse_node_regular_command(bk, n); }
static int dag_parse_node_regular_command(struct lexer *bk, struct dag_node *n) { struct buffer b; buffer_init(&b); struct token *t; while((t = lexer_next_token(bk)) && t->type != TOKEN_NEWLINE) { switch(t->type) { case TOKEN_SPACE: buffer_printf(&b, " "); break; case TOKEN_LITERAL: buffer_printf(&b, "%s", t->lexeme); break; case TOKEN_IO_REDIRECT: buffer_printf(&b, "%s", t->lexeme); break; default: lexer_report_error(bk, "Unexpected command token: %s.\n", lexer_print_token(t)); break; } lexer_free_token(t); } if(!t) { lexer_report_error(bk, "Command does not end with newline.\n"); } n->command = xxstrdup(buffer_tostring(&b)); buffer_free(&b); debug(D_MAKEFLOW_PARSER, "node command=%s", n->command); return 1; }
/*
 * Returns the next token while scanning a rule's file list: NEWLINE,
 * COLON, SPACE, REMOTE_RENAME (->), a substitution, a quoted literal, or
 * a filename. Returns NULL at end of file.
 */
struct token *lexer_read_file(struct lexer *lx)
{
	int c = lexer_next_peek(lx);

	switch (c) {
	case CHAR_EOF:
		lx->lexeme_end++;
		lx->eof = 1;
		/* EOF mid-list is only an error at the top level
		 * (not inside a substitution sub-lexer). */
		if(lx->depth == 0)
			lexer_report_error(lx, "Found end of file while completing file list.\n");
		return NULL;
		break;
	case '\n':
		lexer_next_char(lx);	/* Jump \n */
		lexer_add_to_lexeme(lx, c);
		return lexer_pack_token(lx, TOKEN_NEWLINE);
		break;
	case '#':
		/* The comment swallows its newline; re-emit it as a token. */
		lexer_discard_comments(lx);
		lexer_add_to_lexeme(lx, '\n');
		return lexer_pack_token(lx, TOKEN_NEWLINE);
	case ':':
		lexer_next_char(lx);	/* Jump : */
		return lexer_pack_token(lx, TOKEN_COLON);
		break;
	case ' ':
	case '\t':
		/* Discard white-space and add space token. */
		lexer_discard_white_space(lx);
		return lexer_pack_token(lx, TOKEN_SPACE);
		break;
	case '$':
		return lexer_read_substitution(lx);
		break;
	case '\'':
		/* Quoted literal: the quotes are kept in the lexeme. */
		lexer_add_to_lexeme(lx, '\'');
		lexer_read_literal_quoted(lx);
		lexer_add_to_lexeme(lx, '\'');
		return lexer_pack_token(lx, TOKEN_LITERAL);
		break;
	case '-':
		if(lexer_peek_remote_rename_syntax(lx)) {
			lexer_next_char(lx);	/* Jump -> */
			lexer_next_char(lx);
			return lexer_pack_token(lx, TOKEN_REMOTE_RENAME);
		}
		/* Else fall through */
	default:
		return lexer_read_filename(lx);
		break;
	}
}
/*
 * Returns the next token while scanning a command line: NEWLINE, SPACE,
 * a substitution, an expandable double-quoted string, an IO redirect
 * (< or >), a quoted or plain literal. Returns NULL at end of file.
 */
struct token *lexer_read_command_argument(struct lexer *lx)
{
	int c = lexer_next_peek(lx);

	switch (c) {
	case CHAR_EOF:
		/* Found end of file while completing command */
		lx->lexeme_end++;
		lx->eof = 1;
		/* Only an error at the top level (not inside a substitution). */
		if(lx->depth == 0)
			lexer_report_error(lx, "Found end of file while completing command.\n");
		return NULL;
		break;
	case '\n':
		lexer_next_char(lx);	/* Jump \n */
		lexer_add_to_lexeme(lx, c);
		return lexer_pack_token(lx, TOKEN_NEWLINE);
		break;
	case '#':
		/* The comment swallows its newline; re-emit it as a token. */
		lexer_discard_comments(lx);
		lexer_add_to_lexeme(lx, '\n');
		return lexer_pack_token(lx, TOKEN_NEWLINE);
	case ' ':
	case '\t':
		return lexer_read_white_space(lx);
		break;
	case '$':
		return lexer_read_substitution(lx);
		break;
	case '"':
		return lexer_read_expandable(lx, '"');
		break;
	case '<':
	case '>':
		lexer_next_char(lx);	/* Jump <, > */
		lexer_add_to_lexeme(lx, c);
		return lexer_pack_token(lx, TOKEN_IO_REDIRECT);
		break;
	case '\'':
		/* Quoted literal: the quotes are kept in the lexeme. */
		lexer_add_to_lexeme(lx, '\'');
		lexer_read_literal(lx);
		lexer_add_to_lexeme(lx, '\'');
		return lexer_pack_token(lx, TOKEN_LITERAL);
		break;
	default:
		lexer_read_literal(lx);
		return lexer_pack_token(lx, TOKEN_LITERAL);
		break;
	}
}
/*
 * Returns the next token while scanning a rule's file list (old
 * lexer_book interface). Unlike the newer version, EOF is always an
 * error and white-space is discarded without emitting a SPACE token.
 */
struct token *lexer_read_file(struct lexer_book *bk)
{
	int c = lexer_next_peek(bk);
	switch(c) {
	case CHAR_EOF:
		lexer_report_error(bk, "Found end of file while completing file list.\n");
		return NULL;
		break;
	case '\n' :
		lexer_next_char(bk);	/* Jump \n */
		lexer_add_to_lexeme(bk, c);
		return lexer_pack_token(bk, NEWLINE);
		break;
	case '#' :
		/* The comment swallows its newline; re-emit it as a token. */
		lexer_discard_comments(bk);
		lexer_add_to_lexeme(bk, '\n');
		return lexer_pack_token(bk, NEWLINE);
	case ':' :
		lexer_next_char(bk);	/* Jump : */
		return lexer_pack_token(bk, COLON);
		break;
	case ' ' :
	case '\t':
		/* Discard white-space and try again */
		lexer_discard_white_space(bk);
		return lexer_read_file(bk);
		break;
	case '$' :
		return lexer_read_substitution(bk);
		break;
	case '\'':
		/* Quoted literal: the quotes are kept in the lexeme. */
		lexer_add_to_lexeme(bk, '\'');
		lexer_read_literal_quoted(bk);
		lexer_add_to_lexeme(bk, '\'');
		return lexer_pack_token(bk, LITERAL);
		break;
	case '-' :
		if( lexer_peek_remote_rename_syntax(bk) ) {
			lexer_next_char(bk);	/* Jump -> */
			lexer_next_char(bk);
			return lexer_pack_token(bk, REMOTE_RENAME);
		}
		/* Else fall through */
	default:
		return lexer_read_filename(bk);
		break;
	}
}
/*
 * Parses a top-level syntax construct. "export" is currently the only
 * recognized keyword. Returns 1 on success.
 */
static int dag_parse_syntax(struct lexer *bk)
{
	struct token *t = lexer_next_token(bk);

	if(strcmp(t->lexeme, "export") == 0) {
		lexer_free_token(t);
		dag_parse_export(bk);
	} else {
		/* BUG FIX: message previously read "Unknown syntax keyboard". */
		lexer_report_error(bk, "Unknown syntax keyword.\n");
	}

	return 1;
}
/* Read everything between single quotes */
int lexer_read_literal_quoted(struct lexer * lx)
{
	if(lexer_next_peek(lx) != '\'')
		lexer_report_error(lx, "Missing opening quote.\n");

	lexer_add_to_lexeme(lx, lexer_next_char(lx));	/* Add first ' */

	/* Escaped quotes inside do not terminate the literal. */
	int count = lexer_read_escaped_until(lx, "'");

	lexer_add_to_lexeme(lx, lexer_next_char(lx));	/* Add second ' */

	return count;
}
accept_t lexer_read_syntax(struct lexer_book * bk) { lexer_discard_white_space(bk); struct token *name = lexer_read_syntax_name(bk); if(strcmp("export", name->lexeme) == 0) return lexer_read_syntax_export(bk, name); else if(lexer_unquoted_look_ahead_count(bk, "=") > -1) return lexer_read_variable(bk, name); else { lexer_roll_back(bk, strlen(name->lexeme)); lexer_report_error(bk, "Unrecognized keyword: %s.", name->lexeme); } return NO; }
/*
 * Dispatches on the first word of a line: "export" lines, variable
 * definitions (a '=' appears before any quote), or an error for
 * anything else.
 */
int lexer_read_syntax_or_variable(struct lexer * lx)
{
	lexer_discard_white_space(lx);

	struct token *name = lexer_read_syntax_name(lx);

	if(!strcmp("export", name->lexeme))
		return lexer_read_syntax_export(lx, name);

	if(lexer_unquoted_look_ahead_count(lx, "=") > -1)
		return lexer_read_variable(lx, name);

	/* Not a recognized construct: rewind past the name and report. */
	lexer_roll_back(lx, strlen(name->lexeme));
	lexer_report_error(lx, "Unrecognized keyword: %s.", name->lexeme);

	return 1;
}
/*
 * Top-level parser: builds dag d from the makeflow file in stream.
 * Repeatedly peeks the next token and dispatches to the matching
 * sub-parser. Returns 1 on success.
 */
static int dag_parse(struct dag *d, FILE *stream)
{
	struct lexer *bk = lexer_create(STREAM, stream, 1, 1);

	bk->d = d;
	bk->stream = stream;
	bk->category = dag_task_category_lookup_or_create(d, "default");

	/* Lookup set shared with the lexer so substitutions resolve against
	 * the current category/node scope; lives on this stack frame. */
	struct dag_variable_lookup_set s = { d, NULL, NULL, NULL };
	bk->environment = &s;

	struct token *t;
	while((t = lexer_peek_next_token(bk))) {
		/* Reset the scope before each top-level construct. */
		s.category = bk->category;
		s.node = NULL;
		s.table = NULL;

		switch (t->type) {
		case TOKEN_NEWLINE:
		case TOKEN_SPACE:
			/* Skip newlines, spaces at top level. */
			lexer_free_token(lexer_next_token(bk));
			break;
		case TOKEN_SYNTAX:
			dag_parse_syntax(bk);
			break;
		case TOKEN_FILES:
			dag_parse_node(bk);
			break;
		case TOKEN_VARIABLE:
			dag_parse_variable(bk, NULL);
			break;
		default:
			lexer_report_error(bk, "Unexpected token. Expected one of NEWLINE, SPACE, SYNTAX, FILES, or VARIABLE, but got: %s\n:", lexer_print_token(t));
			break;
		}
	}

	dag_close_over_environment(d);
	dag_compile_ancestors(d);

	/* NOTE(review): plain free() of the lexer; if lexer_create allocates
	 * internal buffers, a dedicated lexer_delete() would be needed to
	 * avoid leaking them — confirm against the lexer implementation. */
	free(bk);

	return 1;
}
/* Consolidates a sequence of white space into a single SPACE token */
struct token *lexer_read_white_space(struct lexer_book *bk)
{
	int count = 0;

	/* Consume the whole run of white-space characters. */
	for(; strchr(WHITE_SPACE, lexer_next_peek(bk)); count++)
		lexer_next_char(bk);

	if(count < 1) {
		lexer_report_error(bk, "Expecting white space.");
	} else {
		/* Represent the whole run as one space. */
		lexer_add_to_lexeme(bk, ' ');
		return lexer_pack_token(bk, SPACE);
	}

	return NULL;
}
/* Consolidates a sequence of white space into a single SPACE token */
struct token *lexer_read_white_space(struct lexer *lx)
{
	/* NOTE(review): discarding first means the while loop below usually
	 * finds nothing further; count then reflects the discarded run —
	 * confirm lexer_discard_white_space returns that count. */
	int count = lexer_discard_white_space(lx);

	while(strchr(WHITE_SPACE, lexer_next_peek(lx))) {
		count++;
		lexer_next_char(lx);
	}

	if(count > 0) {
		/* Represent the whole run as one space. */
		lexer_add_to_lexeme(lx, ' ');
		return lexer_pack_token(lx, TOKEN_SPACE);
	} else
		lexer_report_error(lx, "Expecting white space.");

	return NULL;
}
/*
 * Advances the lexer by one character and returns it. Input is held in a
 * double buffer of two BUFFER_CHUNK_SIZE chunks; when the cursor reaches
 * the end of one chunk, the next chunk is loaded and the cursor wraps
 * around the buffer. Also maintains line/column bookkeeping and the eof
 * flag. Returns CHAR_EOF at end of input.
 */
char lexer_next_char(struct lexer_book *bk)
{
	if( *bk->lexeme_end == CHAR_EOF ) {
		return CHAR_EOF;
	}

	/* If at the end of chunk, load the next chunk. */
	if( ((bk->lexeme_end + 1) == (bk->buffer + BUFFER_CHUNK_SIZE - 1)) || ((bk->lexeme_end + 1) == (bk->buffer + 2*BUFFER_CHUNK_SIZE - 1)) ) {
		/* The current lexeme spans a whole chunk: nowhere left to grow. */
		if( bk->lexeme_max == BUFFER_CHUNK_SIZE - 1 )
			lexer_report_error(bk, "Input buffer is full. Runaway token?");	//BUG: This is really a recoverable error, increase the buffer size.
		/* Wrap around the file chunks */
		else if( bk->lexeme_end == bk->buffer + 2 * BUFFER_CHUNK_SIZE - 2)
			bk->lexeme_end = bk->buffer;
		/* Position at the beginning of next chunk */
		else
			bk->lexeme_end+=2;	/* presumably skips a chunk-boundary marker byte — confirm buffer layout */
		lexer_load_chunk(bk);
	} else
		bk->lexeme_end++;

	char c = *bk->lexeme_end;

	if( c == '\n' ) {
		/* Save the previous column so roll-backs across \n can restore it. */
		bk->line_number++;
		list_push_head(bk->column_numbers, (uint64_t *) bk->column_number);
		bk->column_number = 1;
	} else {
		bk->column_number++;
	}

	if( c == CHAR_EOF ) {
		bk->eof = 1;
	}

	return c;
}
/*
 * Lexes a rule's file list: pushes a TOKEN_FILES marker, then the file
 * tokens with consecutive literals merged. Returns the number of tokens
 * appended after the marker.
 */
int lexer_read_file_list(struct lexer *lx)
{
	/* Add file list start marker */
	lexer_push_token(lx, lexer_pack_token(lx, TOKEN_FILES));

	struct list *file_tokens = lexer_read_file_list_aux(lx);
	lexer_concatenate_consecutive_literals(file_tokens);

	int appended = lexer_append_tokens(lx, file_tokens);
	if(appended < 1)
		lexer_report_error(lx, "Rule files specification is empty.\n");

	list_delete(file_tokens);

	return appended;
}