// Ditch tokens until a legal one is found // The token set must be TK_NONE terminated static void ditch_restart(parser_t* parser, rule_state_t* state) { assert(parser != NULL); assert(state != NULL); assert(state->restart != NULL); if(trace_enable) printf("Rule %s: Attempting recovery:\n", state->fn_name); while(true) { token_id id = current_token_id(parser); for(const token_id* p = state->restart; *p != TK_NONE; p++) { if(*p == id) { // Legal token found if(trace_enable) printf(" recovered with %s\n", token_print(parser->token)); return; } } // Current token is not in legal set, ditch it if(trace_enable) printf(" ignoring %d %s %s\n", id, lexer_print(id), token_print(parser->token)); consume_token_no_ast(parser); } }
// Don't test printing code to thoroughly because it will change a lot void test_print_functions() { char* output_ptr = NULL; size_t output_len = 0; FILE* output = NULL; node_p module = node_alloc(NT_MODULE); module->module.filename = str_from_c("tokenizer_test.c/test_print_functions"); module->module.source = str_from_c("x = \n1 + y\n\"next\nline\""); tokenize(module->module.source, &module->tokens, stderr); st_check_int(module->tokens.len, 12); output = open_memstream(&output_ptr, &output_len); token_print(output, &module->tokens.ptr[10], TP_SOURCE); fclose(output); st_check_str(output_ptr, "\"next\nline\""); output = open_memstream(&output_ptr, &output_len); token_print(output, &module->tokens.ptr[10], TP_DUMP); fclose(output); st_check_not_null( strstr(output_ptr, "\"next\nline\"") ); output = open_memstream(&output_ptr, &output_len); token_print(output, &module->tokens.ptr[10], TP_INLINE_DUMP); fclose(output); st_check_not_null( strstr(output_ptr, "\"next\\nline\"") ); }
/* Tidy up a successfully parsed rule.
 *
 * Processes any deferred AST, applies a scope to the rule's AST when the
 * rule asks for one, and then, if the rule has a restart set, verifies that
 * the next token belongs to it.
 *
 * Args:
 *    parser is the parser whose current token is checked.
 *    state is the state of the rule being completed. state->restart, when
 *      not NULL, is a TK_NONE terminated list of legal following tokens.
 *
 * Returns:
 *    state->ast (which may be NULL) when there is no restart set or the next
 *    token is in it.
 *    NULL when the next token is not in the restart set. In that case an
 *    error is reported, the AST is freed, parser->failed is set and tokens
 *    are ditched until a legal one is found.
 */
ast_t* parse_rule_complete(parser_t* parser, rule_state_t* state)
{
  assert(parser != NULL);
  assert(state != NULL);

  process_deferred_ast(parser, state);

  if(state->scope && state->ast != NULL)
    ast_scope(state->ast);

  if(trace_enable)
    printf("Rule %s: Complete\n", state->fn_name);

  // Without a restart point there is nothing further to check.
  if(state->restart == NULL)
    return state->ast;

  // We have a restart point, check the next token is legal.
  token_id next_id = current_token_id(parser);

  if(trace_enable)
  {
    printf("Rule %s: Check restart set for next token %s\n", state->fn_name,
      token_print(parser->token));
  }

  // Walk the restart set until we hit either the terminator or a match.
  const token_id* legal = state->restart;

  while(*legal != TK_NONE && *legal != next_id)
    legal++;

  if(*legal != TK_NONE)
  {
    // Legal token found
    if(trace_enable)
      printf("Rule %s: Restart check successful\n", state->fn_name);

    return state->ast;
  }

  // Next token is not in restart set, error
  if(trace_enable)
    printf("Rule %s: Restart check error\n", state->fn_name);

  assert(parser->token != NULL);

  error(parser->source, token_line_number(parser->token),
    token_line_position(parser->token),
    "syntax error: unexpected token %s after %s",
    token_print(parser->token), state->desc);

  ast_free(state->ast);
  parser->failed = true;
  ditch_restart(parser, state);
  return NULL;
}
/*
 * Scan every line of `filename`, printing each token found.
 *
 * The file is read line by line: a buffer sized by
 * get_current_line_length() is filled by read_line(), then scan_string() is
 * called repeatedly on it until it returns NULL. Each token is printed with
 * token_print() and released with token_free().
 *
 * The process exits with status 1 if the file cannot be opened, if a line
 * buffer cannot be allocated, or if a token of type INVALID is found (the
 * offending text and its row/column are reported first).
 */
void scan_file(char *filename)
{
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        printf("Can't open input file %s!\n", filename);
        exit(1);
    }

    int current_line = 0;
    int more_lines = true;

    while (more_lines) {
        int line_len = get_current_line_length(fp);

        /*
         * Keep the start of the allocation separately: scan_string()
         * advances `cursor`, so the start is the only pointer that may be
         * passed to free() and the only valid base for column arithmetic.
         */
        char *line = malloc(line_len);
        if (line == NULL && line_len > 0) {
            printf("Out of memory while reading %s\n", filename);
            fclose(fp);
            exit(1);
        }
        char *cursor = line;

        more_lines = read_line(fp, cursor);
        current_line++;

        while (1) {
            Token *token = scan_string(&cursor);
            if (!token) {
                break;
            }

            if (token->type == INVALID) {
                printf("Error: Unrecognized token '%s' in %s (row %i, col %i)\n",
                       token->text, filename, current_line,
                       (int)(cursor - line));
                exit(1);
            }

            token_print(*token);
            token_free(token);
        }

        /* Was free(buf - line_len), which is only right if the scanner
         * happened to consume exactly line_len bytes. */
        free(line);
    }

    fclose(fp);
}
/*
 * Lexer driver: tokenizes the file named by argv[1] and prints every token
 * up to (not including) the end-of-file token.
 *
 * Returns 0 on success and 1 when the lexer object cannot be allocated, no
 * input file is given, or lexer_init() fails.
 */
int
main (int argc, char *argv[])
{
  int status = 1;
  struct lexer *lex = malloc (sizeof *lex);
  struct token *tok = NULL;

  /* Do not hand a NULL lexer to lexer_init.  */
  if (lex == NULL)
    {
      fprintf (stderr, "Out of memory\n");
      goto cleanup;
    }

  if (argc <= 1)
    {
      fprintf (stderr, "No input file\n");
      goto cleanup;
    }

  if (!lexer_init (lex, argv[1]))
    goto cleanup;

  while ((tok = lexer_get_token (lex))->tok_class != tok_eof)
    {
      token_print (tok);
      token_free (tok);
    }

  /* Release the end-of-file token that terminated the loop.  */
  token_free (tok);
  lexer_finalize (lex);
  status = 0;

cleanup:
  /* free (NULL) is a no-op, so no guard is needed.  */
  free (lex);
  return status;
}
// Compute the number of characters needed to print the given AST node and
// everything below it on a single line.
//
// ast is the node to measure.
// indent is the indentation level; each level contributes in_len characters.
// type is true when the node is being measured as the type of another node.
//
// The fixed increments below (2, 6, 5, 6, 1) are the extra characters the
// printer is presumed to emit around each element -- TODO confirm against the
// matching print routine.
static size_t length(ast_t* ast, size_t indent, bool type)
{
  size_t total = strlen(token_print(ast->t)) + (indent * in_len);

  if(type || (ast->child != NULL) || (ast->type != NULL))
    total += 2;

  switch(token_get_id(ast->t))
  {
    case TK_STRING:
      total += 6;
      break;

    case TK_ID:
      total += 5;
      break;

    default:
      break;
  }

  if(ast->symtab != NULL)
    total += 6;

  // Each child costs one separator plus its own (unindented) length.
  for(ast_t* kid = ast->child; kid != NULL; kid = kid->sibling)
    total += 1 + length(kid, 0, false);

  // The node's type, if any, is measured the same way with the type flag set.
  if(ast->type != NULL)
    total += 1 + length(ast->type, 0, true);

  return total;
}
/*
 * Print every token held in `list`, in index order from 0 to
 * list->length - 1, followed by a single newline.
 */
void list_print(struct List *list)
{
    int idx = 0;

    while (idx < list->length) {
        token_print(list->tokens[idx]);
        idx++;
    }

    printf("\n");
}
// Write the textual form of a token to fp.
// String tokens are wrapped in triple double-quotes, identifiers are written
// as "(id NAME)", and every other token is written as token_print() gives it.
static void print_token(FILE* fp, token_t* token)
{
  switch(token_get_id(token))
  {
    case TK_ID:
      fprintf(fp, "(id %s)", token_print(token));
      return;

    case TK_STRING:
      fprintf(fp, "\"\"\"%s\"\"\"", token_print(token));
      return;

    default:
      fprintf(fp, "%s", token_print(token));
      return;
  }
}
/*
 * Pull tokens from yylex() and print each one until yylex() returns 0.
 * Always returns 0.
 */
int main(void)
{
    for (;;) {
        int token = yylex();

        /* A zero token ends the input. */
        if (token == 0) {
            break;
        }

        token_print(token);
    }

    return 0;
}
// Write the textual form of a token to fp.
// String tokens are written escaped and wrapped in double quotes,
// identifiers are written as "(id NAME)", and every other token is written
// as token_print() gives it.
static void print_token(FILE* fp, token_t* token)
{
  switch(token_get_id(token))
  {
    case TK_ID:
      fprintf(fp, "(id %s)", token_print(token));
      return;

    case TK_STRING:
    {
      // The escaped copy is released here once it has been written.
      // NOTE(review): the size handed to the pool is strlen(text), which
      // excludes the terminating NUL -- confirm this matches the size
      // token_print_escaped() allocated with.
      char* text = token_print_escaped(token);
      fprintf(fp, "\"%s\"", text);
      ponyint_pool_free_size(strlen(text), text);
      return;
    }

    default:
      fprintf(fp, "%s", token_print(token));
      return;
  }
}
/*
 * Top-level entry point of the Token Ring printer.
 *
 * 'p' points to the TR header of the packet, 'h->len' is the length of the
 * packet off the wire and 'h->caplen' is the number of bytes actually
 * captured. The packet is handed to token_print() and its result is
 * returned unchanged.
 */
u_int
token_if_print(const struct pcap_pkthdr *h, packetbody_t p)
{
	const u_int result = token_print(p, h->len, h->caplen);

	return (result);
}
/*
 * Print a SNAP header and dispatch the payload to the matching printer.
 *
 * 'p' points at the 5-byte SNAP header: a 3-byte OUI followed by a 2-byte
 * protocol ID. 'length' is the remaining packet length, 'caplen' the number
 * of captured bytes, and 'bridge_pad' the number of padding bytes to skip
 * for RFC 2684 bridged payloads. 'extracted_ethertype' is passed through to
 * ether_encap_print().
 *
 * Returns the non-zero result of ether_encap_print() when it handles the
 * payload, 1 when another printer was invoked or the packet is truncated
 * ("[|snap]" is printed in that case), and 0 when nothing handled it.
 */
int
snap_print(const u_char *p, u_int length, u_int caplen,
    u_short *extracted_ethertype, u_int bridge_pad)
{
	u_int32_t orgcode;
	register u_short et;
	register int ret;

	TCHECK2(*p, 5);
	/*
	 * 'length' and 'caplen' are unsigned and are reduced by 5 below;
	 * treat anything shorter than a SNAP header as truncated so the
	 * subtraction cannot wrap around.
	 */
	if (caplen < 5 || length < 5)
		goto trunc;
	orgcode = EXTRACT_24BITS(p);
	et = EXTRACT_16BITS(p + 3);

	if (eflag) {
		const struct tok *tok = null_values;
		const struct oui_tok *otp;

		/* Find the protocol-ID name table for this OUI, if any. */
		for (otp = &oui_to_tok[0]; otp->tok != NULL; otp++) {
			if (otp->oui == orgcode) {
				tok = otp->tok;
				break;
			}
		}
		(void)printf("oui %s (0x%06x), %s %s (0x%04x): ",
		    tok2str(oui_values, "Unknown", orgcode),
		    orgcode,
		    (orgcode == 0x000000 ? "ethertype" : "pid"),
		    tok2str(tok, "Unknown", et),
		    et);
	}

	/* Step over the SNAP header. */
	p += 5;
	length -= 5;
	caplen -= 5;

	switch (orgcode) {
	case OUI_ENCAP_ETHER:
	case OUI_CISCO_90:
		/*
		 * This is an encapsulated Ethernet packet,
		 * or a packet bridged by some piece of
		 * Cisco hardware; the protocol ID is
		 * an Ethernet protocol type.
		 */
		ret = ether_encap_print(et, p, length, caplen,
		    extracted_ethertype);
		if (ret)
			return (ret);
		break;

	case OUI_APPLETALK:
		if (et == ETHERTYPE_ATALK) {
			/*
			 * No, I have no idea why Apple used one
			 * of their own OUIs, rather than
			 * 0x000000, and an Ethernet packet
			 * type, for Appletalk data packets,
			 * but used 0x000000 and an Ethernet
			 * packet type for AARP packets.
			 */
			ret = ether_encap_print(et, p, length, caplen,
			    extracted_ethertype);
			if (ret)
				return (ret);
		}
		break;

	case OUI_CISCO:
		if (et == PID_CISCO_CDP) {
			cdp_print(p, length, caplen);
			return (1);
		}
		break;

	case OUI_RFC2684:
		switch (et) {

		case PID_RFC2684_ETH_FCS:
		case PID_RFC2684_ETH_NOFCS:
			/*
			 * XXX - remove the last two bytes for
			 * PID_RFC2684_ETH_FCS?
			 */
			/*
			 * Skip the padding.
			 */
			TCHECK2(*p, bridge_pad);
			caplen -= bridge_pad;
			length -= bridge_pad;
			p += bridge_pad;

			/*
			 * What remains is an Ethernet packet.
			 */
			ether_print(p, length, caplen);
			return (1);

		case PID_RFC2684_802_5_FCS:
		case PID_RFC2684_802_5_NOFCS:
			/*
			 * XXX - remove the last two bytes for
			 * PID_RFC2684_ETH_FCS?
			 */
			/*
			 * Skip the padding, but not the Access
			 * Control field.
			 */
			TCHECK2(*p, bridge_pad);
			caplen -= bridge_pad;
			length -= bridge_pad;
			p += bridge_pad;

			/*
			 * What remains is an 802.5 Token Ring
			 * packet.
			 */
			token_print(p, length, caplen);
			return (1);

		case PID_RFC2684_FDDI_FCS:
		case PID_RFC2684_FDDI_NOFCS:
			/*
			 * XXX - remove the last two bytes for
			 * PID_RFC2684_ETH_FCS?
			 */
			/*
			 * Skip the padding.
			 */
			TCHECK2(*p, bridge_pad + 1);
			caplen -= bridge_pad + 1;
			length -= bridge_pad + 1;
			p += bridge_pad + 1;

			/*
			 * What remains is an FDDI packet.
			 */
			fddi_print(p, length, caplen);
			return (1);

		case PID_RFC2684_BPDU:
			stp_print(p, length);
			return (1);
		}
	}
	return (0);

trunc:
	(void)printf("[|snap]");
	return (1);
}
/* * This is the top level routine of the printer. 'p' points * to the TR header of the packet, 'h->ts' is the timestamp, * 'h->len' is the length of the packet off the wire, and 'h->caplen' * is the number of bytes actually captured. */ u_int token_if_print(const struct pcap_pkthdr *h, const u_char *p) { return (token_print(p, h->len, h->caplen)); }
/*
 * Print a SNAP header and dispatch the payload to the matching printer.
 *
 * 'p' points at the 5-byte SNAP header: a 3-byte OUI followed by a 2-byte
 * protocol ID. 'length' is the remaining packet length and 'caplen' the
 * number of captured bytes. 'src' and 'dst' are the link-layer addresses
 * (either may be NULL); they are passed to ethertype_print() and printed
 * here when no printer handled the payload and ndo_eflag is off.
 * 'bridge_pad' is the number of padding bytes to skip for RFC 2684 bridged
 * payloads.
 *
 * Returns the non-zero result of ethertype_print() when it handles the
 * payload, 1 when another printer was invoked or the packet is truncated
 * ("[|snap]" is printed in that case), and 0 when nothing handled it.
 */
int
snap_print(netdissect_options *ndo, const u_char *p, u_int length, u_int caplen,
	const struct lladdr_info *src, const struct lladdr_info *dst,
	u_int bridge_pad)
{
	uint32_t orgcode;
	u_short et;
	int ret;

	ND_TCHECK_5(p);
	/* Anything shorter than the 5-byte SNAP header is truncated. */
	if (caplen < 5 || length < 5)
		goto trunc;
	orgcode = EXTRACT_BE_U_3(p);
	et = EXTRACT_BE_U_2(p + 3);

	if (ndo->ndo_eflag) {
		/*
		 * Somebody's already printed the MAC addresses, if there
		 * are any, so just print the SNAP header, not the MAC
		 * addresses.
		 */
		ND_PRINT("oui %s (0x%06x), %s %s (0x%04x), length %u: ",
		     tok2str(oui_values, "Unknown", orgcode),
		     orgcode,
		     (orgcode == 0x000000 ? "ethertype" : "pid"),
		     tok2str(oui_to_struct_tok(orgcode), "Unknown", et),
		     et, length - 5);
	}
	/* Step over the SNAP header. */
	p += 5;
	length -= 5;
	caplen -= 5;

	switch (orgcode) {
	case OUI_ENCAP_ETHER:
	case OUI_CISCO_90:
		/*
		 * This is an encapsulated Ethernet packet,
		 * or a packet bridged by some piece of
		 * Cisco hardware; the protocol ID is
		 * an Ethernet protocol type.
		 */
		ret = ethertype_print(ndo, et, p, length, caplen, src, dst);
		if (ret)
			return (ret);
		break;

	case OUI_APPLETALK:
		if (et == ETHERTYPE_ATALK) {
			/*
			 * No, I have no idea why Apple used one
			 * of their own OUIs, rather than
			 * 0x000000, and an Ethernet packet
			 * type, for Appletalk data packets,
			 * but used 0x000000 and an Ethernet
			 * packet type for AARP packets.
			 */
			ret = ethertype_print(ndo, et, p, length, caplen, src, dst);
			if (ret)
				return (ret);
		}
		break;

	case OUI_CISCO:
		/* Cisco protocol IDs each have a dedicated printer. */
		switch (et) {
		case PID_CISCO_CDP:
			cdp_print(ndo, p, length, caplen);
			return (1);
		case PID_CISCO_DTP:
			dtp_print(ndo, p, length);
			return (1);
		case PID_CISCO_UDLD:
			udld_print(ndo, p, length);
			return (1);
		case PID_CISCO_VTP:
			vtp_print(ndo, p, length);
			return (1);
		case PID_CISCO_PVST:
		case PID_CISCO_VLANBRIDGE:
			stp_print(ndo, p, length);
			return (1);
		default:
			break;
		}
		break;

	case OUI_RFC2684:
		switch (et) {

		case PID_RFC2684_ETH_FCS:
		case PID_RFC2684_ETH_NOFCS:
			/*
			 * XXX - remove the last two bytes for
			 * PID_RFC2684_ETH_FCS?
			 */
			/*
			 * Skip the padding.
			 */
			ND_TCHECK_LEN(p, bridge_pad);
			caplen -= bridge_pad;
			length -= bridge_pad;
			p += bridge_pad;

			/*
			 * What remains is an Ethernet packet.
			 */
			ether_print(ndo, p, length, caplen, NULL, NULL);
			return (1);

		case PID_RFC2684_802_5_FCS:
		case PID_RFC2684_802_5_NOFCS:
			/*
			 * XXX - remove the last two bytes for
			 * PID_RFC2684_ETH_FCS?
			 */
			/*
			 * Skip the padding, but not the Access
			 * Control field.
			 */
			ND_TCHECK_LEN(p, bridge_pad);
			caplen -= bridge_pad;
			length -= bridge_pad;
			p += bridge_pad;

			/*
			 * What remains is an 802.5 Token Ring
			 * packet.
			 */
			token_print(ndo, p, length, caplen);
			return (1);

		case PID_RFC2684_FDDI_FCS:
		case PID_RFC2684_FDDI_NOFCS:
			/*
			 * XXX - remove the last two bytes for
			 * PID_RFC2684_ETH_FCS?
			 */
			/*
			 * Skip the padding.
			 */
			ND_TCHECK_LEN(p, bridge_pad + 1);
			caplen -= bridge_pad + 1;
			length -= bridge_pad + 1;
			p += bridge_pad + 1;

			/*
			 * What remains is an FDDI packet.
			 */
			fddi_print(ndo, p, length, caplen);
			return (1);

		case PID_RFC2684_BPDU:
			stp_print(ndo, p, length);
			return (1);
		}
	}
	/* Nothing above handled the payload; fall back to a summary line. */
	if (!ndo->ndo_eflag) {
		/*
		 * Nobody printed the link-layer addresses, so print them, if
		 * we have any.
		 */
		if (src != NULL && dst != NULL) {
			ND_PRINT("%s > %s ",
				(src->addr_string)(ndo, src->addr),
				(dst->addr_string)(ndo, dst->addr));
		}
		/*
		 * Print the SNAP header, but if the OUI is 000000, don't
		 * bother printing it, and report the PID as being an
		 * ethertype.
		 */
		if (orgcode == 0x000000) {
			ND_PRINT("SNAP, ethertype %s (0x%04x), length %u: ",
			     tok2str(ethertype_values, "Unknown", et),
			     et, length);
		} else {
			ND_PRINT("SNAP, oui %s (0x%06x), pid %s (0x%04x), length %u: ",
			     tok2str(oui_values, "Unknown", orgcode),
			     orgcode,
			     tok2str(oui_to_struct_tok(orgcode), "Unknown", et),
			     et, length);
		}
	}
	return (0);

trunc:
	ND_PRINT("[|snap]");
	return (1);
}
// Return the printable text of the token held by the given AST node, as
// produced by token_print(). The node must not be NULL.
const char* ast_get_print(ast_t* ast)
{
  assert(ast != NULL);

  const char* text = token_print(ast->t);
  return text;
}
// Append a textual rendering of the type AST node `type` to `buffer`.
// The rendering is chosen by the node's token id; composite types recurse
// through print_typeexpr() or print_type().
static void print_type(printbuf_t* buffer, ast_t* type)
{
  switch(ast_id(type))
  {
    case TK_NOMINAL:
    {
      AST_GET_CHILDREN(type, package, id, typeargs, cap, ephemeral);

      // The sibling following the ephemeral child, when present and not
      // TK_NONE, is printed as a "name." prefix.
      ast_t* origpkg = ast_sibling(ephemeral);

      if(origpkg != NULL && ast_id(origpkg) != TK_NONE)
        printbuf(buffer, "%s.", ast_name(origpkg));

      // If the node carries data, the name is taken from that definition's
      // first child instead of from the node's own id child.
      ast_t* def = (ast_t*)ast_data(type);

      if(def != NULL)
        id = ast_child(def);

      printbuf(buffer, "%s", ast_nice_name(id));

      if(ast_id(typeargs) != TK_NONE)
        print_typeexpr(buffer, typeargs, ", ", true);

      if(ast_id(cap) != TK_NONE)
        printbuf(buffer, " %s", token_print(cap->t));

      // NOTE(review): no space before the ephemeral marker here, whereas
      // the TK_TYPEPARAMREF case below prints one -- confirm which is
      // intended.
      if(ast_id(ephemeral) != TK_NONE)
        printbuf(buffer, "%s", token_print(ephemeral->t));

      break;
    }

    // Composite types: the children are printed with the given separator.
    case TK_UNIONTYPE:
      print_typeexpr(buffer, type, " | ", false);
      break;

    case TK_ISECTTYPE:
      print_typeexpr(buffer, type, " & ", false);
      break;

    case TK_TUPLETYPE:
      print_typeexpr(buffer, type, ", ", false);
      break;

    case TK_TYPEPARAMREF:
    {
      AST_GET_CHILDREN(type, id, cap, ephemeral);
      printbuf(buffer, "%s", ast_nice_name(id));

      if(ast_id(cap) != TK_NONE)
        printbuf(buffer, " %s", token_print(cap->t));

      if(ast_id(ephemeral) != TK_NONE)
        printbuf(buffer, " %s", token_print(ephemeral->t));

      break;
    }

    case TK_ARROW:
    {
      // Printed as "left->right", each side rendered recursively.
      AST_GET_CHILDREN(type, left, right);
      print_type(buffer, left);
      printbuf(buffer, "->");
      print_type(buffer, right);
      break;
    }

    // The remaining special types are printed as fixed text.
    case TK_THISTYPE:
      printbuf(buffer, "this");
      break;

    case TK_DONTCARE:
      printbuf(buffer, "_");
      break;

    case TK_FUNTYPE:
      printbuf(buffer, "function");
      break;

    case TK_INFERTYPE:
      printbuf(buffer, "to_infer");
      break;

    case TK_ERRORTYPE:
      printbuf(buffer, "<type error>");
      break;

    // TK_NONE prints nothing.
    case TK_NONE:
      break;

    // Anything else falls back to the node's own token text.
    default:
      printbuf(buffer, "%s", token_print(type->t));
  }
}
/*
 * Top-level entry point of the Token Ring printer.
 *
 * 'p' points to the TR header of the packet, 'h->len' is the length of the
 * packet off the wire and 'h->caplen' is the number of bytes actually
 * captured. The packet is handed to token_print() together with 'ndo' and
 * its result is returned unchanged.
 */
u_int
token_if_print(netdissect_options *ndo, const struct pcap_pkthdr *h, const u_char *p)
{
	const u_int result = token_print(ndo, p, h->len, h->caplen);

	return (result);
}
/* * Command-line front end for compiler. */ int main(int argc, char **argv) { int action; int arg; buffer_t *in_buffer; buffer_t *out_buffer; int is_done; parser_t *parser; lexer_t *lexer; token_t *token; /* Set default settings. */ action = ACT_TRANS; in_buffer = buffer_create(stdin); out_buffer = buffer_create(stdout); /* Parse command-line arguments. */ for (arg = 1; arg < argc; arg++) { if ((strcmp(argv[arg], "--help") == 0) || (strcmp(argv[arg], "-h") == 0)) { action = ACT_USAGE; } else if ((strcmp(argv[arg], "--lex") == 0) && (action <= ACT_LEX)) { action = ACT_LEX; } else if ((strcmp(argv[arg], "--parse") == 0) && (action <= ACT_PARSE)) { action = ACT_PARSE; } else if ((strcmp(argv[arg], "--translate") == 0) && (action <= ACT_TRANS)) { action = ACT_TRANS; } else { fprintf(stderr, "Invalid argument: %s\n", argv[arg]); /* Stop parsing command-line. */ arg = argc; action = ACT_USAGE; } } /* Take action. */ if (action == ACT_USAGE) { printf( "Usage: compiler [option...]\n" "\n" " Options:\n" "\n" " -h, --help Display this help text.\n" " --lex Run the lexer.\n" " --parse Run the parser. (Calls the lexer.)\n" " --translate Run the translator. (Calls the parser.)\n" ); } else if (action == ACT_LEX) { is_done = 0; lexer = lexer_create(in_buffer); token = token_create(); while (!is_done) { lexer_lex(lexer, token); token_print(token, stdout); printf("\n"); if (token_get_class(token) == T_EOF) is_done = 1; } token_destroy(token); lexer_destroy(lexer); return EXIT_SUCCESS; } else if (action == ACT_PARSE) { parser = parser_create(in_buffer); parser_parse(parser); parser_destroy(parser); return EXIT_SUCCESS; } else if (action == ACT_TRANS) { parser = parser_create(in_buffer); parser_parse(parser); translator_translate(parser_get_tree(parser)); parser_destroy(parser); return EXIT_SUCCESS; } return EXIT_SUCCESS; }
/* Check if current token matches any in given set and consume on match. * Args: * terminating is the description of the structure this token terminates, * NULL for none. Used only for error messages. * id_set is a TK_NONE terminated list. * make_ast specifies whether to construct an AST node on match or discard * consumed token. * out_found reports whether an optional token was found. Only set on * success. May be set to NULL if this information is not needed. * * Returns: * PARSE_OK on success. * PARSE_ERROR to propogate a lexer error. * RULE_NOT_FOUND if current token is not is specified set. * NULL to propogate a restarted error. */ ast_t* parse_token_set(parser_t* parser, rule_state_t* state, const char* desc, const char* terminating, const token_id* id_set, bool make_ast, bool* out_found) { assert(parser != NULL); assert(state != NULL); assert(id_set != NULL); token_id id = current_token_id(parser); if(id == TK_LEX_ERROR) return propogate_error(parser, state); if(desc == NULL) desc = token_id_desc(id_set[0]); if(trace_enable) { printf("Rule %s: Looking for %s token%s %s. Found %s. ", state->fn_name, (state->deflt_id == TK_LEX_ERROR) ? "required" : "optional", (id_set[1] == TK_NONE) ? "" : "s", desc, token_print(parser->token)); } for(const token_id* p = id_set; *p != TK_NONE; p++) { // Match new line if the next token is the first on a line if(*p == TK_NEWLINE) { assert(parser->token != NULL); size_t last_token_line = parser->last_token_line; size_t next_token_line = token_line_number(parser->token); bool is_newline = (next_token_line != last_token_line); if(out_found != NULL) *out_found = is_newline; if(trace_enable) printf("\\n %smatched\n", is_newline ? 
"" : "not "); state->deflt_id = TK_LEX_ERROR; return PARSE_OK; } if(id == *p) { // Current token matches one in set if(trace_enable) printf("Compatible\n"); parser->last_matched = token_print(parser->token); if(make_ast) return handle_found(parser, state, consume_token(parser), default_builder, out_found); // AST not needed, discard token consume_token_no_ast(parser); return handle_found(parser, state, NULL, NULL, out_found); } } // Current token does not match any in current set if(trace_enable) printf("Not compatible\n"); return handle_not_found(parser, state, desc, terminating, out_found); }