/*
 * Allocate the global hash-based symbol table and pre-create the shared
 * keyword tokens ("int", "function", "void") so the lexer can hand them
 * out without allocating each time.  Exits on allocation failure instead
 * of leaving a NULL table behind (CERT MEM32-C).
 */
void init_symbtable(void)
{
    symbtable = (TOKEN**)calloc(HASH_SIZE, sizeof(TOKEN*));
    if (symbtable == NULL) {
        fprintf(stderr, "init_symbtable(): out of memory\n");
        exit(1);
    }
    int_token = new_token(INT);
    int_token->lexeme = "int";
    function_token = new_token(FUNCTION);
    function_token->lexeme = "function";
    void_token = new_token(VOID);
    void_token->lexeme = "void";
}
/*
 * Table-driven LR parser loop over `input` (array of `count` lexed tokens).
 * Drives `parse_table`, using lp_token.lex_id both as terminal id (on input
 * tokens) and as state number (on stack tokens).  Sets the global `success`
 * flag on accept; returns silently on parse error.
 *
 * NOTE(review): parse_stack and the tokens pushed onto it are never freed
 * on any exit path — presumably leaked or reclaimed elsewhere; confirm.
 */
void __parser(lp_token** input, int count)
{
    success=false;
    int top_state=0;
    int input_index=0;
    int accept_state=16;
    int pop_count=0;
    int action=0;
    //initialize the stack at state 0
    pcstack* parse_stack=new_pcstack();
    push_stack(parse_stack,(void*)new_token(0,0));
    while(true)
    {
        /* current state = lex_id of the token on top of the stack */
        top_state=((lp_token*)peek_stack(parse_stack))->lex_id;
        /* column 12 of parse_table is the end-of-input action */
        if(input_index==count)action=parse_table[top_state][12];
        else if(input[input_index]->lex_id>=12)return; /* terminal id out of range */
        else action=parse_table[top_state][input[input_index]->lex_id];
        if(action==accept_state)//accept
        {
            action=-((lp_token*)peek_stack(parse_stack))->lex_id;
            __prh(&action,parse_stack);
            //printf("accept\n");
            success=true;
            return;
        }
        if(action>0)//shift
        {
            //printf("shift\n");
            push_stack(parse_stack,(void*)new_token(action,input[input_index]->lex_val));
            ++input_index;
        }
        else if(action<0)//reduce
        {
            /* __prh returns how many states to pop for this production */
            pop_count=__prh(&action,parse_stack);
            if(pop_count==-1)break;//catch errors here
            while(pop_count>0)
            {
                pop_stack(parse_stack);
                --pop_count;
            }
            /* goto: push the state reached on the reduced nonterminal */
            push_stack(parse_stack,(void*)new_token(parse_table[((lp_token*)peek_stack(parse_stack))->lex_id][action],0));
            //printf("reduce\n");
        }
        else//error
        {
            //printf("error\n");
            return;
        }
    }
}
/*
 * Csound orchestra lexer: return a token for lexeme `s`.
 * UDO answer/argument lists are recognized first (depending on the global
 * `udoflag` parse phase).  Known symbols yield a private copy of the table
 * entry (caller may mutate/free it); unknown lexemes become a fresh T_IDENT.
 * `yyscanner` is accepted for the flex interface but not used here.
 */
ORCTOKEN *lookup_token(CSOUND *csound, char *s, void *yyscanner)
{
    int type = T_IDENT;
    ORCTOKEN *a;
    ORCTOKEN *ans;

    if (PARSER_DEBUG)
      csound->Message(csound, "Looking up token for: %s\n", s);

    /* udoflag == 0: expecting a UDO answer-type list */
    if (udoflag == 0) {
      if (isUDOAnsList(s)) {
        ans = new_token(csound, UDO_ANS_TOKEN);
        ans->lexeme = (char*)csound->Malloc(csound, 1+strlen(s));
        strcpy(ans->lexeme, s);
        return ans;
      }
    }
    /* udoflag == 1: expecting a UDO argument list */
    if (udoflag == 1) {
      if (csound->oparms->odebug) printf("Found UDO Arg List\n");
      if (isUDOArgList(s)) {
        ans = new_token(csound, UDO_ARGS_TOKEN);
        ans->lexeme = (char*)csound->Malloc(csound, 1+strlen(s));
        strcpy(ans->lexeme, s);
        return ans;
      }
    }
    /* Known symbol: hand back a deep copy so callers can own/modify it. */
    a = cs_hash_table_get(csound, csound->symbtab, s);
    if (a != NULL) {
      ans = (ORCTOKEN*)csound->Malloc(csound, sizeof(ORCTOKEN));
      memcpy(ans, a, sizeof(ORCTOKEN));
      ans->next = NULL;
      ans->lexeme = (char *)csound->Malloc(csound, strlen(a->lexeme) + 1);
      strcpy(ans->lexeme, a->lexeme);
      return ans;
    }
    /* Unknown lexeme: synthesize a T_IDENT token (not added to the table). */
    ans = new_token(csound, T_IDENT);
    ans->lexeme = (char*)csound->Malloc(csound, 1+strlen(s));
    strcpy(ans->lexeme, s);
    if (udoflag == -2 || namedInstrFlag == 1) {
      return ans;
    }
    ans->type = type;  /* NOTE(review): redundant — new_token already set T_IDENT */
    return ans;
}
/*
 * Register lexeme `s` with token `type` in the orchestra symbol table.
 * If the lexeme already exists with a different type, re-type it in place
 * (warning unless it is the expected T_OPCODE -> T_FUNCTION promotion) and
 * return the existing entry; otherwise insert and return a new entry.
 */
ORCTOKEN *add_token(CSOUND *csound, char *s, int type)
{
    ORCTOKEN *a = cs_hash_table_get(csound, csound->symbtab, s);
    ORCTOKEN *ans;
    if (a!=NULL) {
      if (type == a->type) return a;
      /* silent only for the opcode-used-as-function case */
      if ((type!=T_FUNCTION || a->type!=T_OPCODE))
        csound->Warning(csound,
                        Str("Type confusion for %s (%d,%d), replacing\n"),
                        s, type, a->type);
      a->type = type;
      return a;
    }
    ans = new_token(csound, T_IDENT);
    ans->lexeme = (char*)csound->Malloc(csound, 1+strlen(s));
    strcpy(ans->lexeme, s);
    ans->type = type;
    cs_hash_table_put(csound, csound->symbtab, s, ans);
    return ans;
}
/*
 * Try to interpret string `s` as a measurement unit.  On a hit in the
 * step_index hash, build and return an NSA_T_UNIT token (adopting `t`, if
 * given, as a child — possibly extended by continuation matches); on a
 * miss, return `t` unchanged.
 */
struct nsa_token *
create_unit(struct nsa_parser *p,const char *s,struct nsa_token *t)
{
    const char *n = nsa_trim_morph(p->context, s);
    struct nsa_hash_data *d = hash_find(p->context->step_index, (unsigned char *)n);
    if (d)
    {
        struct nsa_token *tu = new_token();
        struct nsa_unit *u = new_unit();
        List *l = list_create(LIST_SINGLE);
        list_add(l,t);
        /* multi-token unit names: may consume following tokens into l */
        if (d->continuations)
            d = check_continuations(d,p,&n,l);
        u->name = (char *)npool_copy((unsigned char *)n,p->pool);
        u->cands = d->cands;
        tu->type = NSA_T_UNIT;
        if (t)
        {
            struct nsa_token *lt;
            int i;
            /* adopt every collected token as a child of the unit token */
            tu->children = new_children(list_len(l));
            for (i = 0, lt = list_first(l); lt; lt = list_next(l),++i)
                tu->children[i] = lt;
        }
        tu->d.u = u;
        list_free(l,NULL);  /* frees the list only, not the tokens */
        return tu;
    }
    else
        return t;
}
int main (int argc, char **argv) { program_name = basename (argv[0]); scan_options (argc, argv); stack *stack = new_stack (); token *scanner = new_token (stdin); for (;;) { int token = scan_token (scanner); if (token == EOF) break; switch (token) { case NUMBER: do_push (stack, peek_token (scanner)); break; case '+': do_binop (stack, add_bigint); break; case '-': do_binop (stack, sub_bigint); break; case '*': do_binop (stack, mul_bigint); break; case 'c': do_clear (stack); break; case 'f': do_print_all (stack); break; case 'p': do_print (stack); break; default: unimplemented (token); break; } } do_clear(stack); free_stack(stack); free_token(scanner); DEBUGF ('m', "EXIT %d\n", exit_status); return EXIT_SUCCESS; }
/*
 * Build an output node of kind `tok` and wire `child` as its lhs and
 * `next` as its successor in the token chain.
 */
output *
new_token_with_child_and_next( enum tokens tok, lex_list *l,
                               output *child, output *next, lex_list *n )
{
    output *node = new_token(tok, l, n);
    node->tok->next = next->tok;
    node->tok->lhs = child->tok;
    return node;
}
/*
 * Build an output node of kind `tok` with `lhs` and `rhs` attached as its
 * two operands.
 */
output *
new_token_with_lrhs( enum tokens tok, lex_list *l,
                     output *lhs, output *rhs, lex_list *n )
{
    output *node = new_token(tok, l, n);
    node->tok->rhs = rhs->tok;
    node->tok->lhs = lhs->tok;
    return node;
}
/**
 * Parse string literal (ECMA-262 v5, 7.8.4)
 *
 * Scans from the opening quote (already at LA(0)) to the matching closing
 * quote, accepting single-character escapes and escaped line terminators
 * (including the CR-LF pair), and returns a TOK_STRING token with escape
 * sequences transformed.
 */
static token
parse_string (void)
{
  ecma_char_t c = (ecma_char_t) LA (0);
  JERRY_ASSERT (c == '\'' || c == '"');

  consume_char ();
  new_token ();

  const bool is_double_quoted = (c == '"');
  const char end_char = (is_double_quoted ? '"' : '\'');

  do
  {
    c = (ecma_char_t) LA (0);
    consume_char ();

    if (c == '\0')
    {
      PARSE_ERROR ("Unclosed string", token_start - buffer_start);
    }
    else if (ecma_char_is_line_terminator (c))
    {
      PARSE_ERROR ("String literal shall not contain newline character",
                   token_start - buffer_start);
    }
    else if (c == '\\')
    {
      /* escape sequence: the escaped character must not end the literal */
      ecma_char_t nc = (ecma_char_t) LA (0);

      if (convert_single_escape_character (nc, NULL))
      {
        consume_char ();
      }
      else if (ecma_char_is_line_terminator (nc))
      {
        /* line continuation; a CR may be followed by an LF (one sequence) */
        consume_char ();
        if (ecma_char_is_carriage_return (nc))
        {
          nc = (ecma_char_t) LA (0);
          if (ecma_char_is_new_line (nc))
          {
            consume_char ();
          }
        }
      }
    }
  }
  while (c != end_char);

  /* -1: drop the closing quote from the charset */
  token ret = convert_string_to_token_transform_escape_seq (TOK_STRING,
                                                            token_start,
                                                            (size_t) (buffer - token_start) - 1u);
  token_start = NULL;

  return ret;
} /* parse_string */
/* Wrap the keyword's primary token code in a freshly allocated token. */
TOKEN *
keyword(int primary)
{
    return new_token(primary);
}
/* postcondition: returned value is final answer, and pqueue_postfix should be empty */
/*
 * Evaluate a postfix token queue with a value stack: operands are pushed,
 * operators pop their arguments (popped in reverse: operands[0] is the
 * right-hand side), and the result is pushed back as a new OPERAND token.
 * On stack underflow (malformed expression) prints an error and drains all
 * remaining tokens before returning 0.
 */
double evaluate_postfix(struct token_queue * pqueue_postfix)
{
    double ans=0;
    p_expr_token stack_values=NULL, ptoken, pvalue;
    double operands[2];
    union token_value value;
    int i;

    while( ptoken=dequeue(pqueue_postfix)){
        switch(ptoken->type){
        case OPERAND:
            // operands always pushed to stack
            break;
        case OPERATOR:
            /* pop as many operands as this operator needs */
            for(i=0; i<op_operands[ptoken->value.op_code]; i++){
                if(pvalue=pop(&stack_values)){
                    operands[i]=pvalue->value.operand;
                    free(pvalue);
                }
                else
                    goto error; /* underflow: not enough operands */
            }
            switch(ptoken->value.op_code){
            case ADD:      value.operand=operands[1]+operands[0]; break;
            case SUBTRACT: value.operand=operands[1]-operands[0]; break;
            case MULTIPLY: value.operand=operands[1]*operands[0]; break;
            case DIVIDE:   value.operand=operands[1]/operands[0]; break;
            case NEGATE:   value.operand=-operands[0];
            }
            push(&stack_values, new_token(OPERAND, value));
            /* intentional fallthrough: the operator token is now spent */
        default:
            break;
        }
        /* NOTE: OPERAND case must still push the original token */
        if (0) { }
        switch(ptoken->type){
        case OPERAND:
            push(&stack_values, ptoken);
            break;
        default:
            free(ptoken);
            break;
        }
    }
    if(stack_values)
        ans=stack_values->value.operand; /* final answer is top of stack */
cleanup:
    // free any remaining tokens
    while(ptoken=dequeue(pqueue_postfix))
        free(ptoken);
    while(pvalue=pop(&stack_values))
        free(pvalue);
    return ans;
error:
    fputs("Error evaluating the expression.\n", stderr);
    goto cleanup;
}
/*
 * Handle an <!if ...> tag: expand each argument, evaluate the condition,
 * and push either TOK_IF_INSIDE (condition true — we enter the block) or
 * TOK_IF_NOTYET (condition false) before refreshing the parser state.
 * Always returns NULL: the tag itself produces no replacement text.
 */
static char *iftag_if (int argc, char *argv[])
{
  char *expanded[MAX_ARGS];
  int i;

  /* expand every argument first */
  for (i = 0; i < argc; i++)
    expanded[i] = process_text (argv[i]);

  /* push the token that matches the truth of the expression */
  new_token (eval_if (argc, expanded) ? TOK_IF_INSIDE : TOK_IF_NOTYET);

  /* update parser state */
  update_state ();

  /* release the expanded copies */
  for (i = 0; i < argc; i++)
    free (expanded[i]);

  /* nothing to add */
  return NULL;
}
/*
 * Rewrite the parser's token list, turning numeric graphemes into
 * NSA_T_COUNT tokens (optionally absorbing a look-ahead token via
 * la2_trap) and unit-bearing graphemes into NSA_T_UNIT tokens.
 * Non-grapheme tokens pass through unchanged.  The old list shell is
 * freed; p->toks is replaced with the rebuilt list.
 */
void nsa_ucount(struct nsa_parser *p)
{
    List *newtoks = list_create(LIST_DOUBLE);
    struct nsa_token *t;
    for (t = list_first(p->toks); t; t = list_next(p->toks))
    {
        if (t->type == NSA_T_GRAPHEME)
        {
            if (grapheme_num(t))
            {
                /* numeric grapheme: wrap it in a COUNT token */
                struct nsa_token *c = new_token();
                struct nsa_token *la2 = la2_trap(newtoks,p);
                int nkids = 1;
                if (la2)
                    ++nkids;
                c->d.c = new_count();
                c->type = NSA_T_COUNT;
                c->children = new_children(nkids);
                if (la2)
                {
                    c->children[0] = la2;
                    c->children[1] = t;
                }
                else
                    c->children[0] = t;
                /* -1 direction when a look-ahead token was captured */
                *c->d.c = *nsa_parse_count(grapheme_num(t),la2 ? -1 : 1);
                list_add(newtoks,c);
                if (grapheme_unit(t))
                {
                    /* "<num>(<unit>)" grapheme: emit the unit token too */
                    struct nsa_token *tu = create_unit(p, grapheme_unit(t), NULL);
                    if (tu)
                        list_add(newtoks,tu);
                    else
                        fprintf(stderr,"unknown unit in count-unit grapheme `%s'\n",
                                nsa_grapheme_text(t));
                }
            }
            else if (!newtoks->last
                     || ((struct nsa_token*)list_last(newtoks))->type != NSA_T_UNIT)
                /* only try a unit reading when the previous token isn't one */
                list_add(newtoks,create_unit(p, (const char *)nsa_grapheme_text(t), t));
            else
                list_add(newtoks,t);
        }
        else
            list_add(newtoks,t);
    }
    list_free(p->toks,NULL);
    p->toks = newtoks;
}
TOKEN * number(char * text) { struct token_t * tkn; tkn = new_token(TKN_NUMBER); tkn->value.type = V_NUM; tkn->value.number = atof(text); #if DEBUG fprintf(stderr, "NUMBER(%g)\n", tkn->value.number); #endif return tkn; }
/*
 * Return the symbol-table token for lexeme `s`, interning a new IDENTIFIER
 * entry (with a heap copy of the lexeme) at the head of its hash bucket if
 * it is not already present.  Exits on allocation failure instead of
 * crashing later on a NULL lexeme.
 */
TOKEN* lookup_token(char *s)
{
    int h = hash(s);
    TOKEN *a = symbtable[h];
    TOKEN *ans;

    while (a != NULL) {
        if (strcmp(a->lexeme, s) == 0)
            return a;
        a = a->next;
    }
    ans = new_token(IDENTIFIER);
    ans->lexeme = (char*)malloc(1 + strlen(s));
    if (ans->lexeme == NULL) {
        fprintf(stderr, "lookup_token(): out of memory\n");
        exit(1);
    }
    strcpy(ans->lexeme, s);
    ans->next = symbtable[h];
    symbtable[h] = ans;
    return ans;
}
/*
 * Append one token to the parser's token list.  NSA_P_STOP becomes a bare
 * NSA_T_STOP token; anything else becomes an NSA_T_GRAPHEME carrying a
 * text reference of the given ptype.  Graphemes shaped like "<num>(<unit>)"
 * (or "n(...)"/"N(...)") are split in place: the pooled copy is cut at the
 * parentheses and num/unit pointers are set into it.
 */
void nsa_token(struct nsa_parser *p, enum nsa_ptypes type, void *ref, const char *s)
{
    struct nsa_token *t = new_token();
    if (type == NSA_P_STOP)
    {
        t->type = NSA_T_STOP;
    }
    else
    {
        unsigned char *s2 = npool_copy((const unsigned char *)s,p->pool), *brack;
        t->type = NSA_T_GRAPHEME;
        grapheme(t) = new_grapheme();
        grapheme_overt(t) = 1;
        grapheme_text_ref(t) = new_text_ref();
        grapheme_text_ref(t)->ptype = type;
        /* stash the reference in the union member matching its ptype */
        switch (type)
        {
        case NSA_P_LEMM:
            grapheme_text_ref(t)->t.lemmptr = ref;
            break;
        case NSA_P_LITERAL:
            grapheme_text_ref(t)->t.literal = ref;
            break;
        case NSA_P_LINK:
            grapheme_text_ref(t)->t.linkptr = ref;
            break;
        default:
            break;
        }
        /* count-unit grapheme: digits (or a lone n/N) followed by "(unit)" */
        if ((brack = (unsigned char *)strchr((const char *)s2,'('))
            && (isdigit(*s2)
                || ((brack-s2)==1 && (*s2 == 'n' || *s2 == 'N'))))
        {
            grapheme_num(t) = (char *)s2;
            *brack++ = '\0';          /* terminate the number part */
            grapheme_unit(t) = (char *)brack;
            while (*brack && ')' != *brack)
                ++brack;
            *brack = '\0';            /* strip the closing paren */
        }
    }
    list_add(p->toks,t);
}
/*
 * Build a TKN_NUMBER token from the hexadecimal literal in `text`.
 * Any leftover characters after the converted digits are a fatal
 * scan error reported with the current input position.
 */
TOKEN * hexnumber(char * text)
{
    char * end;
    struct token_t * result = new_token(TKN_NUMBER);

    result->value.type = V_NUM;
    result->value.number = strtoul(text, &end, 16);
    if ((end == text) || (*end != 0)) {
        fprintf(stderr,
                "error at %s:%d: can't scan hex number \"%s\"\n",
                infile, lineno, text);
        exit(1);
    }
#if DEBUG
    fprintf(stderr, "HEXNUMBER(%g)\n", result->value.number);
#endif
    return result;
}
/*
 * Build a TKN_STRING token owning a heap-allocated copy of `text`.
 * Exits on allocation failure.
 */
TOKEN * string(char * text)
{
    struct token_t * tkn;
    int len;

    tkn = new_token(TKN_STRING);
    len = strlen(text);
    tkn->value.type = V_STR;
    tkn->value.string = (char *) malloc(len+1);
    if (tkn->value.string == NULL) {
        /* fixed: message previously blamed "id()" */
        fprintf(stderr, "string(): out of memory\n");
        exit(1);
    }
    strcpy(tkn->value.string, text);
#if DEBUG
    fprintf(stderr, "STRING(%s)\n", tkn->value.string);
#endif
    return tkn;
}
/*
 * Scan the longest token starting at string[start] by walking the global
 * TRIE character by character.  Returns a token whose [first,last] indices
 * bound the match and whose type is the accepting state's type ("end" at
 * end of input, "text" when no accepting state was reached).
 */
struct token * get_next_token( char * string, int start )
{
    struct token * next_token = new_token();
    if ( string[start] == '\0' )
    {
        /* end of input: zero-width "end" token */
        next_token->type = "end";
        next_token->first = start;
        next_token->last = start;
        return next_token;
    }
    struct trie_node * current_node = TRIE;
    struct charhash * next_node = NULL;
    int i = start;
    char input_char;
    while ( input_char = string[i] )
    {
        // Loop characters of string.
        if ( next_node = charhashlookup( current_node->child, input_char ) )
        {
            current_node = next_node->data;
            i = i + 1;
        }
        else
        {
            // No transition for this character.
            /* NOTE(review): i is decremented only when in an accepting
               state — confirm this matches the intended longest-match
               semantics. */
            if ( current_node->accepting_type )
            {
                i = i - 1; // "Push" this character back on the input.
            }
            break;
        }
    } // End characters of string loop.
    if ( input_char == '\0' )
    {
        i = i - 1;  /* don't include the terminator in the token span */
    }
    next_token->first = start;
    next_token->last = i;
    next_token->type = current_node->accepting_type;
    if ( ! current_node->accepting_type )
    {
        next_token->type = "text";  /* fallback for non-accepting stops */
    }
    return next_token;
}
/*
 * Advance the scanner over one token starting at scanner->forword and
 * return it as a Token of the recognized TYPE.
 *
 * NOTE(review): only the LEVEL branch is implemented; the other predicate
 * branches are empty, so for those inputs `type` is read uninitialized
 * (undefined behavior) and `forword` does not advance.  This looks like
 * work in progress — confirm before relying on non-LEVEL tokens.
 */
struct Token * scan(struct Scanner * scanner)
{
    char * forword = scanner->forword;
    char * lexeme = scanner->lexeme;
    enum TYPE type;
    if(level_p(* forword))
    {
        /* LEVEL: marker char followed by one or more digits */
        forword += 1;
        if(!number_p(* forword))
        {
            raise_error();
        }
        do
        {
            forword += 1;
        }
        while(number_p(* forword));
        type = LEVEL;
    }
    else if(first_name_p(forword))
    {
    }
    else if(last_name_p(forword))
    {
    }
    else if(cid_p(forword))
    {
    }
    else if(battle_p(forword))
    {
    }
    else if(end_of_line_p(forword))
    {
    }
    scanner->forword = forword;
    scanner->lexeme = forword;
    return new_token(type, lexeme, forword - lexeme);
}
/**
 * Parse numeric literal (ECMA-262, v5, 7.8.3)
 *
 * Handles hex literals (accumulated directly into fp_res), decimal /
 * floating-point / exponent forms (delegated to ecma_utf8_string_to_number),
 * and legacy octal literals (rejected in strict mode).  Values that fit in
 * a byte become TOK_SMALL_INT; everything else becomes TOK_NUMBER.
 *
 * @return token of TOK_SMALL_INT or TOK_NUMBER types
 */
static token
lexer_parse_number (void)
{
  ecma_char_t c = LA (0);
  bool is_hex = false;
  bool is_fp = false;
  ecma_number_t fp_res = .0;
  size_t tok_length = 0, i;
  token known_token;

  JERRY_ASSERT (lit_char_is_decimal_digit (c) || c == LIT_CHAR_DOT);

  if (c == LIT_CHAR_0)
  {
    if (LA (1) == LIT_CHAR_LOWERCASE_X || LA (1) == LIT_CHAR_UPPERCASE_X)
    {
      is_hex = true;
    }
  }
  else if (c == LIT_CHAR_DOT)
  {
    JERRY_ASSERT (lit_char_is_decimal_digit (LA (1)));
    is_fp = true;
  }

  if (is_hex)
  {
    // Eat up '0x'
    consume_char ();
    consume_char ();
    new_token ();
    c = LA (0);
    if (!lit_char_is_hex_digit (c))
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Invalid HexIntegerLiteral", lit_utf8_iterator_get_pos (&src_iter));
    }
    do
    {
      consume_char ();
      c = LA (0);
    }
    while (lit_char_is_hex_digit (c));

    if (lexer_is_char_can_be_identifier_start (c))
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX,
                   "Identifier just after integer literal",
                   lit_utf8_iterator_get_pos (&src_iter));
    }

    tok_length = (size_t) (TOK_SIZE ());
    const lit_utf8_byte_t *fp_buf_p = TOK_START ();
    /* token is constructed at end of function */
    for (i = 0; i < tok_length; i++)
    {
      fp_res = fp_res * 16 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]);
    }
  }
  else
  {
    bool is_exp = false;

    new_token ();

    // Eat up '.'
    if (is_fp)
    {
      consume_char ();
    }

    while (true)
    {
      c = LA (0);
      if (c == LIT_CHAR_DOT)
      {
        if (is_fp)
        {
          /* token is constructed at end of function */
          break;
        }
        else
        {
          is_fp = true;
          consume_char ();
          continue;
        }
      }
      else if (c == LIT_CHAR_LOWERCASE_E || c == LIT_CHAR_UPPERCASE_E)
      {
        if (is_exp)
        {
          PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX,
                       "Numeric literal shall not contain more than exponential marker ('e' or 'E')",
                       lit_utf8_iterator_get_pos (&src_iter));
        }
        else
        {
          is_exp = true;
          consume_char ();
          /* optional signed exponent */
          if (LA (0) == LIT_CHAR_MINUS || LA (0) == LIT_CHAR_PLUS)
          {
            consume_char ();
          }
          continue;
        }
      }
      else if (!lit_char_is_decimal_digit (c))
      {
        if (lexer_is_char_can_be_identifier_start (c))
        {
          PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX,
                       "Numeric literal shall not contain non-numeric characters",
                       lit_utf8_iterator_get_pos (&src_iter));
        }
        /* token is constructed at end of function */
        break;
      }
      consume_char ();
    }

    tok_length = (size_t) (TOK_SIZE ());
    if (is_fp || is_exp)
    {
      ecma_number_t res = ecma_utf8_string_to_number (TOK_START (), (jerry_api_size_t) tok_length);
      JERRY_ASSERT (!ecma_number_is_nan (res));
      known_token = convert_seen_num_to_token (res);
      is_token_parse_in_progress = NULL;
      return known_token;
    }
    else if (*TOK_START () == LIT_CHAR_0 && tok_length != 1)
    {
      /* Octal integer literals */
      if (strict_mode)
      {
        PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Octal integer literals are not allowed in strict mode", token_start_pos);
      }
      else
      {
        /* token is constructed at end of function */
        const lit_utf8_byte_t *fp_buf_p = TOK_START ();
        for (i = 0; i < tok_length; i++)
        {
          fp_res = fp_res * 8 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]);
        }
      }
    }
    else
    {
      const lit_utf8_byte_t *fp_buf_p = TOK_START ();
      /* token is constructed at end of function */
      /* accumulate decimal digits from least-significant end */
      ecma_number_t mult = 1.0f;
      for (i = tok_length; i > 0; i--, mult *= 10)
      {
        fp_res += (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i - 1]) * mult;
      }
    }
  }

  /* byte-sized integers get the compact small-int token form */
  if (fp_res >= 0 && fp_res <= 255 && (uint8_t) fp_res == fp_res)
  {
    known_token = create_token (TOK_SMALL_INT, (uint8_t) fp_res);
    is_token_parse_in_progress = NULL;
    return known_token;
  }
  else
  {
    known_token = convert_seen_num_to_token (fp_res);
    is_token_parse_in_progress = NULL;
    return known_token;
  }
} /* lexer_parse_number */
/* Parse a field into tokens as defined by rfc822.
 *
 * Walks the header line (following continuation lines in hdr->next) and
 * builds a singly-linked TOKEN list: quoted strings / domain literals,
 * special characters, atoms, and collapsed whitespace markers.  Comments
 * in parentheses are skipped.  Returns NULL on allocation failure (errno
 * preserved) or on a malformed field name.
 */
static TOKEN
parse_field (HDR_LINE hdr)
{
  static const char specials[] = "<>@.,;:\\[]\"()";
  static const char specials2[] = "<>@.,;:";
  static const char tspecials[] = "/?=<>@,;:\\[]\"()";
  static const char tspecials2[] = "/?=<>@.,;:";   /* FIXME: really include '.'?*/
  /* headers that use the MIME "tspecials" delimiter set */
  static struct
  {
    const unsigned char *name;
    size_t namelen;
  } tspecial_header[] =
    {
      { "Content-Type", 12},
      { "Content-Transfer-Encoding", 25},
      { "Content-Disposition", 19},
      { NULL, 0}
    };
  const char *delimiters;
  const char *delimiters2;
  const unsigned char *line, *s, *s2;
  size_t n;
  int i, invalid = 0;
  TOKEN t, tok, *tok_tail;

  errno = 0;
  if (!hdr)
    return NULL;

  tok = NULL;
  tok_tail = &tok;
  line = hdr->line;
  if (!(s = strchr (line, ':')))
    return NULL; /* oops */
  n = s - line;
  if (!n)
    return NULL; /* oops: invalid name */

  /* pick the delimiter set based on the field name */
  delimiters = specials;
  delimiters2 = specials2;
  for (i = 0; tspecial_header[i].name; i++)
    {
      if (n == tspecial_header[i].namelen
          && !memcmp (line, tspecial_header[i].name, n))
        {
          delimiters = tspecials;
          delimiters2 = tspecials2;
          break;
        }
    }
  s++; /* Move over the colon. */

  for (;;)
    {
      while (!*s)
        {
          if (!hdr->next || !hdr->next->cont)
            return tok; /* Ready. */
          /* Next item is a header continuation line. */
          hdr = hdr->next;
          s = hdr->line;
        }

      if (*s == '(')
        {
          /* rfc822 comment: skip it, honoring nesting and quoting */
          int level = 1;
          int in_quote = 0;
          invalid = 0;
          for (s++;; s++)
            {
              while (!*s)
                {
                  if (!hdr->next || !hdr->next->cont)
                    goto oparen_out;
                  /* Next item is a header continuation line. */
                  hdr = hdr->next;
                  s = hdr->line;
                }
              if (in_quote)
                {
                  if (*s == '\"')
                    in_quote = 0;
                  else if (*s == '\\' && s[1]) /* what about continuation? */
                    s++;
                }
              else if (*s == ')')
                {
                  if (!--level)
                    break;
                }
              else if (*s == '(')
                level++;
              else if (*s == '\"')
                in_quote = 1;
            }
        oparen_out:
          if (!*s)
            ; /* Actually this is an error, but we don't care about it. */
          else
            s++;
        }
      else if (*s == '\"' || *s == '[')
        {
          /* We do not check for non-allowed nesting of domainliterals */
          int term = *s == '\"' ? '\"' : ']';
          invalid = 0;
          s++;
          t = NULL;
          for (;;)
            {
              for (s2 = s; *s2; s2++)
                {
                  if (*s2 == term)
                    break;
                  else if (*s2 == '\\' && s2[1]) /* what about continuation? */
                    s2++;
                }
              /* literal may span continuation lines: append piecewise */
              t = (t
                   ? append_to_token (t, s, s2 - s)
                   : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
              if (!t)
                goto failure;
              if (*s2 || !hdr->next || !hdr->next->cont)
                break;
              /* Next item is a header continuation line. */
              hdr = hdr->next;
              s = hdr->line;
            }
          *tok_tail = t;
          tok_tail = &t->next;
          s = s2;
          if (*s)
            s++; /* skip the delimiter */
        }
      else if ((s2 = strchr (delimiters2, *s)))
        {
          /* Special characters which are not handled above. */
          invalid = 0;
          t = new_token (tSPECIAL, s, 1);
          if (!t)
            goto failure;
          *tok_tail = t;
          tok_tail = &t->next;
          s++;
        }
      else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
        {
          invalid = 0;
          s = skip_ws (s + 1);
        }
      else if (*s > 0x20 && !(*s & 128))
        {
          /* Atom. */
          invalid = 0;
          for (s2 = s + 1; *s2 > 0x20 && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
            ;
          t = new_token (tATOM, s, s2 - s);
          if (!t)
            goto failure;
          *tok_tail = t;
          tok_tail = &t->next;
          s = s2;
        }
      else
        {
          /* Invalid character. */
          if (!invalid)
            {
              /* For parsing we assume only one space. */
              t = new_token (tSPACE, NULL, 0);
              if (!t)
                goto failure;
              *tok_tail = t;
              tok_tail = &t->next;
              invalid = 1;
            }
          s++;
        }
    }
  /*NOTREACHED*/

 failure:
  {
    /* keep the allocation failure's errno across the list release */
    int save = errno;
    release_token_list (tok);
    errno = save;
  }
  return NULL;
}
/* : , { } [ ] ( ) = ! + - * & "string" 'string' number id */
/*
 * Split line `s` (up to '\0' or ';') into the global `tokens` array and
 * return the token count, or -1 on error (all tokens freed).  With
 * LEXER_READ_CPP, "#line" directives update token.filename / token.lineno
 * instead of producing tokens.
 */
int tokenize(const char *s)
{
	const char *last;

	/* lazy one-time allocation of the token array */
	if (!tokens) {
		tokens = malloc(MAXTOK * sizeof(char *));
		if (!tokens) {
			die("out of memory\n");
			return -1;
		}
		token.lineno = 0;
	}
	token.lineno++;

#ifdef LEXER_READ_CPP
	if (s[0] == '#') {
		/* cpp line marker: "#line N \"file\"" or "# N \"file\"" */
		const char *q = s + 1;
		int lineno;
		char *p;

		if (!strncmp(q, "line", 4)) {
			q += 4;
		}
		lineno = strtoul(q, &p, 10);
		if (p > q && lineno > 0) {
			int stop = ' ';

			while (isspace(*p)) {
				p++;
			}
			if (IS_STRING(p)) {
				stop = *p++;
			}
			for (q = p; *q && *q != stop; q++) {
				if (*q == '\\' && q[1]) {
					q++;
				}
			}
			free(token.filename);
			token.filename = new_token(p, q - p);
			/* -1: the next real line will bump it back */
			token.lineno = lineno - 1;
			s = q + strlen(q);
		}
	}
#endif

	for (ntok = 0; *s && *s != ';'; s++) {
		char *p;
		int len;

		if (isspace(*s)) {
			continue;
		}
		last = s;
		len = 0;
		/* single-character punctuation tokens */
		switch (*s) {
		case ':': case ',': case '{': case '}':
		case '[': case ']': case '(': case ')':
		case '=': case '!': case '+': case '-':
		case '*': case '&':
			len = 1;
			break;
		}
		if (!len) {
			if (IS_STRING(s)) {
				/* quoted string; backslash escapes the next char */
				int quote = *s;

				while (*++s && *s != quote) {
					if (*s == '\\' && !*++s) {
						goto err_syntax;
					}
				}
				if (s[0] != quote) {
					goto err_syntax;
				}
			} else if (isdigit(*s)) {
				/* number; must not run into an identifier char */
				strtoul(s, &p, 0);
				if (isalph_(*p)) {
					goto err_syntax;
				}
				s = p - 1;
			} else if (isalph_(*s)) {
				while (isalnu_(s[1])) {
					s++;
				}
			} else {
				goto err_syntax;
			}
			len = 1;
		}
		if (ntok >= MAXTOK) {
			die("too many tokens\n");
			goto err;
		}
		/* token text runs from `last` through s + len - 1 */
		p = new_token(last, s + len - last);
		if (!p) {
			die("out of memory\n");
			goto err;
		}
		tokens[ntok++] = p;
		s += len - 1;
	}
	itok = 0;
	return ntok;

err_syntax:
	die("invalid token %s\n", last);
err:
	while (--ntok >= 0) {
		free(tokens[ntok]);
	}
	return -1;
}
/**
 * Parse string literal (ECMA-262 v5, 7.8.5)
 *
 * Scans a RegExp literal starting at the opening '/': a '/' inside a
 * character class ([...]) does not terminate the body, '\\' escapes the
 * next character, then any trailing flag word characters are consumed.
 * Returns a TOK_REGEXP token covering body and flags.
 */
static token
parse_regexp (void)
{
  token result;
  bool is_char_class = false;

  /* Eat up '/' */
  JERRY_ASSERT ((ecma_char_t) LA (0) == '/');
  consume_char ();
  new_token ();

  while (true)
  {
    ecma_char_t c = (ecma_char_t) LA (0);

    if (c == '\0')
    {
      PARSE_ERROR ("Unclosed string", token_start - buffer_start);
    }
    else if (c == '\n')
    {
      PARSE_ERROR ("RegExp literal shall not contain newline character", token_start - buffer_start);
    }
    else if (c == '\\')
    {
      /* skip the escaped character (second consume below eats it) */
      consume_char ();
    }
    else if (c == '[')
    {
      is_char_class = true;
    }
    else if (c == ']')
    {
      is_char_class = false;
    }
    else if (c == '/' && !is_char_class)
    {
      /* Eat up '/' */
      consume_char ();
      break;
    }
    consume_char ();
  }

  /* Try to parse RegExp flags */
  while (true)
  {
    ecma_char_t c = (ecma_char_t) LA (0);

    if (c == '\0'
        || !ecma_char_is_word_char (c)
        || ecma_char_is_line_terminator (c))
    {
      break;
    }
    consume_char ();
  }

  result = convert_string_to_token (TOK_REGEXP,
                                    (const lit_utf8_byte_t *) token_start,
                                    static_cast<ecma_length_t> (buffer - token_start));
  token_start = NULL;

  return result;
} /* parse_regexp */
/**
 * Parse Identifier (ECMA-262 v5, 7.6) or ReservedWord (7.6.1; 7.8.1; 7.8.2).
 *
 * Consumes identifier-part characters, accepting \\uXXXX escape sequences;
 * reserved-word lookup is attempted only when the lexeme is pure lowercase
 * ASCII with no escapes (reserved words can never match otherwise).
 *
 * @return TOK_NAME - for Identifier,
 *         TOK_KEYWORD - for Keyword or FutureReservedWord,
 *         TOK_NULL - for NullLiteral,
 *         TOK_BOOL - for BooleanLiteral
 */
static token
lexer_parse_identifier_or_keyword (void)
{
  ecma_char_t c = LA (0);

  JERRY_ASSERT (lexer_is_char_can_be_identifier_start (c));

  new_token ();

  bool is_correct_identifier_name = true;
  bool is_escape_sequence_occured = false;
  bool is_all_chars_were_lowercase_ascii = true;

  while (true)
  {
    c = LA (0);

    if (c == LIT_CHAR_BACKSLASH)
    {
      consume_char ();
      is_escape_sequence_occured = true;

      bool is_unicode_escape_sequence = (LA (0) == LIT_CHAR_LOWERCASE_U);
      consume_char ();

      if (is_unicode_escape_sequence)
      {
        /* UnicodeEscapeSequence */
        if (!lexer_convert_escape_sequence_digits_to_char (&src_iter, true, &c))
        {
          is_correct_identifier_name = false;
          break;
        }
        else
        {
          /* c now contains character, encoded in the UnicodeEscapeSequence */

          // Check character, converted from UnicodeEscapeSequence
          if (!lexer_is_char_can_be_identifier_part (c))
          {
            is_correct_identifier_name = false;
            break;
          }
        }
      }
      else
      {
        /* only \\u escapes are legal inside identifiers */
        is_correct_identifier_name = false;
        break;
      }
    }
    else if (!lexer_is_char_can_be_identifier_part (c))
    {
      break;
    }
    else
    {
      if (!(c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN
            && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
      {
        is_all_chars_were_lowercase_ascii = false;
      }
      consume_char ();
    }
  }

  if (!is_correct_identifier_name)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal identifier name", lit_utf8_iterator_get_pos (&src_iter));
  }

  const lit_utf8_size_t charset_size = TOK_SIZE ();

  token ret = empty_token;

  if (!is_escape_sequence_occured
      && is_all_chars_were_lowercase_ascii)
  {
    /* Keyword or FutureReservedWord (TOK_KEYWORD), or boolean literal (TOK_BOOL), or null literal (TOK_NULL) */
    ret = lexer_parse_reserved_word (TOK_START (), charset_size);
  }

  if (is_empty (ret))
  {
    /* Identifier (TOK_NAME) */
    if (!is_escape_sequence_occured)
    {
      ret = lexer_create_token_for_charset (TOK_NAME,
                                            TOK_START (),
                                            charset_size);
    }
    else
    {
      ret = lexer_create_token_for_charset_transform_escape_sequences (TOK_NAME,
                                                                       TOK_START (),
                                                                       charset_size);
    }
  }

  is_token_parse_in_progress = false;

  return ret;
} /* lexer_parse_identifier_or_keyword */
/* In this function we cannot use strtol function
   since there is no octal literals in ECMAscript. */
/*
 * Parse a numeric literal (hex, decimal, fractional, exponent, or legacy
 * octal).  Integer values are accumulated in `res` until they exceed 255,
 * then spill into the float accumulator `fp_res` (is_overflow).  Values
 * fitting a byte become TOK_SMALL_INT; everything else TOK_NUMBER.
 */
static token
parse_number (void)
{
  ecma_char_t c = LA (0);
  bool is_hex = false;
  bool is_fp = false;
  bool is_exp = false;
  bool is_overflow = false;
  ecma_number_t fp_res = .0;
  size_t tok_length = 0, i;
  uint32_t res = 0;
  token known_token;

  JERRY_ASSERT (isdigit (c) || c == '.');

  if (c == '0')
  {
    if (LA (1) == 'x' || LA (1) == 'X')
    {
      is_hex = true;
    }
  }

  if (c == '.')
  {
    JERRY_ASSERT (!isalpha (LA (1)));
    is_fp = true;
  }

  if (is_hex)
  {
    // Eat up '0x'
    consume_char ();
    consume_char ();
    new_token ();
    while (true)
    {
      c = LA (0);
      if (!isxdigit (c))
      {
        break;
      }
      consume_char ();
    }

    if (isalpha (c) || c == '_' || c == '$')
    {
      PARSE_ERROR ("Integer literal shall not contain non-digit characters", buffer - buffer_start);
    }

    tok_length = (size_t) (buffer - token_start);
    for (i = 0; i < tok_length; i++)
    {
      if (!is_overflow)
      {
        res = (res << 4) + ecma_char_hex_to_int (token_start[i]);
      }
      else
      {
        fp_res = fp_res * 16 + (ecma_number_t) ecma_char_hex_to_int (token_start[i]);
      }
      /* too big for a small-int token: switch to float accumulation */
      if (res > 255)
      {
        fp_res = (ecma_number_t) res;
        is_overflow = true;
        res = 0;
      }
    }

    if (is_overflow)
    {
      known_token = convert_seen_num_to_token (fp_res);
      token_start = NULL;
      return known_token;
    }
    else
    {
      known_token = create_token (TOK_SMALL_INT, (uint8_t) res);
      token_start = NULL;
      return known_token;
    }
  }

  JERRY_ASSERT (!is_hex && !is_exp);

  new_token ();

  // Eat up '.'
  if (is_fp)
  {
    consume_char ();
  }

  while (true)
  {
    c = LA (0);
    if (is_fp && c == '.')
    {
      FIXME (/* This is wrong: 1..toString (). */)
      PARSE_ERROR ("Integer literal shall not contain more than one dot character", buffer - buffer_start);
    }
    if (is_exp && (c == 'e' || c == 'E'))
    {
      PARSE_ERROR ("Integer literal shall not contain more than exponential marker ('e' or 'E')",
                   buffer - buffer_start);
    }

    if (c == '.')
    {
      if (isalpha (LA (1)) || LA (1) == '_' || LA (1) == '$')
      {
        PARSE_ERROR ("Integer literal shall not contain non-digit character after got character",
                     buffer - buffer_start);
      }
      is_fp = true;
      consume_char ();
      continue;
    }

    if (c == 'e' || c == 'E')
    {
      /* optional sign on the exponent */
      if (LA (1) == '-' || LA (1) == '+')
      {
        consume_char ();
      }
      if (!isdigit (LA (1)))
      {
        PARSE_ERROR ("Integer literal shall not contain non-digit character after exponential marker ('e' or 'E')",
                     buffer - buffer_start);
      }
      is_exp = true;
      consume_char ();
      continue;
    }

    if (isalpha (c) || c == '_' || c == '$')
    {
      PARSE_ERROR ("Integer literal shall not contain non-digit characters", buffer - buffer_start);
    }

    if (!isdigit (c))
    {
      break;
    }

    consume_char ();
  }

  tok_length = (size_t) (buffer - token_start);

  if (is_fp || is_exp)
  {
    ecma_number_t res = ecma_utf8_string_to_number (token_start, (jerry_api_size_t) tok_length);
    JERRY_ASSERT (!ecma_number_is_nan (res));
    known_token = convert_seen_num_to_token (res);
    token_start = NULL;
    return known_token;
  }

  if (*token_start == '0' && tok_length != 1)
  {
    /* legacy octal literal (leading zero) */
    if (strict_mode)
    {
      /* NOTE(review): "tnteger" typo lives in the runtime message */
      PARSE_ERROR ("Octal tnteger literals are not allowed in strict mode", token_start - buffer_start);
    }
    for (i = 0; i < tok_length; i++)
    {
      if (!is_overflow)
      {
        res = res * 8 + ecma_char_hex_to_int (token_start[i]);
      }
      else
      {
        fp_res = fp_res * 8 + (ecma_number_t) ecma_char_hex_to_int (token_start[i]);
      }
      if (res > 255)
      {
        fp_res = (ecma_number_t) res;
        is_overflow = true;
        res = 0;
      }
    }
  }
  else
  {
    for (i = 0; i < tok_length; i++)
    {
      if (!is_overflow)
      {
        res = res * 10 + ecma_char_hex_to_int (token_start[i]);
      }
      else
      {
        fp_res = fp_res * 10 + (ecma_number_t) ecma_char_hex_to_int (token_start[i]);
      }
      if (res > 255)
      {
        fp_res = (ecma_number_t) res;
        is_overflow = true;
        res = 0;
      }
    }
  }

  if (is_overflow)
  {
    known_token = convert_seen_num_to_token (fp_res);
    token_start = NULL;
    return known_token;
  }
  else
  {
    known_token = create_token (TOK_SMALL_INT, (uint8_t) res);
    token_start = NULL;
    return known_token;
  }
}
/**
 * Parse identifier (ECMA-262 v5, 7.6) or keyword (7.6.1.1)
 *
 * Consumes alphanumerics, '$', '_' and \\uXXXX escape sequences, then
 * returns a TOK_NAME token with escapes transformed.
 */
static token
parse_name (void)
{
  ecma_char_t c = (ecma_char_t) LA (0);

  token known_token = empty_token;

  JERRY_ASSERT (isalpha (c) || c == '$' || c == '_');

  new_token ();

  while (true)
  {
    c = (ecma_char_t) LA (0);
    if (!isalpha (c)
        && !isdigit (c)
        && c != '$'
        && c != '_'
        && c != '\\')
    {
      break;
    }
    else
    {
      consume_char ();
      if (c == '\\')
      {
        /* only the form \\u followed by exactly four hex digits is legal */
        bool is_correct_sequence = (LA (0) == 'u');
        if (is_correct_sequence)
        {
          consume_char ();
        }
        for (uint32_t i = 0; is_correct_sequence && i < 4; i++)
        {
          if (!isxdigit (LA (0)))
          {
            is_correct_sequence = false;
            break;
          }
          consume_char ();
        }
        if (!is_correct_sequence)
        {
          PARSE_ERROR ("Malformed escape sequence", token_start - buffer_start);
        }
      }
    }
  }

  known_token = convert_string_to_token_transform_escape_seq (TOK_NAME,
                                                              token_start,
                                                              (size_t) (buffer - token_start));
  token_start = NULL;

  return known_token;
} /* parse_name */
/* process the file `in' and output the result to `out' file */
/*
 * Core preprocessor loop: copies `in` to `out` line by line, expanding
 * <!...> tags (comments, <!arg...>/<!args...>, built-in tag procedures,
 * user functional macros) and plain macros, while saving and restoring
 * the global stream/position state so calls can nest.
 */
void process_file (STREAM *in, STREAM *out)
{
  STREAM *old_i_stream = _i_stream;
  STREAM *old_o_stream = _o_stream;
  char *old_current_line = current_line;
  char *old_current_col = current_col;
  char *s, buf[MAX_BYTES];
  _i_stream = in;
  _o_stream = out;
  /* NOTE(review): these two were already saved by the initializers above;
     the re-assignment is redundant but harmless */
  old_current_line = current_line;
  old_current_col = current_col;
  new_token (TOK_SPACE);
  update_state ();
  while (stgets (buf, MAX_BYTES, in)) {
    for (s = buf; *s; s++) {
      /* tag beginning */
      if ((*s == '<') && (s[1] == '!')) {
        int c, i, used = FALSE;
        int restore = TRUE;
        char *tag = s + 1;
        /* jump the comment? */
        if ((s[2] == '-') && (s[3] == '-')) {
          if (!kill_comments) stputs ("<!", out);
          s += 2;
          for (;;) {
            if (strncmp (s, "-->", 3) == 0) {
              if (!kill_comments) stputs ("-->", out);
              s += 2;
              break;
            }
            else if (*s == 0) {
              /* comment continues on the next input line */
              if (!stgets (buf, MAX_BYTES, in)) break;
              s = buf;
            }
            else {
              if (!kill_comments) stputc (*s, out);
              s++;
            }
          }
          continue;
        }
        /* jump nested tags */
        for (c = 0;; s++) {
          if (*s == '<') c++;
          else if (*s == '>') {
            c--;
            if (c == 0) break;
          }
          else if (*s == 0) {
            /* tag continues on the next input line: append it to buf */
            if (!stgets (buf + strlen (buf), MAX_BYTES - strlen (buf), in)) break;
            s--;
          }
        }
        /* temporarily terminate the tag text at the closing '>' */
        c = *s;
        *s = 0;
        log_printf (2, "tag found: \"%s\"\n", tag + 1);
        /* check for <!arg...> */
        if (strncmp (tag + 1, "arg", 3) == 0) {
          if (can_attach) {
            /* <!args...> */
            if (tag[4] == 's') {
              char temp[32];
              sprintf (temp, "%d", nargs);
              stputs (temp, out);
            }
            /* <!arg[1-9][0-9]*...> */
            else {
              int arg = strtol (tag + 4, NULL, 10);
              if ((arg > 0) && (arg <= nargs) && (args[arg - 1]))
                stputs (args[arg - 1], out);
            }
          }
          used = TRUE;
        }
        /* check for built-ins functions <!...> */
        if (!used) {
          for (i = 0; i < ntags; i++) {
            if (strncmp (tag + 1, tags[i].name, strlen (tags[i].name)) == 0) {
              int x = tag[1 + strlen (tags[i].name)];
              if (IS_BLANK (x) || (!x)) {
                char *tok, *argv[MAX_ARGS];
                char *replacement;
                char *holder = NULL;
                int argc = 0;
                /* first own_strtok call skips the tag name itself */
                for (tok = own_strtok (tag + 2, &holder),
                     tok = own_strtok (NULL, &holder); tok;
                     tok = own_strtok (NULL, &holder))
                  argv[argc++] = tok;
                if ((tags[i].if_tag) || (can_attach)) {
                  current_line = buf;
                  current_col = s + 1;
                  /* call the tag procedure */
                  replacement = (*tags[i].proc) (argc, argv);
                  /* tag proc may have moved the read position */
                  if (s != current_col - 1) {
                    s = current_col - 1;
                    restore = FALSE;
                  }
                  /* text to replace */
                  if (replacement) {
                    stputs (replacement, out);
                    free (replacement);
                  }
                  log_printf (2, "tag \"%s\" was processed\n", tags[i].name);
                }
                else
                  log_printf (2, "tag \"%s\" wasn't processed\n", tags[i].name);
                used = TRUE;
                break;
              }
            }
          }
        }
        /* check for user functional macros <!...> */
        if (!used && can_attach) {
          char *replacement = function_macro (macros_space[nmacros_space-1], tag);
          if (replacement) {
            stputs (replacement, out);
            free (replacement);
            used = TRUE;
          }
        }
        /* well, this is an unknown tag */
        if (!used) {
          char *ptag = process_text (tag);
          if (can_attach) stputc ('<', out);
          if (ptag) {
            if (can_attach) stputs (ptag, out);
            free (ptag);
          }
          if (can_attach) stputc ('>', out);
        }
        /* undo the temporary NUL-termination of the tag */
        if (restore) {
          if (!c) s--;
          else *s = c;
        }
      }
      /* put a character in the output file */
      else if (can_attach) {
        char *replacement = NULL;
        int c, length = 0;
        /* check for macros */
        for (c = 0; c < nmacros_space; c++) {
          replacement = replace_by_macro (macros_space[c], s, &length);
          if (replacement) break;
        }
        /* just put the character */
        if (!replacement) {
          stputc (*s, out);
        }
        /* put the value of the macro */
        else {
          stputs (replacement, out);
          s += length - 1;
          free (replacement);
        }
      }
    }
  }
  delete_token ();
  update_state ();
  /* restore the caller's stream/position state */
  _i_stream = old_i_stream;
  _o_stream = old_o_stream;
  current_line = old_current_line;
  current_col = old_current_col;
}
/****************
 * Parse a header field into tokens as defined by rfc822.
 *
 * hdr is the first physical line of the field; continuation lines
 * (hdr->next with ->cont set) are followed transparently.  Returns a
 * singly linked list of TOKENs (tATOM, tSPECIAL, tQUOTED, tDOMAINLIT,
 * tSPACE) or NULL if hdr is NULL or has no valid "name:" prefix.
 * RFC 822 comments "(...)" are skipped, not tokenized.
 *
 * NOTE(review): TOKEN/HDR_LINE, new_token(), append_to_token(),
 * skip_ws() and memicmp() are declared elsewhere; ownership of the
 * returned list is assumed to pass to the caller -- confirm there. */
static TOKEN parse_field( HDR_LINE hdr )
{
    /* delimiter sets: `specials' per rfc822; `tspecials' per MIME
     * (rfc2045) for the structured headers listed below */
    static const char specials[] = "<>@.,;:\\[]\"()";
    static const char specials2[]= "<>@.,;:";
    static const char tspecials[] = "/?=<>@,;:\\[]\"()";
    static const char tspecials2[]= "/?=<>@.,;:";
    /* headers whose values use the MIME tspecials set */
    static struct {
        const char *name;
        int namelen;
    } tspecial_header[] = {
        { "Content-Type", 12 },
        { "Content-Transfer-Encoding", 25 },
        { NULL, 0 }
    };
    const char *delimiters;
    const char *delimiters2;
    const char *line, *s, *s2;
    size_t n;
    int i, invalid = 0;
    TOKEN t, tok, *tok_head;   /* tok_head: tail pointer for appending */

    if( !hdr )
        return NULL;

    tok = NULL;
    tok_head = &tok;
    line = hdr->line;
    if( !(s = strchr( line, ':' )) )
        return NULL; /* oops */
    n = s - line;
    if( !n )
        return NULL; /* oops: invalid name */

    /* choose the delimiter set by (case-insensitive) field name */
    delimiters = specials;
    delimiters2 = specials2;
    for(i=0; tspecial_header[i].name; i++ ) {
        if( n == tspecial_header[i].namelen
            && !memicmp( line, tspecial_header[i].name, n ) ) {
            delimiters = tspecials;
            delimiters2 = tspecials2;
            break;
        }
    }

    /* At this point we could store the fieldname in the parsing structure.
     * If we decide to do this, we should lowercase the name except for the
     * first character which should be uppercased.  This way we don't
     * need to apply the case insensitive compare in the future */

    s++; /* move over the colon */
    for(;;) {
        if( !*s ) {
            /* end of this physical line: follow a continuation line or
             * stop (note: deliberately falls through to the checks
             * below with s at the new line's start) */
            if( !hdr->next || !hdr->next->cont )
                break;
            hdr = hdr->next;
            s = hdr->line;
        }

        if( *s == '(' ) {
            /* rfc822 comment: skip it, honoring nesting, quoting and
             * backslash escapes, across continuation lines */
            int level = 1;
            int in_quote = 0;

            invalid = 0;
            for(s++ ; ; s++ ) {
                if( !*s ) {
                    if( !hdr->next || !hdr->next->cont )
                        break;
                    hdr = hdr->next;
                    s = hdr->line;
                }

                if( in_quote ) {
                    if( *s == '\"' )
                        in_quote = 0;
                    else if( *s == '\\' && s[1] )  /* what about continuation?*/
                        s++;
                }
                else if( *s == ')' ) {
                    if( !--level )
                        break;
                }
                else if( *s == '(' )
                    level++;
                else if( *s == '\"' )
                    in_quote = 1;
            }
            if( !*s )
                ;/* actually this is an error, but we don't care about it */
            else
                s++;
        }
        else if( *s == '\"' || *s == '[' ) {
            /* quoted string or domain literal: accumulate the (possibly
             * multi-line) content into one token.
             * We do not check for non-allowed nesting of domainliterals */
            int term = *s == '\"' ? '\"' : ']';
            invalid = 0;
            s++;
            t = NULL;
            for(;;) {
                for( s2 = s; *s2; s2++ ) {
                    if( *s2 == term )
                        break;
                    else if( *s2 == '\\' && s2[1] )  /* what about continuation?*/
                        s2++;
                }
                t = t ? append_to_token( t, s, s2-s)
                      : new_token( term == '\"'? tQUOTED : tDOMAINLIT, s, s2-s);
                if( *s2 || !hdr->next || !hdr->next->cont )
                    break;
                hdr = hdr->next;
                s = hdr->line;
            }
            *tok_head = t;
            tok_head = &t->next;
            s = s2;
            if( *s )
                s++; /* skip the delimiter */
        }
        else if( (s2 = strchr( delimiters2, *s )) ) {
            /* special characters which are not handled above */
            invalid = 0;
            t = new_token( tSPECIAL, s, 1 );
            *tok_head = t;
            tok_head = &t->next;
            s++;
        }
        else if( *s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' ) {
            /* linear whitespace: no token emitted */
            invalid = 0;
            s = skip_ws(s+1);
        }
        else if( *s > 0x20 && !(*s & 128) ) { /* atom */
            invalid = 0;
            for( s2 = s+1; *s2 > 0x20
                 && !(*s2 & 128 ) && !strchr( delimiters, *s2 ); s2++ )
                ;
            t = new_token( tATOM, s, s2-s );
            *tok_head = t;
            tok_head = &t->next;
            s = s2;
        }
        else { /* invalid character (control or 8-bit) */
            if( !invalid ) { /* for parsing we assume only one space */
                t = new_token( tSPACE, NULL, 0);
                *tok_head = t;
                tok_head = &t->next;
                invalid = 1;   /* collapse a run of invalid chars */
            }
            s++;
        }
    }

    return tok;
}
/* constructs a queue of tokens in infix order from a space-delimited string */ struct token_queue expr_to_infix(char * str) { struct token_queue queue_infix; /* queue with infix ordering */ enum token_type type = OPERATOR; union token_value value; /* initialize the queue to empty */ queue_infix.front = NULL; queue_infix.back = NULL; /* delimiter string for strtok() -- contains whitespace characters */ #define DELIMS_STR " \n\r\t" for (str = strtok(str, DELIMS_STR); str; str = strtok(NULL, DELIMS_STR)) { /* parse token */ if (strlen(str) == 1) { /* operators are all 1 character */ switch (str[0]) { case '+': type = OPERATOR; value.op_code = ADD; break; case '-': /* check previous token to distinguish between negate (unary) and subtract (binary) */ if (type == OPERATOR) value.op_code = NEGATE; /* unary */ #if PARSE_PARENS else if (type == LPARENS) value.op_code = NEGATE; /* unary */ #endif else value.op_code = SUBTRACT; /* binary */ type = OPERATOR; break; case '*': type = OPERATOR; value.op_code = MULTIPLY; break; case '/': type = OPERATOR; value.op_code = DIVIDE; break; #if PARSE_PARENS case '(': type = LPARENS; break; case ')': type = RPARENS; break; #endif default: /* not an operator */ type = OPERAND; value.operand = strtod(str, NULL); } } else { type = OPERAND; value.operand = strtod(str, NULL); } /* add token with parsed type and value to end of queue */ enqueue(&queue_infix, new_token(type, value)); } return queue_infix; }