WsBool ws_bc_add_function(WsBc *bc, WsUInt8 *index_return, char *name, WsUInt8 num_arguments, WsUInt8 num_locals, WsUInt32 code_size, unsigned char *code) { WsBcFunction *nf; /* First, add the function to the function pool. */ nf = ws_realloc(bc->functions, (bc->num_functions + 1) * sizeof(WsBcFunction)); if (nf == NULL) return WS_FALSE; bc->functions = nf; bc->functions[bc->num_functions].num_arguments = num_arguments; bc->functions[bc->num_functions].num_locals = num_locals; bc->functions[bc->num_functions].code_size = code_size; bc->functions[bc->num_functions].code = ws_memdup(code, code_size); if (bc->functions[bc->num_functions].code == NULL) return WS_FALSE; /* Save the index of the function. */ *index_return = bc->num_functions++; /* For external functions (which have name), add a name entry to the function name pool. */ if (name) { WsBcFunctionName *nfn; nfn = ws_realloc(bc->function_names, ((bc->num_function_names + 1) * sizeof(WsBcFunctionName))); if (nfn == NULL) return WS_FALSE; bc->function_names = nfn; bc->function_names[bc->num_function_names].index = *index_return; bc->function_names[bc->num_function_names].name = ws_strdup(name); if (bc->function_names[bc->num_function_names].name == NULL) return WS_FALSE; bc->num_function_names++; } /* All done. */ return WS_TRUE; }
WsBool ws_bc_add_const_empty_string(WsBc *bc, WsUInt16 *index_return) { WsUInt16 i; WsBcConstant *nc; /* Do we already have a suitable empty string constant? */ for (i = 0; i < bc->num_constants; i++) { if (bc->constants[i].type == WS_BC_CONST_TYPE_EMPTY_STRING) { *index_return = i; return WS_TRUE; } } /* Must add a new constant. */ nc = ws_realloc(bc->constants, (bc->num_constants + 1) * sizeof(WsBcConstant)); if (nc == NULL) return WS_FALSE; bc->constants = nc; bc->constants[bc->num_constants].type = WS_BC_CONST_TYPE_EMPTY_STRING; *index_return = bc->num_constants++; return WS_TRUE; }
WsBool ws_bc_add_const_float(WsBc *bc, WsUInt16 *index_return, WsFloat value) { WsUInt16 i; WsBcConstant *nc; /* Do we already have a suitable float32 constant? */ for (i = 0; i < bc->num_constants; i++) { if (bc->constants[i].type == WS_BC_CONST_TYPE_FLOAT32 && bc->constants[i].u.v_float == value) { *index_return = i; return WS_TRUE; } } /* Must add a new constant. */ nc = ws_realloc(bc->constants, (bc->num_constants + 1) * sizeof(WsBcConstant)); if (nc == NULL) return WS_FALSE; bc->constants = nc; bc->constants[bc->num_constants].type = WS_BC_CONST_TYPE_FLOAT32; bc->constants[bc->num_constants].u.v_float = value; *index_return = bc->num_constants++; return WS_TRUE; }
int ws_utf8_append_char(WsUtf8String *string, unsigned long ch) { unsigned char *d; unsigned int num_bytes = WS_UTF8_ENC_TYPE(ch); unsigned int len, i; if (num_bytes == 0) ws_fatal("ws_utf8_append_char(): 0x%lx is not a valid UTF-8 character", ch); d = ws_realloc(string->data, string->len + num_bytes); if (d == NULL) return 0; len = string->len; /* Encode the continuation bytes (n > 1). */ for (i = num_bytes - 1; i > 0; i--) { d[len + i] = WS_UTF8_ENC_C_BITS; d[len + i] |= ch & WS_UTF8_CONT_DATA_MASK; ch >>= 6; } /* And continue the first byte. */ d[len] = utf8_hibits[num_bytes]; d[len] |= ch; string->data = d; string->len += num_bytes; string->num_chars++; return 1; }
static WsBcPragma *add_pragma(WsBc *bc, WsBcPragmaType type) { WsBcPragma *np; /* Add a new pragma slot. */ np = ws_realloc(bc->pragmas, (bc->num_pragmas + 1) * sizeof(WsBcPragma)); if (np == NULL) return NULL; bc->pragmas = np; bc->pragmas[bc->num_pragmas].type = type; return &bc->pragmas[bc->num_pragmas++]; }
void ws_function(WsCompiler *compiler, WsBool externp, char *name, WsUInt32 line, WsList *params, WsList *block) { WsFunctionHash *hash; WsFunction *f = ws_realloc(compiler->functions, ((compiler->num_functions + 1) * sizeof(WsFunction))); if (f == NULL) { ws_free(name); ws_error_memory(compiler); return; } if (externp) compiler->num_extern_functions++; else compiler->num_local_functions++; compiler->functions = f; f = &compiler->functions[compiler->num_functions]; f->findex = compiler->num_functions++; f->externp = externp; f->name = name; f->line = line; f->params = params; f->block = block; /* Update the function name hash. */ hash = ws_function_hash(compiler, name); if (hash == NULL) { ws_error_memory(compiler); return; } if (hash->defined) { ws_src_error(compiler, line, "redefinition of `%s'", name); ws_src_error(compiler, compiler->functions[hash->findex].line, "`%s' previously defined here", name); return; } hash->defined = WS_TRUE; hash->findex = f->findex; }
WsBool ws_buffer_append_space(WsBuffer *buffer, unsigned char **p, size_t size) { unsigned char *ndata = ws_realloc(buffer->data, buffer->len + size); if (ndata == NULL) return WS_FALSE; buffer->data = ndata; if (p) *p = buffer->data + buffer->len; buffer->len += size; return WS_TRUE; }
WsBool ws_lexer_register_block(WsCompiler *compiler, void *ptr) { void **n; if (ptr == NULL) return WS_TRUE; n = ws_realloc(compiler->lexer_active_list, ((compiler->lexer_active_list_size + 1) * sizeof(void *))); if (n == NULL) return WS_FALSE; compiler->lexer_active_list = n; compiler->lexer_active_list[compiler->lexer_active_list_size++] = ptr; return WS_TRUE; }
WsBool ws_bc_add_const_utf8_string(WsBc *bc, WsUInt16 *index_return, const unsigned char *data, size_t len) { WsUInt16 i; WsBcConstant *nc; /* Do we already have a suitable UFT-8 constant? */ for (i = 0; i < bc->num_constants; i++) { if (bc->constants[i].type == WS_BC_CONST_TYPE_UTF8_STRING && bc->constants[i].u.v_string.len == len && memcmp(bc->constants[i].u.v_string.data, data, len) == 0) { *index_return = i; return WS_TRUE; } } /* Must add a new constant. */ nc = ws_realloc(bc->constants, (bc->num_constants + 1) * sizeof(WsBcConstant)); if (nc == NULL) return WS_FALSE; bc->constants = nc; bc->constants[bc->num_constants].type = WS_BC_CONST_TYPE_UTF8_STRING; bc->constants[bc->num_constants].u.v_string.len = len; bc->constants[bc->num_constants].u.v_string.data = ws_memdup(data, len); if (bc->constants[bc->num_constants].u.v_string.data == NULL) return WS_FALSE; *index_return = bc->num_constants++; return WS_TRUE; }
int ws_yy_lex(YYSTYPE *yylval, YYLTYPE *yylloc, void *context) { WsCompiler *compiler = (WsCompiler *) context; WsUInt32 ch, ch2; WsBuffer buffer; unsigned char *p; WsBool success; /* Just check that we get the correct amount of arguments. */ gw_assert(compiler->magic == COMPILER_MAGIC); while (ws_stream_getc(compiler->input, &ch)) { /* Save the token's line number. */ yylloc->first_line = compiler->linenum; switch (ch) { case '\t': /* Whitespace characters. */ case '\v': case '\f': case ' ': continue; case '\n': /* Line terminators. */ case '\r': if (ch == '\r' && ws_stream_getc(compiler->input, &ch2)) { if (ch2 != '\n') ws_stream_ungetc(compiler->input, ch2); } compiler->linenum++; continue; case '!': /* !, != */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tNE; ws_stream_ungetc(compiler->input, ch2); } return '!'; case '%': /* %, %= */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tREMA; ws_stream_ungetc(compiler->input, ch2); } return '%'; case '&': /* &, &&, &= */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '&') return tAND; if (ch2 == '=') return tANDA; ws_stream_ungetc(compiler->input, ch2); } return '&'; case '*': /* *, *= */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tMULA; ws_stream_ungetc(compiler->input, ch2); } return '*'; case '+': /* +, ++, += */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '+') return tPLUSPLUS; if (ch2 == '=') return tADDA; ws_stream_ungetc(compiler->input, ch2); } return '+'; case '-': /* -, --, -= */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '-') return tMINUSMINUS; if (ch2 == '=') return tSUBA; ws_stream_ungetc(compiler->input, ch2); } return '-'; case '.': if (ws_stream_getc(compiler->input, &ch2)) { if (WS_IS_DECIMAL_DIGIT(ch2)) { /* DecimalFloatLiteral. */ ws_buffer_init(&buffer); if (!ws_buffer_append_space(&buffer, &p, 2)) { ws_error_memory(compiler); ws_buffer_uninit(&buffer); return EOF; } p[0] = '.'; p[1] = (unsigned char) ch2; success = read_float_from_point(compiler, &buffer, &yylval->vfloat); ws_buffer_uninit(&buffer); if (!success) return EOF; return tFLOAT; } ws_stream_ungetc(compiler->input, ch2); } return '.'; case '/': /* /, /=, block or a single line comment */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '*') { /* Block comment. */ while (1) { if (!ws_stream_getc(compiler->input, &ch)) { ws_src_error(compiler, 0, "EOF in comment"); return EOF; } if (ch == '\n' || ch == '\r') { /* Line terminators. */ if (ch == '\r' && ws_stream_getc(compiler->input, &ch2)) { if (ch2 != '\n') ws_stream_ungetc(compiler->input, ch2); } compiler->linenum++; /* Continue reading the block comment. */ continue; } if (ch == '*' && ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '/') /* The end of the comment found. */ break; ws_stream_ungetc(compiler->input, ch2); } } /* Continue after the comment. */ continue; } if (ch2 == '/') { /* Single line comment. */ while (1) { if (!ws_stream_getc(compiler->input, &ch)) /* The end of input stream reached. We accept this as a valid comment terminator. */ break; if (ch == '\n' || ch == '\r') { /* Line terminators. */ if (ch == '\r' && ws_stream_getc(compiler->input, &ch2)) { if (ch2 != '\n') ws_stream_ungetc(compiler->input, ch2); } /* The end of the line (and the comment) reached. */ compiler->linenum++; break; } } /* Continue after the comment. */ continue; } if (ch2 == '=') return tDIVA; ws_stream_ungetc(compiler->input, ch2); } return '/'; case '<': /* <, <<, <<=, <= */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '<') { if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tLSHIFTA; ws_stream_ungetc(compiler->input, ch2); } return tLSHIFT; } if (ch2 == '=') return tLE; ws_stream_ungetc(compiler->input, ch2); } return '<'; case '=': /* =, == */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tEQ; ws_stream_ungetc(compiler->input, ch2); } return '='; case '>': /* >, >=, >>, >>=, >>>, >>>= */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '>') { if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '>') { if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tRSZSHIFTA; ws_stream_ungetc(compiler->input, ch2); } return tRSZSHIFT; } if (ch2 == '=') return tRSSHIFTA; ws_stream_ungetc(compiler->input, ch2); } return tRSSHIFT; } if (ch2 == '=') return tGE; ws_stream_ungetc(compiler->input, ch2); } return '>'; case '^': /* ^, ^= */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tXORA; ws_stream_ungetc(compiler->input, ch2); } return '^'; case '|': /* |, |=, || */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == '=') return tORA; if (ch2 == '|') return tOR; ws_stream_ungetc(compiler->input, ch2); } return '|'; case '#': /* The simple cases. */ case '(': case ')': case ',': case ':': case ';': case '?': case '{': case '}': case '~': return (int) ch; case '\'': /* String literals. */ case '"': { WsUInt32 string_end_ch = ch; WsUtf8String *str = ws_utf8_alloc(); if (str == NULL) { ws_error_memory(compiler); return EOF; } while (1) { if (!ws_stream_getc(compiler->input, &ch)) { eof_in_string_literal: ws_src_error(compiler, 0, "EOF in string literal"); ws_utf8_free(str); return EOF; } if (ch == string_end_ch) /* The end of string reached. */ break; if (ch == '\\') { /* An escape sequence. */ if (!ws_stream_getc(compiler->input, &ch)) goto eof_in_string_literal; switch (ch) { case '\'': case '"': case '\\': case '/': /* The character as-is. */ break; case 'b': ch = '\b'; break; case 'f': ch = '\f'; break; case 'n': ch = '\n'; break; case 'r': ch = '\r'; break; case 't': ch = '\t'; break; case 'x': case 'u': { int i, len; int type = ch; if (ch == 'x') len = 2; else len = 4; ch = 0; for (i = 0; i < len; i++) { if (!ws_stream_getc(compiler->input, &ch2)) goto eof_in_string_literal; if (!WS_IS_HEX_DIGIT(ch2)) { ws_src_error(compiler, 0, "malformed `\\%c' escape in " "string literal", (char) type); ch = 0; break; } ch *= 16; ch += WS_HEX_TO_INT(ch2); } } break; default: if (WS_IS_OCTAL_DIGIT(ch)) { int i; int limit = 3; ch = WS_OCTAL_TO_INT(ch); if (ch > 3) limit = 2; for (i = 1; i < limit; i++) { if (!ws_stream_getc(compiler->input, &ch2)) goto eof_in_string_literal; if (!WS_IS_OCTAL_DIGIT(ch2)) { ws_stream_ungetc(compiler->input, ch2); break; } ch *= 8; ch += WS_OCTAL_TO_INT(ch2); } } else { ws_src_error(compiler, 0, "unknown escape sequence `\\%c' in " "string literal", (char) ch); ch = 0; } break; } /* FALLTHROUGH */ } if (!ws_utf8_append_char(str, ch)) { ws_error_memory(compiler); ws_utf8_free(str); return EOF; } } if (!ws_lexer_register_utf8(compiler, str)) { ws_error_memory(compiler); ws_utf8_free(str); return EOF; } gw_assert(str != NULL); yylval->string = str; return tSTRING; } break; default: /* Identifiers, keywords and number constants. */ if (WS_IS_IDENTIFIER_LETTER(ch)) { WsBool got; int token; unsigned char *p; unsigned char *np; size_t len = 0; /* An identifier or a keyword. We start with a 256 * bytes long buffer but it is expanded dynamically if * needed. However, 256 should be enought for most * cases since the byte-code format limits the function * names to 255 characters. */ p = ws_malloc(256); if (p == NULL) { ws_error_memory(compiler); return EOF; } do { /* Add one extra for the possible terminator character. */ np = ws_realloc(p, len + 2); if (np == NULL) { ws_error_memory(compiler); ws_free(p); return EOF; } p = np; /* This is ok since the only valid identifier names * can be written in 7 bit ASCII. */ p[len++] = (unsigned char) ch; } while ((got = ws_stream_getc(compiler->input, &ch)) && (WS_IS_IDENTIFIER_LETTER(ch) || WS_IS_DECIMAL_DIGIT(ch))); if (got) /* Put back the terminator character. */ ws_stream_ungetc(compiler->input, ch); /* Is it a keyword? */ if (lookup_keyword((char *) p, len, &token)) { /* Yes it is... */ ws_free(p); /* ...except one case: `div='. */ if (token == tIDIV) { if (ws_stream_getc(compiler->input, &ch)) { if (ch == '=') return tIDIVA; ws_stream_ungetc(compiler->input, ch); } } /* Return the token value. */ return token; } /* It is a normal identifier. Let's pad the name with a null-character. We have already allocated space for it. */ p[len] = '\0'; if (!ws_lexer_register_block(compiler, p)) { ws_error_memory(compiler); ws_free(p); return EOF; } gw_assert(p != NULL); yylval->identifier = (char *) p; return tIDENTIFIER; } if (WS_IS_NON_ZERO_DIGIT(ch)) { /* A decimal integer literal or a decimal float literal. */ ws_buffer_init(&buffer); if (!ws_buffer_append_space(&buffer, &p, 1)) { number_error_memory: ws_error_memory(compiler); ws_buffer_uninit(&buffer); return EOF; } p[0] = ch; while (ws_stream_getc(compiler->input, &ch)) { if (WS_IS_DECIMAL_DIGIT(ch)) { if (!ws_buffer_append_space(&buffer, &p, 1)) goto number_error_memory; p[0] = ch; } else if (ch == '.' || ch == 'e' || ch == 'E') { /* DecimalFloatLiteral. */ if (ch == '.') { if (!ws_buffer_append_space(&buffer, &p, 1)) goto number_error_memory; p[0] = '.'; success = read_float_from_point(compiler, &buffer, &yylval->vfloat); } else { ws_stream_ungetc(compiler->input, ch); success = read_float_from_exp(compiler, &buffer, &yylval->vfloat); } ws_buffer_uninit(&buffer); if (!success) return EOF; return tFLOAT; } else { ws_stream_ungetc(compiler->input, ch); break; } } /* Now the buffer contains an integer number as a string. Let's convert it to an integer number. */ yylval->integer = buffer_to_int(compiler, &buffer); ws_buffer_uninit(&buffer); /* Read a DecimalIntegerLiteral. */ return tINTEGER; } if (ch == '0') { /* The integer constant 0, an octal number or a HexIntegerLiteral. */ if (ws_stream_getc(compiler->input, &ch2)) { if (ch2 == 'x' || ch2 == 'X') { /* HexIntegerLiteral. */ ws_buffer_init(&buffer); if (!ws_buffer_append_space(&buffer, &p, 2)) goto number_error_memory; p[0] = '0'; p[1] = 'x'; while (ws_stream_getc(compiler->input, &ch)) { if (WS_IS_HEX_DIGIT(ch)) { if (!ws_buffer_append_space(&buffer, &p, 1)) goto number_error_memory; p[0] = ch; } else { ws_stream_ungetc(compiler->input, ch); break; } } if (ws_buffer_len(&buffer) == 2) { ws_buffer_uninit(&buffer); ws_src_error(compiler, 0, "numeric constant with no digits"); yylval->integer = 0; return tINTEGER; } /* Now the buffer contains an integer number as * a string. Let's convert it to an integer * number. */ yylval->integer = buffer_to_int(compiler, &buffer); ws_buffer_uninit(&buffer); /* Read a HexIntegerLiteral. */ return tINTEGER; } if (WS_IS_OCTAL_DIGIT(ch2)) { /* OctalIntegerLiteral. */ ws_buffer_init(&buffer); if (!ws_buffer_append_space(&buffer, &p, 2)) goto number_error_memory; p[0] = '0'; p[1] = ch2; while (ws_stream_getc(compiler->input, &ch)) { if (WS_IS_OCTAL_DIGIT(ch)) { if (!ws_buffer_append_space(&buffer, &p, 1)) goto number_error_memory; p[0] = ch; } else { ws_stream_ungetc(compiler->input, ch); break; } } /* Convert the buffer into an intger number. */ yylval->integer = buffer_to_int(compiler, &buffer); ws_buffer_uninit(&buffer); /* Read an OctalIntegerLiteral. */ return tINTEGER; } if (ch2 == '.' || ch2 == 'e' || ch2 == 'E') { /* DecimalFloatLiteral. */ ws_buffer_init(&buffer); if (ch2 == '.') { if (!ws_buffer_append_space(&buffer, &p, 1)) goto number_error_memory; p[0] = '.'; success = read_float_from_point(compiler, &buffer, &yylval->vfloat); } else { ws_stream_ungetc(compiler->input, ch); success = read_float_from_exp(compiler, &buffer, &yylval->vfloat); } ws_buffer_uninit(&buffer); if (!success) return EOF; return tFLOAT; } ws_stream_ungetc(compiler->input, ch2); } /* Integer literal 0. */ yylval->integer = 0; return tINTEGER; } /* Garbage found from the input stream. */ ws_src_error(compiler, 0, "garbage found from the input stream: character=0x%x", ch); return EOF; break; } } return EOF; }