/*
 * Serialize the byte-code module `bc' into a contiguous byte array.
 *
 * The output consists of a header (one version byte followed by the
 * multi-byte encoded size of the rest), the constant pool, the pragma
 * pool and the function pool (function name table followed by the
 * function bodies).
 *
 * On success the function returns WS_TRUE, stores a pointer to the
 * first header byte in `data_return' and the total length in
 * `data_len_return'.  The returned pointer lies inside the storage of
 * the internal work buffer, which is intentionally not released on the
 * success path; it is up to the caller to release it.
 *
 * On failure the work buffer is released, `*data_return' is set to NULL,
 * `*data_len_return' to 0 and WS_FALSE is returned.
 */
WsBool ws_bc_encode(WsBc *bc, unsigned char **data_return,
                    size_t *data_len_return)
{
    WsBuffer buffer;
    WsUInt32 ui;
    unsigned char data[64];     /* Scratch: float image / mb-uint32. */
    unsigned char *p, *mb;
    size_t len;

    ws_buffer_init(&buffer);

    /* Append space for the header.  We do not know yet the size of the
       resulting byte-code, so the maximum header length is reserved and
       the real header is written right-aligned into it at the end. */
    if (!ws_buffer_append_space(&buffer, NULL, WS_BC_MAX_HEADER_LEN))
        goto error;

    /* Constants. */

    if (!ws_encode_buffer(&buffer,
                          WS_ENC_MB_UINT16, bc->num_constants,
                          WS_ENC_MB_UINT16, (WsUInt16) bc->string_encoding,
                          WS_ENC_END))
        goto error;

    for (ui = 0; ui < bc->num_constants; ui++) {
        switch (bc->constants[ui].type) {
        case WS_BC_CONST_TYPE_INT:
            /* Use the narrowest integer representation that can hold
               the value. */
            if (WS_INT8_MIN <= bc->constants[ui].u.v_int
                && bc->constants[ui].u.v_int <= WS_INT8_MAX) {
                if (!ws_encode_buffer(&buffer,
                                      WS_ENC_UINT8, (WsUInt8) WS_BC_CONST_INT8,
                                      WS_ENC_INT8,
                                      (WsInt8) bc->constants[ui].u.v_int,
                                      WS_ENC_END))
                    goto error;
            } else if (WS_INT16_MIN <= bc->constants[ui].u.v_int
                       && bc->constants[ui].u.v_int <= WS_INT16_MAX) {
                if (!ws_encode_buffer(&buffer,
                                      WS_ENC_UINT8, (WsUInt8) WS_BC_CONST_INT16,
                                      WS_ENC_INT16,
                                      (WsInt16) bc->constants[ui].u.v_int,
                                      WS_ENC_END))
                    goto error;
            } else {
                if (!ws_encode_buffer(&buffer,
                                      WS_ENC_UINT8, (WsUInt8) WS_BC_CONST_INT32,
                                      WS_ENC_INT32, bc->constants[ui].u.v_int,
                                      WS_ENC_END))
                    goto error;
            }
            break;

        case WS_BC_CONST_TYPE_FLOAT32:
        case WS_BC_CONST_TYPE_FLOAT32_NAN:
        case WS_BC_CONST_TYPE_FLOAT32_POSITIVE_INF:
        case WS_BC_CONST_TYPE_FLOAT32_NEGATIVE_INF:
            /* Select the 4-byte image to emit: either the encoded value
               or one of the predefined special patterns. */
            switch (bc->constants[ui].type) {
            case WS_BC_CONST_TYPE_FLOAT32:
                ws_ieee754_encode_single(bc->constants[ui].u.v_float, data);
                p = data;
                break;

            case WS_BC_CONST_TYPE_FLOAT32_NAN:
                p = ws_ieee754_nan;
                break;

            case WS_BC_CONST_TYPE_FLOAT32_POSITIVE_INF:
                p = ws_ieee754_positive_inf;
                break;

            case WS_BC_CONST_TYPE_FLOAT32_NEGATIVE_INF:
                p = ws_ieee754_negative_inf;
                break;

            default:
                ws_fatal("ws_bc_encode(): internal inconsistency");
                /* NOTREACHED */
                p = NULL;  /* Initialized to keep compiler quiet. */
                break;
            }

            if (!ws_encode_buffer(&buffer,
                                  WS_ENC_UINT8, (WsUInt8) WS_BC_CONST_FLOAT32,
                                  WS_ENC_DATA, p, 4,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_BC_CONST_TYPE_UTF8_STRING:
            /* Encode the strings as requested. */
            switch (bc->string_encoding) {
            case WS_BC_STRING_ENC_ISO_8859_1:
                {
                    WsUtf8String *string = ws_utf8_alloc();
                    unsigned char *latin1;
                    size_t latin1_len;
                    WsBool success;

                    if (string == NULL)
                        goto error;

                    /* Create an UTF-8 string. */
                    if (!ws_utf8_set_data(string,
                                          bc->constants[ui].u.v_string.data,
                                          bc->constants[ui].u.v_string.len)) {
                        ws_utf8_free(string);
                        goto error;
                    }

                    /* Convert it to latin1. */
                    latin1 = ws_utf8_to_latin1(string, '?', &latin1_len);

                    /* We are done with the UTF-8 string. */
                    ws_utf8_free(string);

                    if (latin1 == NULL)
                        goto error;

                    /* Encode it.  The converted data is released before
                       the result is checked so that it cannot leak on
                       the error path. */
                    success = ws_encode_buffer(
                                  &buffer,
                                  WS_ENC_UINT8,
                                  (WsUInt8) WS_BC_CONST_EXT_ENC_STRING,
                                  WS_ENC_MB_UINT32, (WsUInt32) latin1_len,
                                  WS_ENC_DATA, latin1, latin1_len,
                                  WS_ENC_END);
                    ws_utf8_free_data(latin1);

                    if (!success)
                        goto error;
                }
                break;

            case WS_BC_STRING_ENC_UTF8:
                if (!ws_encode_buffer(
                        &buffer,
                        WS_ENC_UINT8,
                        (WsUInt8) WS_BC_CONST_UTF8_STRING,
                        WS_ENC_MB_UINT32,
                        (WsUInt32) bc->constants[ui].u.v_string.len,
                        WS_ENC_DATA,
                        bc->constants[ui].u.v_string.data,
                        bc->constants[ui].u.v_string.len,
                        WS_ENC_END))
                    goto error;
                break;
            }
            break;

        case WS_BC_CONST_TYPE_EMPTY_STRING:
            if (!ws_encode_buffer(&buffer,
                                  WS_ENC_UINT8,
                                  (WsUInt8) WS_BC_CONST_EMPTY_STRING,
                                  WS_ENC_END))
                goto error;
            break;
        }
    }

    /* Pragmas. */

    if (!ws_encode_buffer(&buffer,
                          WS_ENC_MB_UINT16, bc->num_pragmas,
                          WS_ENC_END))
        goto error;

    for (ui = 0; ui < bc->num_pragmas; ui++) {
        switch (bc->pragmas[ui].type) {
        case WS_BC_PRAGMA_TYPE_ACCESS_DOMAIN:
            if (!ws_encode_buffer(&buffer,
                                  WS_ENC_UINT8,
                                  (WsUInt8) WS_BC_PRAGMA_ACCESS_DOMAIN,
                                  WS_ENC_MB_UINT16, bc->pragmas[ui].index_1,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_BC_PRAGMA_TYPE_ACCESS_PATH:
            if (!ws_encode_buffer(&buffer,
                                  WS_ENC_UINT8,
                                  (WsUInt8) WS_BC_PRAGMA_ACCESS_PATH,
                                  WS_ENC_MB_UINT16, bc->pragmas[ui].index_1,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_BC_PRAGMA_TYPE_USER_AGENT_PROPERTY:
            if (!ws_encode_buffer(&buffer,
                                  WS_ENC_UINT8,
                                  (WsUInt8) WS_BC_PRAGMA_USER_AGENT_PROPERTY,
                                  WS_ENC_MB_UINT16, bc->pragmas[ui].index_1,
                                  WS_ENC_MB_UINT16, bc->pragmas[ui].index_2,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_BC_PRAGMA_TYPE_USER_AGENT_PROPERTY_AND_SCHEME:
            if (!ws_encode_buffer(
                    &buffer,
                    WS_ENC_UINT8,
                    (WsUInt8) WS_BC_PRAGMA_USER_AGENT_PROPERTY_AND_SCHEME,
                    WS_ENC_MB_UINT16, bc->pragmas[ui].index_1,
                    WS_ENC_MB_UINT16, bc->pragmas[ui].index_2,
                    WS_ENC_MB_UINT16, bc->pragmas[ui].index_3,
                    WS_ENC_END))
                goto error;
            break;
        }
    }

    /* Function pool. */

    if (!ws_encode_buffer(&buffer,
                          WS_ENC_UINT8, bc->num_functions,
                          WS_ENC_END))
        goto error;

    /* Function names. */

    if (!ws_encode_buffer(&buffer,
                          WS_ENC_UINT8, bc->num_function_names,
                          WS_ENC_END))
        goto error;

    for (ui = 0; ui < bc->num_function_names; ui++) {
        size_t name_len = strlen(bc->function_names[ui].name);

        /* The name length is stored in a single byte.  Emitting a longer
           name would write a wrapped length followed by all `name_len'
           bytes of data, desynchronising everything after it, so such
           names are rejected instead. */
        if (name_len > 0xff)
            goto error;

        if (!ws_encode_buffer(&buffer,
                              WS_ENC_UINT8, bc->function_names[ui].index,
                              WS_ENC_UINT8, (WsUInt8) name_len,
                              WS_ENC_DATA, bc->function_names[ui].name,
                              name_len,
                              WS_ENC_END))
            goto error;
    }

    /* Functions. */

    for (ui = 0; ui < bc->num_functions; ui++) {
        if (!ws_encode_buffer(&buffer,
                              WS_ENC_UINT8, bc->functions[ui].num_arguments,
                              WS_ENC_UINT8, bc->functions[ui].num_locals,
                              WS_ENC_MB_UINT32, bc->functions[ui].code_size,
                              WS_ENC_DATA, bc->functions[ui].code,
                              (size_t) bc->functions[ui].code_size,
                              WS_ENC_END))
            goto error;
    }

    /* Fix the byte-code header. */

    p = ws_buffer_ptr(&buffer);

    /* Encode the size of the byte-code excluding the byte-code header.
       The encoded size is placed so that it ends exactly where the
       reserved header area ends. */
    mb = ws_encode_mb_uint32(ws_buffer_len(&buffer) - WS_BC_MAX_HEADER_LEN,
                             data, &len);
    memcpy(p + WS_BC_MAX_HEADER_LEN - len, mb, len);

    /* Set the byte-code file version information, in the byte just
       before the encoded size. */
    WS_PUT_UINT8(p + WS_BC_MAX_HEADER_LEN - len - 1, WS_BC_VERSION);

    /* Calculate the beginning of the bc-array and its size.  Unused
       bytes at the start of the reserved header area are skipped. */
    *data_return = p + WS_BC_MAX_HEADER_LEN - len - 1;
    *data_len_return = ws_buffer_len(&buffer) - WS_BC_MAX_HEADER_LEN + len + 1;

    /* All done. */
    return WS_TRUE;

    /*
     * Error handling.
     */

error:

    ws_buffer_uninit(&buffer);
    *data_return = NULL;
    *data_len_return = 0;

    return WS_FALSE;
}
/*
 * Turn the symbolic assembler list of `compiler' (starting from
 * `compiler->asm_head') into byte-code appended to
 * `compiler->byte_code'.
 *
 * The work is done in two phases:
 *
 *  1. Offsets are assigned to every instruction and each pseudo
 *     instruction is replaced with a real one.  Jumps start in a wide
 *     form and are narrowed when their distance allows it.  Narrowing
 *     changes the offsets of later instructions, so the phase is repeated
 *     until a pass makes no changes.
 *
 *  2. The instructions are encoded into the byte-code buffer.
 *
 * If encoding fails, the error is reported with ws_error_memory().
 */
void ws_asm_linearize(WsCompiler *compiler)
{
    WsAsmIns *ins;
    WsBool process_again = WS_TRUE;

    /* Calculate all offsets and select real assembler instructions for
       our internal pseudo instructions.  This is continued as long as
       the code changes. */
    while (process_again) {
        /* Offsets start from 1; the forward-jump test below treats a
           label offset of 0 as "label not placed yet". */
        WsUInt32 offset = 1;

        process_again = WS_FALSE;

        for (ins = compiler->asm_head; ins; ins = ins->next) {
            ins->offset = offset;

            switch (ins->type) {
            case WS_ASM_JUMP_FW_S:
                ins->ws_offset = (ins->ws_label->offset
                                  - (offset + WS_OPSIZE(ins->type)));
                break;

            case WS_ASM_JUMP_FW:
                ins->ws_offset = (ins->ws_label->offset
                                  - (offset + WS_OPSIZE(ins->type)));

                if (ins->ws_offset <= 31) {
                    ins->type = WS_ASM_JUMP_FW_S;
                    process_again = WS_TRUE;
                }
                break;

            case WS_ASM_JUMP_FW_W:
                ins->ws_offset = (ins->ws_label->offset
                                  - (offset + WS_OPSIZE(ins->type)));

                if (ins->ws_offset <= 31) {
                    ins->type = WS_ASM_JUMP_FW_S;
                    process_again = WS_TRUE;
                } else if (ins->ws_offset <= 255) {
                    ins->type = WS_ASM_JUMP_FW;
                    process_again = WS_TRUE;
                }
                break;

            case WS_ASM_JUMP_BW_S:
                ins->ws_offset = offset - ins->ws_label->offset;
                break;

            case WS_ASM_JUMP_BW:
                ins->ws_offset = offset - ins->ws_label->offset;

                if (ins->ws_offset <= 31) {
                    ins->type = WS_ASM_JUMP_BW_S;
                    process_again = WS_TRUE;
                }
                break;

            case WS_ASM_JUMP_BW_W:
                ins->ws_offset = offset - ins->ws_label->offset;

                if (ins->ws_offset <= 31) {
                    ins->type = WS_ASM_JUMP_BW_S;
                    process_again = WS_TRUE;
                } else if (ins->ws_offset <= 255) {
                    ins->type = WS_ASM_JUMP_BW;
                    process_again = WS_TRUE;
                }
                break;

            case WS_ASM_TJUMP_FW_S:
                ins->ws_offset = (ins->ws_label->offset
                                  - (offset + WS_OPSIZE(ins->type)));
                break;

            case WS_ASM_TJUMP_FW:
                ins->ws_offset = (ins->ws_label->offset
                                  - (offset + WS_OPSIZE(ins->type)));

                if (ins->ws_offset <= 31) {
                    ins->type = WS_ASM_TJUMP_FW_S;
                    process_again = WS_TRUE;
                }
                break;

            case WS_ASM_TJUMP_FW_W:
                ins->ws_offset = (ins->ws_label->offset
                                  - (offset + WS_OPSIZE(ins->type)));

                if (ins->ws_offset <= 31) {
                    ins->type = WS_ASM_TJUMP_FW_S;
                    process_again = WS_TRUE;
                } else if (ins->ws_offset <= 255) {
                    ins->type = WS_ASM_TJUMP_FW;
                    process_again = WS_TRUE;
                }
                break;

            case WS_ASM_TJUMP_BW:
                ins->ws_offset = offset - ins->ws_label->offset;
                break;

            case WS_ASM_TJUMP_BW_W:
                ins->ws_offset = offset - ins->ws_label->offset;

                if (ins->ws_offset <= 255) {
                    ins->type = WS_ASM_TJUMP_BW;
                    process_again = WS_TRUE;
                }
                break;

                /*
                 * The pseudo instructions.
                 */

            case WS_ASM_P_LABEL:
                /* Nothing here. */
                break;

            case WS_ASM_P_JUMP:
                if (ins->ws_label->offset == 0) {
                    /* A forward jump.  Let's assume the widest form.
                       The jump distance is not known yet, so another
                       pass is required to compute `ws_offset' (and
                       possibly narrow the instruction), exactly as for
                       WS_ASM_P_TJUMP below. */
                    ins->type = WS_ASM_JUMP_FW_W;
                    process_again = WS_TRUE;
                } else {
                    ins->ws_offset = offset - ins->ws_label->offset;

                    /* Jump backwards. */
                    if (ins->ws_offset <= 31) {
                        ins->type = WS_ASM_JUMP_BW_S;
                    } else if (ins->ws_offset <= 255) {
                        ins->type = WS_ASM_JUMP_BW;
                    } else {
                        ins->type = WS_ASM_JUMP_BW_W;
                    }
                }
                break;

            case WS_ASM_P_TJUMP:
                if (ins->ws_label->offset == 0) {
                    /* A forward jump.  Let's assume the widest form. */
                    ins->type = WS_ASM_TJUMP_FW_W;
                    process_again = WS_TRUE;
                } else {
                    ins->ws_offset = offset - ins->ws_label->offset;

                    /* Jump backwards. */
                    if (ins->ws_offset <= 255) {
                        ins->type = WS_ASM_TJUMP_BW;
                    } else {
                        ins->type = WS_ASM_TJUMP_BW_W;
                    }
                }
                break;

            case WS_ASM_P_CALL:
                if (ins->ws_findex <= 7) {
                    /* The most compact form. */
                    ins->type = WS_ASM_CALL_S;
                } else {
                    /* The wider form. */
                    ins->type = WS_ASM_CALL;
                }
                break;

            case WS_ASM_P_CALL_LIB:
                if (ins->ws_findex <= 7 && ins->ws_lindex <= 255) {
                    /* The most compact form. */
                    ins->type = WS_ASM_CALL_LIB_S;
                } else if (ins->ws_findex <= 255 && ins->ws_lindex <= 255) {
                    /* The quite compact form. */
                    ins->type = WS_ASM_CALL_LIB;
                } else {
                    /* The most liberal form. */
                    ins->type = WS_ASM_CALL_LIB_W;
                }
                break;

            case WS_ASM_P_CALL_URL:
                if (ins->ws_findex <= 255 && ins->ws_lindex <= 255)
                    /* The compact form. */
                    ins->type = WS_ASM_CALL_URL;
                else
                    ins->type = WS_ASM_CALL_URL_W;
                break;

            case WS_ASM_P_LOAD_VAR:
                if (ins->ws_vindex <= 31)
                    /* The compact form. */
                    ins->type = WS_ASM_LOAD_VAR_S;
                else
                    ins->type = WS_ASM_LOAD_VAR;
                break;

            case WS_ASM_P_STORE_VAR:
                if (ins->ws_vindex <= 15)
                    ins->type = WS_ASM_STORE_VAR_S;
                else
                    ins->type = WS_ASM_STORE_VAR;
                break;

            case WS_ASM_P_INCR_VAR:
                if (ins->ws_vindex <= 7)
                    ins->type = WS_ASM_INCR_VAR_S;
                else
                    ins->type = WS_ASM_INCR_VAR;
                break;

            case WS_ASM_P_LOAD_CONST:
                if (ins->ws_cindex <= 15)
                    ins->type = WS_ASM_LOAD_CONST_S;
                else if (ins->ws_cindex <= 255)
                    ins->type = WS_ASM_LOAD_CONST;
                else
                    ins->type = WS_ASM_LOAD_CONST_W;
                break;
            }

            /* After the selection above only labels may remain as
               pseudo instructions.  Labels occupy no space. */
            gw_assert(ins->type == WS_ASM_P_LABEL || ins->type < 0x100);

            if (ins->type != WS_ASM_P_LABEL) {
                gw_assert(operands[ins->type].name != NULL);
                offset += operands[ins->type].size;
            }
        }
    }

    /* Ok, ready to linearize the byte-code. */
    for (ins = compiler->asm_head; ins; ins = ins->next) {
        if (ins->type == WS_ASM_P_LABEL)
            continue;

        gw_assert(ins->type <= 0xff);

        switch (ins->type) {
        case WS_ASM_JUMP_FW_S:
        case WS_ASM_JUMP_BW_S:
        case WS_ASM_TJUMP_FW_S:
            /* Opcode and offset share a single byte. */
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE,
                                  WS_ASM_GLUE(ins->type, ins->ws_offset),
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_JUMP_FW:
        case WS_ASM_JUMP_BW:
        case WS_ASM_TJUMP_FW:
        case WS_ASM_TJUMP_BW:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_offset,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_JUMP_FW_W:
        case WS_ASM_JUMP_BW_W:
        case WS_ASM_TJUMP_FW_W:
        case WS_ASM_TJUMP_BW_W:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, ins->type,
                                  WS_ENC_UINT16, (WsUInt16) ins->ws_offset,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_CALL_S:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE,
                                  WS_ASM_GLUE(ins->type, ins->ws_findex),
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_CALL:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_findex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_CALL_LIB_S:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE,
                                  WS_ASM_GLUE(ins->type, ins->ws_findex),
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_lindex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_CALL_LIB:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_findex,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_lindex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_CALL_LIB_W:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_findex,
                                  WS_ENC_UINT16, (WsUInt16) ins->ws_lindex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_CALL_URL:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_lindex,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_findex,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_args,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_CALL_URL_W:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT16, (WsUInt16) ins->ws_lindex,
                                  WS_ENC_UINT16, (WsUInt16) ins->ws_findex,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_args,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_LOAD_VAR_S:
        case WS_ASM_STORE_VAR_S:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE,
                                  WS_ASM_GLUE(ins->type, ins->ws_vindex),
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_LOAD_VAR:
        case WS_ASM_STORE_VAR:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_vindex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_INCR_VAR_S:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE,
                                  WS_ASM_GLUE(ins->type, ins->ws_vindex),
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_INCR_VAR:
        case WS_ASM_DECR_VAR:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_vindex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_LOAD_CONST_S:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE,
                                  WS_ASM_GLUE(ins->type, ins->ws_cindex),
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_LOAD_CONST:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_cindex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_LOAD_CONST_W:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT16, (WsUInt16) ins->ws_cindex,
                                  WS_ENC_END))
                goto error;
            break;

        case WS_ASM_ADD_ASG:
        case WS_ASM_SUB_ASG:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_UINT8, (WsUInt8) ins->ws_vindex,
                                  WS_ENC_END))
                goto error;
            break;

        /* Instructions without operands: a single opcode byte. */
        case WS_ASM_CONST_0:
        case WS_ASM_CONST_1:
        case WS_ASM_CONST_M1:
        case WS_ASM_CONST_ES:
        case WS_ASM_CONST_INVALID:
        case WS_ASM_CONST_TRUE:
        case WS_ASM_CONST_FALSE:
        case WS_ASM_INCR:
        case WS_ASM_DECR:
        case WS_ASM_UMINUS:
        case WS_ASM_ADD:
        case WS_ASM_SUB:
        case WS_ASM_MUL:
        case WS_ASM_DIV:
        case WS_ASM_IDIV:
        case WS_ASM_REM:
        case WS_ASM_B_AND:
        case WS_ASM_B_OR:
        case WS_ASM_B_XOR:
        case WS_ASM_B_NOT:
        case WS_ASM_B_LSHIFT:
        case WS_ASM_B_RSSHIFT:
        case WS_ASM_B_RSZSHIFT:
        case WS_ASM_EQ:
        case WS_ASM_LE:
        case WS_ASM_LT:
        case WS_ASM_GE:
        case WS_ASM_GT:
        case WS_ASM_NE:
        case WS_ASM_NOT:
        case WS_ASM_SCAND:
        case WS_ASM_SCOR:
        case WS_ASM_TOBOOL:
        case WS_ASM_POP:
        case WS_ASM_TYPEOF:
        case WS_ASM_ISVALID:
        case WS_ASM_RETURN:
        case WS_ASM_RETURN_ES:
        case WS_ASM_DEBUG:
            if (!ws_encode_buffer(&compiler->byte_code,
                                  WS_ENC_BYTE, (WsByte) ins->type,
                                  WS_ENC_END))
                goto error;
            break;

        default:
            ws_fatal("ws_asm_linearize(): unknown instruction 0x%02x",
                     ins->type);
            break;
        }
    }

    /*
     * Avoid generating 0-length functions, because not all clients
     * handle them correctly.
     */
    if (ws_buffer_len(&compiler->byte_code) == 0) {
        if (!ws_encode_buffer(&compiler->byte_code,
                              WS_ENC_BYTE, (WsByte) WS_ASM_RETURN_ES,
                              WS_ENC_END))
            goto error;
    }

    return;

    /*
     * Error handling.
     */

error:

    ws_error_memory(compiler);
    return;
}
/*
 * Compile the WMLScript source read from `input' into byte-code.
 *
 * `input_name' is stored in the compiler context as the name of the
 * input.  On success the encoded byte-code and its length are stored in
 * `output_return' and `output_len_return' by ws_bc_encode().
 *
 * The function returns WS_OK on success.  Allocation failures and a
 * failing ws_bc_encode() are reported as WS_ERROR_OUT_OF_MEMORY; the
 * WS_CHECK_COMPILE_ERROR() checkpoints abort the compilation when the
 * compiler has recorded an error (the macro is defined elsewhere).
 *
 * All per-compilation state that is allocated here is released before
 * returning, both on success and on failure, and the corresponding
 * fields of `compiler' are reset so that no stale pointer or count
 * is left behind.
 */
static WsResult compile_stream(WsCompilerPtr compiler, const char *input_name,
                               WsStream *input, unsigned char **output_return,
                               size_t *output_len_return)
{
    WsResult result = WS_OK;
    WsUInt32 i;
    WsListItem *li;
    WsUInt8 findex;
    WsUInt8 num_locals;
    WsBcStringEncoding string_encoding = WS_BC_STRING_ENC_UTF8;

    /* Initialize the compiler context. */

    compiler->linenum = 1;
    compiler->input_name = input_name;

    compiler->num_errors = 0;
    compiler->num_warnings = 0;
    compiler->num_extern_functions = 0;
    compiler->num_local_functions = 0;
    compiler->errors = 0;
    compiler->last_syntax_error_line = 0;

    /* Allocate fast-malloc pool for the syntax tree. */

    compiler->pool_stree = ws_f_create(1024 * 1024);
    if (compiler->pool_stree == NULL) {
        result = WS_ERROR_OUT_OF_MEMORY;
        goto out;
    }

    /* Allocate hash tables. */

    compiler->pragma_use_hash = ws_pragma_use_hash_create();
    if (compiler->pragma_use_hash == NULL) {
        result = WS_ERROR_OUT_OF_MEMORY;
        goto out;
    }

    compiler->functions_hash = ws_function_hash_create();
    if (compiler->functions_hash == NULL) {
        result = WS_ERROR_OUT_OF_MEMORY;
        goto out;
    }

    /* Allocate a byte-code module. */

    if (compiler->params.use_latin1_strings)
        string_encoding = WS_BC_STRING_ENC_ISO_8859_1;

    compiler->bc = ws_bc_alloc(string_encoding);
    if (compiler->bc == NULL) {
        result = WS_ERROR_OUT_OF_MEMORY;
        goto out;
    }

    /* Save the input stream. */
    compiler->input = input;

    /* Parse the input. */
#if WS_DEBUG
    global_compiler = compiler;
#endif /* WS_DEBUG */

    ws_yy_parse(compiler);

    /* Free all lexer's active not freed blocks.  If we have any blocks
       on the used list, our compilation was not successful. */
    {
        size_t j;

        for (j = 0; j < compiler->lexer_active_list_size; j++)
            ws_free(compiler->lexer_active_list[j]);
        ws_free(compiler->lexer_active_list);

        /* Reset the size together with the pointer; otherwise the
           count would keep describing an array that no longer exists. */
        compiler->lexer_active_list = NULL;
        compiler->lexer_active_list_size = 0;
    }

    WS_CHECK_COMPILE_ERROR();

    /* Sort functions if allowed and it helps. */
    if (!compiler->params.no_opt_sort_bc_functions
        && compiler->num_functions > 7) {
        ws_info(compiler, "optimize: sorting functions");

        /* Fetch the usage counts from the functions hash. */
        for (i = 0; i < compiler->num_functions; i++) {
            WsFunctionHash *fh = ws_function_hash(compiler,
                                                  compiler->functions[i].name);
            compiler->functions[i].usage_count = fh->usage_count;
        }

        /* Sort functions. */
        qsort(compiler->functions, compiler->num_functions,
              sizeof(compiler->functions[0]), sort_functions_cmp);

        /* Patch the function indexes so that both the function array
           and the hash agree with the new order. */
        for (i = 0; i < compiler->num_functions; i++) {
            WsFunctionHash *fh = ws_function_hash(compiler,
                                                  compiler->functions[i].name);
            compiler->functions[i].findex = i;
            fh->findex = i;
        }
    }

    /* Linearize functions */
    for (i = 0; i < compiler->num_functions; i++) {
        WsFunction *func = &compiler->functions[i];

        ws_info(compiler, "linearizing function `%s'...", func->name);

        compiler->pool_asm = ws_f_create(100 * 1024);
        if (compiler->pool_asm == NULL) {
            result = WS_ERROR_OUT_OF_MEMORY;
            goto out;
        }

        compiler->next_label = 0;
        compiler->asm_head = compiler->asm_tail = NULL;

        /* Create variables namespace. */
        compiler->next_vindex = 0;
        compiler->variables_hash = ws_variable_hash_create();
        if (compiler->variables_hash == NULL) {
            result = WS_ERROR_OUT_OF_MEMORY;
            goto out;
        }

        /* Define the formal arguments to the namespace. */
        for (li = func->params->head; li; li = li->next) {
            WsFormalParm *parm = li->data;

            ws_variable_define(compiler, parm->line, WS_FALSE, parm->name);
        }

        WS_CHECK_COMPILE_ERROR();

        /* Linearize it. */
        for (li = func->block->head; li; li = li->next)
            ws_stmt_linearize(compiler, li->data);

        WS_CHECK_COMPILE_ERROR();

        /* Optimize symbolic assembler.  This function does nothing if
           no optimizations were requested. */
        ws_asm_optimize(compiler);

        /* Print the resulting symbolic assembler if requested. */
        if (compiler->params.print_symbolic_assembler)
            ws_asm_print(compiler);

        WS_CHECK_COMPILE_ERROR();

        /* Generate byte-code */

        ws_buffer_init(&compiler->byte_code);
        ws_asm_linearize(compiler);

        WS_CHECK_COMPILE_ERROR();

        /* Disassemble the output if requested. */
        if (compiler->params.print_assembler)
            ws_asm_dasm(compiler, ws_buffer_ptr(&compiler->byte_code),
                        ws_buffer_len(&compiler->byte_code));

        /* Calculate the number of local variables */
        num_locals = compiler->next_vindex - func->params->num_items;

        /* Add the function to the byte-code module.  Only external
           functions are given a name. */
        if (!ws_bc_add_function(compiler->bc, &findex,
                                func->externp ? func->name : NULL,
                                func->params->num_items,
                                num_locals,
                                ws_buffer_len(&compiler->byte_code),
                                ws_buffer_ptr(&compiler->byte_code))) {
            result = WS_ERROR_OUT_OF_MEMORY;
            goto out;
        }

        /* Cleanup and prepare for the next function. */

        ws_buffer_uninit(&compiler->byte_code);

        ws_hash_destroy(compiler->variables_hash);
        compiler->variables_hash = NULL;

        ws_f_destroy(compiler->pool_asm);
        compiler->pool_asm = NULL;
    }

    /* Linearize the byte-code structure. */
    if (!ws_bc_encode(compiler->bc, output_return, output_len_return))
        result = WS_ERROR_OUT_OF_MEMORY;

out:

    /* Cleanup. */

    ws_f_destroy(compiler->pool_stree);
    compiler->pool_stree = NULL;

    ws_hash_destroy(compiler->pragma_use_hash);
    compiler->pragma_use_hash = NULL;

    /* Free functions.  The array pointer and the count are reset as
       well so that the freed array cannot be walked or freed again. */
    for (i = 0; i < compiler->num_functions; i++)
        ws_free(compiler->functions[i].name);
    ws_free(compiler->functions);
    compiler->functions = NULL;
    compiler->num_functions = 0;

    ws_hash_destroy(compiler->functions_hash);
    compiler->functions_hash = NULL;

    ws_bc_free(compiler->bc);
    compiler->bc = NULL;

    compiler->input = NULL;

    /* These are normally released at the end of each loop iteration;
       they are released here for the paths that jump out of the loop. */
    ws_f_destroy(compiler->pool_asm);
    compiler->pool_asm = NULL;

    ws_hash_destroy(compiler->variables_hash);
    compiler->variables_hash = NULL;

    ws_buffer_uninit(&compiler->byte_code);

    /* All done. */
    return result;
}
/*
 * The lexer entry point called by the parser.
 *
 * Reads characters from `compiler->input' (the compiler is passed in
 * `context'), skips whitespace and comments, and returns the next token.
 * The semantic value of literals and identifiers is stored in `yylval'
 * and the line on which the token starts in `yylloc->first_line'.
 *
 * Single-character tokens are returned as their character code,
 * multi-character operators, keywords, identifiers and literals as
 * their `t...' token codes.  EOF is returned at the end of the input
 * and after errors (out of memory, EOF inside a block comment or a
 * string literal, a malformed float literal, garbage in the input).
 *
 * String and identifier values are registered to the compiler with
 * ws_lexer_register_utf8() / ws_lexer_register_block() before they are
 * handed to the parser.
 */
int ws_yy_lex(YYSTYPE *yylval, YYLTYPE *yylloc, void *context)
{
    WsCompiler *compiler = (WsCompiler *) context;
    WsUInt32 ch, ch2;
    WsBuffer buffer;
    unsigned char *p;
    WsBool success;

    /* Just check that we get the correct amount of arguments. */
    gw_assert(compiler->magic == COMPILER_MAGIC);

    while (ws_stream_getc(compiler->input, &ch)) {
        /* Save the token's line number. */
        yylloc->first_line = compiler->linenum;

        switch (ch) {
        case '\t':  /* Whitespace characters. */
        case '\v':
        case '\f':
        case ' ':
            continue;

        case '\n':  /* Line terminators. */
        case '\r':
            /* "\r\n" counts as a single line terminator. */
            if (ch == '\r' && ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 != '\n')
                    ws_stream_ungetc(compiler->input, ch2);
            }
            compiler->linenum++;
            continue;

        case '!':   /* !, != */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '=')
                    return tNE;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '!';

        case '%':   /* %, %= */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '=')
                    return tREMA;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '%';

        case '&':   /* &, &&, &= */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '&')
                    return tAND;
                if (ch2 == '=')
                    return tANDA;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '&';

        case '*':   /* *, *= */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '=')
                    return tMULA;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '*';

        case '+':   /* +, ++, += */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '+')
                    return tPLUSPLUS;
                if (ch2 == '=')
                    return tADDA;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '+';

        case '-':   /* -, --, -= */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '-')
                    return tMINUSMINUS;
                if (ch2 == '=')
                    return tSUBA;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '-';

        case '.':
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (WS_IS_DECIMAL_DIGIT(ch2)) {
                    /* DecimalFloatLiteral. */
                    ws_buffer_init(&buffer);

                    if (!ws_buffer_append_space(&buffer, &p, 2)) {
                        ws_error_memory(compiler);
                        ws_buffer_uninit(&buffer);
                        return EOF;
                    }

                    p[0] = '.';
                    p[1] = (unsigned char) ch2;

                    success = read_float_from_point(compiler, &buffer,
                                                    &yylval->vfloat);
                    ws_buffer_uninit(&buffer);

                    if (!success)
                        return EOF;

                    return tFLOAT;
                }

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '.';

        case '/':   /* /, /=, block or a single line comment */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '*') {
                    /* Block comment. */
                    while (1) {
                        if (!ws_stream_getc(compiler->input, &ch)) {
                            ws_src_error(compiler, 0, "EOF in comment");
                            return EOF;
                        }

                        if (ch == '\n' || ch == '\r') {
                            /* Line terminators. */
                            if (ch == '\r'
                                && ws_stream_getc(compiler->input, &ch2)) {
                                if (ch2 != '\n')
                                    ws_stream_ungetc(compiler->input, ch2);
                            }
                            compiler->linenum++;

                            /* Continue reading the block comment. */
                            continue;
                        }

                        if (ch == '*'
                            && ws_stream_getc(compiler->input, &ch2)) {
                            if (ch2 == '/')
                                /* The end of the comment found. */
                                break;

                            ws_stream_ungetc(compiler->input, ch2);
                        }
                    }

                    /* Continue after the comment. */
                    continue;
                }

                if (ch2 == '/') {
                    /* Single line comment. */
                    while (1) {
                        if (!ws_stream_getc(compiler->input, &ch))
                            /* The end of input stream reached.  We accept
                               this as a valid comment terminator. */
                            break;

                        if (ch == '\n' || ch == '\r') {
                            /* Line terminators. */
                            if (ch == '\r'
                                && ws_stream_getc(compiler->input, &ch2)) {
                                if (ch2 != '\n')
                                    ws_stream_ungetc(compiler->input, ch2);
                            }

                            /* The end of the line (and the comment)
                               reached. */
                            compiler->linenum++;
                            break;
                        }
                    }

                    /* Continue after the comment. */
                    continue;
                }

                if (ch2 == '=')
                    return tDIVA;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '/';

        case '<':   /* <, <<, <<=, <= */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '<') {
                    if (ws_stream_getc(compiler->input, &ch2)) {
                        if (ch2 == '=')
                            return tLSHIFTA;

                        ws_stream_ungetc(compiler->input, ch2);
                    }
                    return tLSHIFT;
                }

                if (ch2 == '=')
                    return tLE;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '<';

        case '=':   /* =, == */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '=')
                    return tEQ;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '=';

        case '>':   /* >, >=, >>, >>=, >>>, >>>= */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '>') {
                    if (ws_stream_getc(compiler->input, &ch2)) {
                        if (ch2 == '>') {
                            if (ws_stream_getc(compiler->input, &ch2)) {
                                if (ch2 == '=')
                                    return tRSZSHIFTA;

                                ws_stream_ungetc(compiler->input, ch2);
                            }
                            return tRSZSHIFT;
                        }

                        if (ch2 == '=')
                            return tRSSHIFTA;

                        ws_stream_ungetc(compiler->input, ch2);
                    }
                    return tRSSHIFT;
                }

                if (ch2 == '=')
                    return tGE;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '>';

        case '^':   /* ^, ^= */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '=')
                    return tXORA;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '^';

        case '|':   /* |, |=, || */
            if (ws_stream_getc(compiler->input, &ch2)) {
                if (ch2 == '=')
                    return tORA;
                if (ch2 == '|')
                    return tOR;

                ws_stream_ungetc(compiler->input, ch2);
            }
            return '|';

        case '#':   /* The simple cases. */
        case '(':
        case ')':
        case ',':
        case ':':
        case ';':
        case '?':
        case '{':
        case '}':
        case '~':
            return (int) ch;

        case '\'':  /* String literals. */
        case '"':
            {
                /* The literal ends at the same quote that started it. */
                WsUInt32 string_end_ch = ch;
                WsUtf8String *str = ws_utf8_alloc();

                if (str == NULL) {
                    ws_error_memory(compiler);
                    return EOF;
                }

                while (1) {
                    if (!ws_stream_getc(compiler->input, &ch)) {
eof_in_string_literal:
                        ws_src_error(compiler, 0, "EOF in string literal");
                        ws_utf8_free(str);
                        return EOF;
                    }

                    if (ch == string_end_ch)
                        /* The end of string reached. */
                        break;

                    if (ch == '\\') {
                        /* An escape sequence. */
                        if (!ws_stream_getc(compiler->input, &ch))
                            goto eof_in_string_literal;

                        switch (ch) {
                        case '\'':
                        case '"':
                        case '\\':
                        case '/':
                            /* The character as-is. */
                            break;

                        case 'b':
                            ch = '\b';
                            break;

                        case 'f':
                            ch = '\f';
                            break;

                        case 'n':
                            ch = '\n';
                            break;

                        case 'r':
                            ch = '\r';
                            break;

                        case 't':
                            ch = '\t';
                            break;

                        case 'x':
                        case 'u':
                            {
                                /* `\x' takes two and `\u' four hex
                                   digits. */
                                int i, len;
                                int type = ch;

                                if (ch == 'x')
                                    len = 2;
                                else
                                    len = 4;

                                ch = 0;
                                for (i = 0; i < len; i++) {
                                    if (!ws_stream_getc(compiler->input, &ch2))
                                        goto eof_in_string_literal;

                                    if (!WS_IS_HEX_DIGIT(ch2)) {
                                        ws_src_error(compiler, 0,
                                                     "malformed `\\%c' escape in "
                                                     "string literal",
                                                     (char) type);
                                        ch = 0;
                                        break;
                                    }
                                    ch *= 16;
                                    ch += WS_HEX_TO_INT(ch2);
                                }
                            }
                            break;

                        default:
                            if (WS_IS_OCTAL_DIGIT(ch)) {
                                /* An octal escape: up to three digits
                                   when the first digit is 0-3, otherwise
                                   up to two. */
                                int i;
                                int limit = 3;

                                ch = WS_OCTAL_TO_INT(ch);
                                if (ch > 3)
                                    limit = 2;

                                for (i = 1; i < limit; i++) {
                                    if (!ws_stream_getc(compiler->input, &ch2))
                                        goto eof_in_string_literal;

                                    if (!WS_IS_OCTAL_DIGIT(ch2)) {
                                        ws_stream_ungetc(compiler->input, ch2);
                                        break;
                                    }

                                    ch *= 8;
                                    ch += WS_OCTAL_TO_INT(ch2);
                                }
                            } else {
                                ws_src_error(compiler, 0,
                                             "unknown escape sequence `\\%c' in "
                                             "string literal",
                                             (char) ch);
                                ch = 0;
                            }
                            break;
                        }
                        /* FALLTHROUGH */
                    }

                    if (!ws_utf8_append_char(str, ch)) {
                        ws_error_memory(compiler);
                        ws_utf8_free(str);
                        return EOF;
                    }
                }

                if (!ws_lexer_register_utf8(compiler, str)) {
                    ws_error_memory(compiler);
                    ws_utf8_free(str);
                    return EOF;
                }

                gw_assert(str != NULL);
                yylval->string = str;

                return tSTRING;
            }
            break;

        default:
            /* Identifiers, keywords and number constants. */

            if (WS_IS_IDENTIFIER_LETTER(ch)) {
                WsBool got;
                int token;
                unsigned char *p;
                unsigned char *np;
                size_t len = 0;

                /* An identifier or a keyword.  The buffer is allocated
                 * here and then resized for every character so that it
                 * always has room for the characters read so far plus
                 * one terminator character. */
                p = ws_malloc(256);
                if (p == NULL) {
                    ws_error_memory(compiler);
                    return EOF;
                }

                do {
                    /* Add one extra for the possible terminator
                       character. */
                    np = ws_realloc(p, len + 2);
                    if (np == NULL) {
                        ws_error_memory(compiler);
                        ws_free(p);
                        return EOF;
                    }

                    p = np;

                    /* This is ok since the only valid identifier names
                     * can be written in 7 bit ASCII. */
                    p[len++] = (unsigned char) ch;
                } while ((got = ws_stream_getc(compiler->input, &ch))
                         && (WS_IS_IDENTIFIER_LETTER(ch)
                             || WS_IS_DECIMAL_DIGIT(ch)));

                if (got)
                    /* Put back the terminator character. */
                    ws_stream_ungetc(compiler->input, ch);

                /* Is it a keyword? */
                if (lookup_keyword((char *) p, len, &token)) {
                    /* Yes it is... */
                    ws_free(p);

                    /* ...except one case: `div='. */
                    if (token == tIDIV) {
                        if (ws_stream_getc(compiler->input, &ch)) {
                            if (ch == '=')
                                return tIDIVA;

                            ws_stream_ungetc(compiler->input, ch);
                        }
                    }

                    /* Return the token value. */
                    return token;
                }

                /* It is a normal identifier.  Let's pad the name with a
                   null-character.  We have already allocated space for
                   it. */
                p[len] = '\0';

                if (!ws_lexer_register_block(compiler, p)) {
                    ws_error_memory(compiler);
                    ws_free(p);
                    return EOF;
                }

                gw_assert(p != NULL);
                yylval->identifier = (char *) p;

                return tIDENTIFIER;
            }

            if (WS_IS_NON_ZERO_DIGIT(ch)) {
                /* A decimal integer literal or a decimal float
                   literal. */

                ws_buffer_init(&buffer);
                if (!ws_buffer_append_space(&buffer, &p, 1)) {
                    /* Shared out-of-memory exit for all the number
                       literal branches below. */
number_error_memory:
                    ws_error_memory(compiler);
                    ws_buffer_uninit(&buffer);
                    return EOF;
                }
                p[0] = ch;

                while (ws_stream_getc(compiler->input, &ch)) {
                    if (WS_IS_DECIMAL_DIGIT(ch)) {
                        if (!ws_buffer_append_space(&buffer, &p, 1))
                            goto number_error_memory;
                        p[0] = ch;
                    } else if (ch == '.' || ch == 'e' || ch == 'E') {
                        /* DecimalFloatLiteral. */
                        if (ch == '.') {
                            if (!ws_buffer_append_space(&buffer, &p, 1))
                                goto number_error_memory;
                            p[0] = '.';

                            success = read_float_from_point(compiler, &buffer,
                                                            &yylval->vfloat);
                        } else {
                            /* Give the exponent marker back; the
                               exponent reader consumes it itself. */
                            ws_stream_ungetc(compiler->input, ch);

                            success = read_float_from_exp(compiler, &buffer,
                                                          &yylval->vfloat);
                        }
                        ws_buffer_uninit(&buffer);

                        if (!success)
                            return EOF;

                        return tFLOAT;
                    } else {
                        ws_stream_ungetc(compiler->input, ch);
                        break;
                    }
                }

                /* Now the buffer contains an integer number as a
                   string.  Let's convert it to an integer number. */
                yylval->integer = buffer_to_int(compiler, &buffer);
                ws_buffer_uninit(&buffer);

                /* Read a DecimalIntegerLiteral. */
                return tINTEGER;
            }

            if (ch == '0') {
                /* The integer constant 0, an octal number or a
                   HexIntegerLiteral. */
                if (ws_stream_getc(compiler->input, &ch2)) {
                    if (ch2 == 'x' || ch2 == 'X') {
                        /* HexIntegerLiteral. */

                        ws_buffer_init(&buffer);
                        if (!ws_buffer_append_space(&buffer, &p, 2))
                            goto number_error_memory;

                        p[0] = '0';
                        p[1] = 'x';

                        while (ws_stream_getc(compiler->input, &ch)) {
                            if (WS_IS_HEX_DIGIT(ch)) {
                                if (!ws_buffer_append_space(&buffer, &p, 1))
                                    goto number_error_memory;
                                p[0] = ch;
                            } else {
                                ws_stream_ungetc(compiler->input, ch);
                                break;
                            }
                        }

                        /* Only the "0x" prefix was collected. */
                        if (ws_buffer_len(&buffer) == 2) {
                            ws_buffer_uninit(&buffer);
                            ws_src_error(compiler, 0,
                                         "numeric constant with no digits");
                            yylval->integer = 0;
                            return tINTEGER;
                        }

                        /* Now the buffer contains an integer number as
                         * a string.  Let's convert it to an integer
                         * number. */
                        yylval->integer = buffer_to_int(compiler, &buffer);
                        ws_buffer_uninit(&buffer);

                        /* Read a HexIntegerLiteral. */
                        return tINTEGER;
                    }
                    if (WS_IS_OCTAL_DIGIT(ch2)) {
                        /* OctalIntegerLiteral. */

                        ws_buffer_init(&buffer);
                        if (!ws_buffer_append_space(&buffer, &p, 2))
                            goto number_error_memory;

                        p[0] = '0';
                        p[1] = ch2;

                        while (ws_stream_getc(compiler->input, &ch)) {
                            if (WS_IS_OCTAL_DIGIT(ch)) {
                                if (!ws_buffer_append_space(&buffer, &p, 1))
                                    goto number_error_memory;
                                p[0] = ch;
                            } else {
                                ws_stream_ungetc(compiler->input, ch);
                                break;
                            }
                        }

                        /* Convert the buffer into an integer number. */
                        yylval->integer = buffer_to_int(compiler, &buffer);
                        ws_buffer_uninit(&buffer);

                        /* Read an OctalIntegerLiteral. */
                        return tINTEGER;
                    }
                    if (ch2 == '.' || ch2 == 'e' || ch2 == 'E') {
                        /* DecimalFloatLiteral. */
                        ws_buffer_init(&buffer);

                        /* Start the mantissa with the leading zero, in
                           the same way as the non-zero digit branch
                           starts it with its first digit. */
                        if (!ws_buffer_append_space(&buffer, &p, 1))
                            goto number_error_memory;
                        p[0] = '0';

                        if (ch2 == '.') {
                            if (!ws_buffer_append_space(&buffer, &p, 1))
                                goto number_error_memory;
                            p[0] = '.';

                            success = read_float_from_point(compiler, &buffer,
                                                            &yylval->vfloat);
                        } else {
                            /* Give the exponent marker back.  It is held
                               in `ch2' here; `ch' still holds the leading
                               '0', and pushing that back would lose the
                               marker and lex the zero a second time. */
                            ws_stream_ungetc(compiler->input, ch2);

                            success = read_float_from_exp(compiler, &buffer,
                                                          &yylval->vfloat);
                        }
                        ws_buffer_uninit(&buffer);

                        if (!success)
                            return EOF;

                        return tFLOAT;
                    }

                    ws_stream_ungetc(compiler->input, ch2);
                }

                /* Integer literal 0. */
                yylval->integer = 0;
                return tINTEGER;
            }

            /* Garbage found from the input stream. */
            ws_src_error(compiler, 0,
                         "garbage found from the input stream: character=0x%x",
                         ch);
            return EOF;
            break;
        }
    }

    return EOF;
}