nsresult
NS_InvokeByIndex(nsISupports* that, PRUint32 methodIndex,
                 PRUint32 paramCount, nsXPTCVariant* params)
{
    vtable_func *vtable = *reinterpret_cast<vtable_func **>(that);
    vtable_func method = vtable[methodIndex];
    PRUint32 overflow = invoke_count_words (paramCount, params);
    PRUint32 *stack_space = reinterpret_cast<PRUint32 *>(
        __builtin_alloca((overflow + 8 /* 4 32-bit GPRs + 2 64-bit FPRs */) * 4));

    invoke_copy_to_stack(paramCount, params, stack_space, overflow);

    PRUint32 *d_gpr = stack_space + overflow;
    double *d_fpr = reinterpret_cast<double *>(d_gpr + 4);

    return method(that, d_gpr[0], d_gpr[1], d_gpr[2], d_gpr[3],
                  d_fpr[0], d_fpr[1]);
}
nsresult
NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
                 uint32_t paramCount, nsXPTCVariant* params)
{
    vtable_func *vtable = *reinterpret_cast<vtable_func **>(that);
    vtable_func method = vtable[methodIndex];
    uint64_t overflow = invoke_count_words (paramCount, params);
    uint64_t *stack_space = reinterpret_cast<uint64_t *>(
        __builtin_alloca((overflow + 8 /* 4 64-bit GPRs + 4 64-bit FPRs */) * 8));

    invoke_copy_to_stack(paramCount, params, stack_space, overflow);

    uint64_t *d_gpr = stack_space + overflow;
    double *d_fpr = reinterpret_cast<double *>(d_gpr + 4);

    return method(that, d_gpr[0], d_gpr[1], d_gpr[2], d_gpr[3],
                  d_fpr[0], d_fpr[1], d_fpr[2], d_fpr[3]);
}
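Both s390-style variants above lean on a vtable typedef and two helpers that are not shown. The following is a minimal sketch of the declarations the 64-bit call sites imply (the 32-bit variant is analogous with 32-bit slots); the prototypes are inferred from the calls above, not copied from the source tree.

// Sketch only: declarations inferred from the call sites above.
// vtable_func mirrors the final call: 'this' plus four GPR slots and
// four FPR slots, returning nsresult.
typedef nsresult (*vtable_func)(nsISupports *,
                                uint64_t, uint64_t, uint64_t, uint64_t,
                                double, double, double, double);

// Returns the number of 64-bit words that overflow past the registers
// onto the stack area.
static uint64_t invoke_count_words(uint32_t paramCount, nsXPTCVariant *params);

// Fills the alloca'd block: 'overflow' stack words first, then the four
// GPR slots, then the four FPR slots.
static void invoke_copy_to_stack(uint32_t paramCount, nsXPTCVariant *params,
                                 uint64_t *stack_space, uint64_t overflow);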
nsresult
NS_InvokeByIndex_P(nsISupports * that, uint32_t methodIndex,
                   uint32_t paramCount, nsXPTCVariant * params)
{
    uint32_t nr_gpr, nr_fpr, nr_stack;
    invoke_count_words(paramCount, params, nr_gpr, nr_fpr, nr_stack);

    // Stack, if used, must be 16-byte aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    uint64_t *stack = (uint64_t *) __builtin_alloca(nr_stack * 8);
    uint64_t gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // Load FPR registers from fpregs[]
    register double d0 asm("xmm0");
    register double d1 asm("xmm1");
    register double d2 asm("xmm2");
    register double d3 asm("xmm3");
    register double d4 asm("xmm4");
    register double d5 asm("xmm5");
    register double d6 asm("xmm6");
    register double d7 asm("xmm7");

    switch (nr_fpr) {
#define ARG_FPR(N) \
    case N+1: d##N = fpregs[N];
        ARG_FPR(7);
        ARG_FPR(6);
        ARG_FPR(5);
        ARG_FPR(4);
        ARG_FPR(3);
        ARG_FPR(2);
        ARG_FPR(1);
        ARG_FPR(0);
    case 0:;
#undef ARG_FPR
    }

    // Load GPR registers from gpregs[]
    register uint64_t a0 asm("rdi");
    register uint64_t a1 asm("rsi");
    register uint64_t a2 asm("rdx");
    register uint64_t a3 asm("rcx");
    register uint64_t a4 asm("r8");
    register uint64_t a5 asm("r9");

    switch (nr_gpr) {
#define ARG_GPR(N) \
    case N+1: a##N = gpregs[N];
        ARG_GPR(5);
        ARG_GPR(4);
        ARG_GPR(3);
        ARG_GPR(2);
        ARG_GPR(1);
    case 1: a0 = (uint64_t) that;
    case 0:;
#undef ARG_GPR
    }

    // Ensure that assignments to SSE registers won't be optimized away
    asm("" :: "x" (d0), "x" (d1), "x" (d2), "x" (d3),
              "x" (d4), "x" (d5), "x" (d6), "x" (d7));

    // Get pointer to method
    uint64_t methodAddress = *((uint64_t *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((uint64_t *)methodAddress);

    typedef uint32_t (*Method)(uint64_t, uint64_t, uint64_t,
                               uint64_t, uint64_t, uint64_t);
    uint32_t result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5);
    return result;
}
nsresult
NS_InvokeByIndex(nsISupports * that, uint32_t methodIndex,
                 uint32_t paramCount, nsXPTCVariant * params)
{
    uint32_t nr_stack;
    invoke_count_words(paramCount, params, nr_stack);

    // Stack, if used, must be 16-byte aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    uint64_t *stack = (uint64_t *) __builtin_alloca(nr_stack * 8);
    uint64_t gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // We used to have switches to make sure we would only load the registers
    // that are needed for this call. That produced larger code that was
    // not faster in practice. It also caused compiler warnings about the
    // variables being used uninitialized.
    // We now just load every register. There could still be a warning
    // from a memory analysis tool that we are loading uninitialized stack
    // positions.

    // FIXME: this function depends on the above __builtin_alloca placing
    // the array in the correct spot for the ABI.

    // Load FPR registers from fpregs[]
    double d0, d1, d2, d3, d4, d5, d6, d7;

    d7 = fpregs[7];
    d6 = fpregs[6];
    d5 = fpregs[5];
    d4 = fpregs[4];
    d3 = fpregs[3];
    d2 = fpregs[2];
    d1 = fpregs[1];
    d0 = fpregs[0];

    // Load GPR registers from gpregs[]
    uint64_t a0, a1, a2, a3, a4, a5;

    a5 = gpregs[5];
    a4 = gpregs[4];
    a3 = gpregs[3];
    a2 = gpregs[2];
    a1 = gpregs[1];
    a0 = (uint64_t) that;

    // Get pointer to method
    uint64_t methodAddress = *((uint64_t *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((uint64_t *)methodAddress);

    typedef nsresult (*Method)(uint64_t, uint64_t, uint64_t, uint64_t,
                               uint64_t, uint64_t, double, double, double,
                               double, double, double, double, double);
    nsresult result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5,
                                              d0, d1, d2, d3, d4, d5,
                                              d6, d7);
    return result;
}
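All of the x86-64 variants rely on two register-count constants and a pair of helpers that are not shown. The following is a minimal sketch of what the call sites in the NS_InvokeByIndex variant above imply, assuming the System V AMD64 register counts; the names match the calls above, but the exact prototypes are assumptions rather than quotes from the tree.

// Sketch only: the System V AMD64 ABI passes up to 6 integer/pointer
// arguments in GPRs (rdi, rsi, rdx, rcx, r8, r9) and up to 8 floating-point
// arguments in xmm0-xmm7, which is what these array sizes encode.
const uint32_t GPR_COUNT = 6;
const uint32_t FPR_COUNT = 8;

// Counts only the 64-bit words that spill past the registers onto the stack.
static void invoke_count_words(uint32_t paramCount, nsXPTCVariant *s,
                               uint32_t &nr_stack);

// Splits params across the stack area, the GPR slots, and the FPR slots.
static void invoke_copy_to_stack(uint64_t *d, uint32_t paramCount,
                                 nsXPTCVariant *s,
                                 uint64_t *gpregs, double *fpregs);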
nsresult
NS_InvokeByIndex_P(nsISupports * that, PRUint32 methodIndex,
                   PRUint32 paramCount, nsXPTCVariant * params)
{
    PRUint32 nr_gpr, nr_fpr, nr_stack;
    invoke_count_words(paramCount, params, nr_gpr, nr_fpr, nr_stack);

    // Stack, if used, must be 16-byte aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    PRUint64 *stack = (PRUint64 *) __builtin_alloca(nr_stack * 8);
    PRUint64 gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // Load FPR registers from fpregs[]
    double d0, d1, d2, d3, d4, d5, d6, d7;

    switch (nr_fpr) {
#define ARG_FPR(N) \
    case N+1: d##N = fpregs[N];
        ARG_FPR(7);
        ARG_FPR(6);
        ARG_FPR(5);
        ARG_FPR(4);
        ARG_FPR(3);
        ARG_FPR(2);
        ARG_FPR(1);
        ARG_FPR(0);
    case 0:;
#undef ARG_FPR
    }

    // Load GPR registers from gpregs[]
    PRUint64 a0, a1, a2, a3, a4, a5;

    switch (nr_gpr) {
#define ARG_GPR(N) \
    case N+1: a##N = gpregs[N];
        ARG_GPR(5);
        ARG_GPR(4);
        ARG_GPR(3);
        ARG_GPR(2);
        ARG_GPR(1);
    case 1: a0 = (PRUint64) that;
    case 0:;
#undef ARG_GPR
    }

    // Get pointer to method
    PRUint64 methodAddress = *((PRUint64 *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((PRUint64 *)methodAddress);

    typedef PRUint32 (*Method)(PRUint64, PRUint64, PRUint64, PRUint64,
                               PRUint64, PRUint64, double, double, double,
                               double, double, double, double, double);
    PRUint32 result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5,
                                              d0, d1, d2, d3, d4, d5,
                                              d6, d7);
    return result;
}
nsresult
XPTC_InvokeByIndex(nsISupports * that, PRUint32 methodIndex,
                   PRUint32 paramCount, nsXPTCVariant * params)
{
    PRUint32 nr_gpr, nr_fpr, nr_stack;
    invoke_count_words(paramCount, params, nr_gpr, nr_fpr, nr_stack);

    // Stack, if used, must be 16-byte aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    PRUint64 *stack = (PRUint64 *) __builtin_alloca(nr_stack * 8);
    PRUint64 gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // Disable the warning about sometimes-uninitialized variables, which is
    // hit when we pass fewer than 8 XMM or fewer than 6 GPR registers.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wsometimes-uninitialized"

    // Load FPR registers from fpregs[]
    register double d0 asm("xmm0");
    register double d1 asm("xmm1");
    register double d2 asm("xmm2");
    register double d3 asm("xmm3");
    register double d4 asm("xmm4");
    register double d5 asm("xmm5");
    register double d6 asm("xmm6");
    register double d7 asm("xmm7");

    switch (nr_fpr) {
#define ARG_FPR(N) \
    case N+1: d##N = fpregs[N];
        ARG_FPR(7);
        ARG_FPR(6);
        ARG_FPR(5);
        ARG_FPR(4);
        ARG_FPR(3);
        ARG_FPR(2);
        ARG_FPR(1);
        ARG_FPR(0);
    case 0:;
#undef ARG_FPR
    }

    // Load GPR registers from gpregs[]
    register PRUint64 a0 asm("rdi");
    register PRUint64 a1 asm("rsi");
    register PRUint64 a2 asm("rdx");
    register PRUint64 a3 asm("rcx");
    register PRUint64 a4 asm("r8");
    register PRUint64 a5 asm("r9");

    switch (nr_gpr) {
#define ARG_GPR(N) \
    case N+1: a##N = gpregs[N];
        ARG_GPR(5);
        ARG_GPR(4);
        ARG_GPR(3);
        ARG_GPR(2);
        ARG_GPR(1);
    case 1: a0 = (PRUint64) that;
    case 0:;
#undef ARG_GPR
    }

    // Ensure that assignments to SSE registers won't be optimized away
    asm("" :: "x" (d0), "x" (d1), "x" (d2), "x" (d3),
              "x" (d4), "x" (d5), "x" (d6), "x" (d7));

    // Get pointer to method
    PRUint64 methodAddress = *((PRUint64 *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((PRUint64 *)methodAddress);

    typedef PRUint32 (*Method)(PRUint64, PRUint64, PRUint64,
                               PRUint64, PRUint64, PRUint64);
    PRUint32 result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5);
    return result;
#pragma clang diagnostic pop
}
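The switch-based x86-64 variants (NS_InvokeByIndex_P and XPTC_InvokeByIndex) call a counting helper that also reports how many GPR and FPR slots are occupied, so the switches can skip unused registers. A minimal sketch of the prototype those call sites imply follows; it assumes the same GPR_COUNT/FPR_COUNT values and invoke_copy_to_stack shape as the earlier sketch, only with the PRUint32/PRUint64 spellings, and it is an inference rather than a quote from the tree.

// Sketch only: prototype inferred from the call
//   invoke_count_words(paramCount, params, nr_gpr, nr_fpr, nr_stack);
// nr_gpr, nr_fpr, and nr_stack are filled in by reference so the callers
// can size the alloca block and drive the register-loading switches.
static void invoke_count_words(PRUint32 paramCount, nsXPTCVariant *s,
                               PRUint32 &nr_gpr, PRUint32 &nr_fpr,
                               PRUint32 &nr_stack);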