NS_InvokeByIndex(nsISupports* that, PRUint32 methodIndex,
                 PRUint32 paramCount, nsXPTCVariant* params)
{
    vtable_func *vtable = *reinterpret_cast<vtable_func **>(that);
    vtable_func method = vtable[methodIndex];
    PRUint32 overflow = invoke_count_words (paramCount, params);
    PRUint32 *stack_space = reinterpret_cast<PRUint32 *>(__builtin_alloca((overflow + 8 /* 4 32-bits gpr + 2 64-bits fpr */) * 4));

    invoke_copy_to_stack(paramCount, params, stack_space, overflow);

    PRUint32 *d_gpr = stack_space + overflow;
    double *d_fpr = reinterpret_cast<double *>(d_gpr + 4);

    return method(that, d_gpr[0], d_gpr[1], d_gpr[2], d_gpr[3],  d_fpr[0], d_fpr[1]);
}
NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
                 uint32_t paramCount, nsXPTCVariant* params)
{
    vtable_func *vtable = *reinterpret_cast<vtable_func **>(that);
    vtable_func method = vtable[methodIndex];
    uint64_t overflow = invoke_count_words (paramCount, params);
    uint64_t *stack_space = reinterpret_cast<uint64_t *>(__builtin_alloca((overflow + 8 /* 4 64-bits gpr + 4 64-bits fpr */) * 8));
    uint64_t result;

    invoke_copy_to_stack(paramCount, params, stack_space, overflow);

    uint64_t *d_gpr = stack_space + overflow;
    double *d_fpr = reinterpret_cast<double *>(d_gpr + 4);

    return method(that, d_gpr[0], d_gpr[1], d_gpr[2], d_gpr[3], d_fpr[0], d_fpr[1], d_fpr[2], d_fpr[3]);
}
NS_InvokeByIndex_P(nsISupports * that, uint32_t methodIndex,
                 uint32_t paramCount, nsXPTCVariant * params)
{
    uint32_t nr_gpr, nr_fpr, nr_stack;
    invoke_count_words(paramCount, params, nr_gpr, nr_fpr, nr_stack);
    
    // Stack, if used, must be 16-bytes aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    uint64_t *stack = (uint64_t *) __builtin_alloca(nr_stack * 8);
    uint64_t gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // Load FPR registers from fpregs[]
    register double d0 asm("xmm0");
    register double d1 asm("xmm1");
    register double d2 asm("xmm2");
    register double d3 asm("xmm3");
    register double d4 asm("xmm4");
    register double d5 asm("xmm5");
    register double d6 asm("xmm6");
    register double d7 asm("xmm7");

    switch (nr_fpr) {
#define ARG_FPR(N) \
    case N+1: d##N = fpregs[N];
        ARG_FPR(7);
        ARG_FPR(6);
        ARG_FPR(5);
        ARG_FPR(4);
        ARG_FPR(3);
        ARG_FPR(2);
        ARG_FPR(1);
        ARG_FPR(0);
    case 0:;
#undef ARG_FPR
    }
    
    // Load GPR registers from gpregs[]
    register uint64_t a0 asm("rdi");
    register uint64_t a1 asm("rsi");
    register uint64_t a2 asm("rdx");
    register uint64_t a3 asm("rcx");
    register uint64_t a4 asm("r8");
    register uint64_t a5 asm("r9");
    
    switch (nr_gpr) {
#define ARG_GPR(N) \
    case N+1: a##N = gpregs[N];
        ARG_GPR(5);
        ARG_GPR(4);
        ARG_GPR(3);
        ARG_GPR(2);
        ARG_GPR(1);
    case 1: a0 = (uint64_t) that;
    case 0:;
#undef ARG_GPR
    }

    // Ensure that assignments to SSE registers won't be optimized away
    asm("" ::
        "x" (d0), "x" (d1), "x" (d2), "x" (d3),
        "x" (d4), "x" (d5), "x" (d6), "x" (d7));
    
    // Get pointer to method
    uint64_t methodAddress = *((uint64_t *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((uint64_t *)methodAddress);
    
    typedef uint32_t (*Method)(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
    uint32_t result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5);
    return result;
}
NS_InvokeByIndex(nsISupports * that, uint32_t methodIndex,
                 uint32_t paramCount, nsXPTCVariant * params)
{
    uint32_t nr_stack;
    invoke_count_words(paramCount, params, nr_stack);
    
    // Stack, if used, must be 16-bytes aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    uint64_t *stack = (uint64_t *) __builtin_alloca(nr_stack * 8);
    uint64_t gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // We used to have switches to make sure we would only load the registers
    // that are needed for this call. That produced larger code that was
    // not faster in practice. It also caused compiler warnings about the
    // variables being used uninitialized.
    // We now just load every every register. There could still be a warning
    // from a memory analysis tools that we are loading uninitialized stack
    // positions.

    // FIXME: this function depends on the above __builtin_alloca placing
    // the array in the correct spot for the ABI.

    // Load FPR registers from fpregs[]
    double d0, d1, d2, d3, d4, d5, d6, d7;

    d7 = fpregs[7];
    d6 = fpregs[6];
    d5 = fpregs[5];
    d4 = fpregs[4];
    d3 = fpregs[3];
    d2 = fpregs[2];
    d1 = fpregs[1];
    d0 = fpregs[0];

    // Load GPR registers from gpregs[]
    uint64_t a0, a1, a2, a3, a4, a5;

    a5 = gpregs[5];
    a4 = gpregs[4];
    a3 = gpregs[3];
    a2 = gpregs[2];
    a1 = gpregs[1];
    a0 = (uint64_t) that;

    // Get pointer to method
    uint64_t methodAddress = *((uint64_t *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((uint64_t *)methodAddress);
    
    typedef nsresult (*Method)(uint64_t, uint64_t, uint64_t, uint64_t,
                               uint64_t, uint64_t, double, double, double,
                               double, double, double, double, double);
    nsresult result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5,
                                              d0, d1, d2, d3, d4, d5,
                                              d6, d7);
    return result;
}
NS_InvokeByIndex_P(nsISupports * that, PRUint32 methodIndex,
                 PRUint32 paramCount, nsXPTCVariant * params)
{
    PRUint32 nr_gpr, nr_fpr, nr_stack;
    invoke_count_words(paramCount, params, nr_gpr, nr_fpr, nr_stack);
    
    // Stack, if used, must be 16-bytes aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    PRUint64 *stack = (PRUint64 *) __builtin_alloca(nr_stack * 8);
    PRUint64 gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // Load FPR registers from fpregs[]
    double d0, d1, d2, d3, d4, d5, d6, d7;

    switch (nr_fpr) {
#define ARG_FPR(N) \
    case N+1: d##N = fpregs[N];
        ARG_FPR(7);
        ARG_FPR(6);
        ARG_FPR(5);
        ARG_FPR(4);
        ARG_FPR(3);
        ARG_FPR(2);
        ARG_FPR(1);
        ARG_FPR(0);
    case 0:;
#undef ARG_FPR
    }
    
    // Load GPR registers from gpregs[]
    PRUint64 a0, a1, a2, a3, a4, a5;
    
    switch (nr_gpr) {
#define ARG_GPR(N) \
    case N+1: a##N = gpregs[N];
        ARG_GPR(5);
        ARG_GPR(4);
        ARG_GPR(3);
        ARG_GPR(2);
        ARG_GPR(1);
    case 1: a0 = (PRUint64) that;
    case 0:;
#undef ARG_GPR
    }
    
    // Get pointer to method
    PRUint64 methodAddress = *((PRUint64 *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((PRUint64 *)methodAddress);
    
    typedef PRUint32 (*Method)(PRUint64, PRUint64, PRUint64, PRUint64,
                               PRUint64, PRUint64, double, double, double,
                               double, double, double, double, double);
    PRUint32 result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5,
                                              d0, d1, d2, d3, d4, d5,
                                              d6, d7);
    return result;
}
XPTC_InvokeByIndex(nsISupports * that, PRUint32 methodIndex,
                   PRUint32 paramCount, nsXPTCVariant * params)
{
    PRUint32 nr_gpr, nr_fpr, nr_stack;
    invoke_count_words(paramCount, params, nr_gpr, nr_fpr, nr_stack);
    
    // Stack, if used, must be 16-bytes aligned
    if (nr_stack)
        nr_stack = (nr_stack + 1) & ~1;

    // Load parameters to stack, if necessary
    PRUint64 *stack = (PRUint64 *) __builtin_alloca(nr_stack * 8);
    PRUint64 gpregs[GPR_COUNT];
    double fpregs[FPR_COUNT];
    invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);

    // disable the warning about sometimes not initialized variables which is hit
    // when we pass less than 8 XMM or less than 6 GPR registers.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wsometimes-uninitialized"

    // Load FPR registers from fpregs[]
    register double d0 asm("xmm0");
    register double d1 asm("xmm1");
    register double d2 asm("xmm2");
    register double d3 asm("xmm3");
    register double d4 asm("xmm4");
    register double d5 asm("xmm5");
    register double d6 asm("xmm6");
    register double d7 asm("xmm7");

    switch (nr_fpr) {
#define ARG_FPR(N) \
    case N+1: d##N = fpregs[N];
        ARG_FPR(7);
        ARG_FPR(6);
        ARG_FPR(5);
        ARG_FPR(4);
        ARG_FPR(3);
        ARG_FPR(2);
        ARG_FPR(1);
        ARG_FPR(0);
    case 0:;
#undef ARG_FPR
    }
    
    // Load GPR registers from gpregs[]
    register PRUint64 a0 asm("rdi");
    register PRUint64 a1 asm("rsi");
    register PRUint64 a2 asm("rdx");
    register PRUint64 a3 asm("rcx");
    register PRUint64 a4 asm("r8");
    register PRUint64 a5 asm("r9");
    
    switch (nr_gpr) {
#define ARG_GPR(N) \
    case N+1: a##N = gpregs[N];
        ARG_GPR(5);
        ARG_GPR(4);
        ARG_GPR(3);
        ARG_GPR(2);
        ARG_GPR(1);
    case 1: a0 = (PRUint64) that;
    case 0:;
#undef ARG_GPR
    }

    // Ensure that assignments to SSE registers won't be optimized away
    asm("" ::
        "x" (d0), "x" (d1), "x" (d2), "x" (d3),
        "x" (d4), "x" (d5), "x" (d6), "x" (d7));
    
    // Get pointer to method
    PRUint64 methodAddress = *((PRUint64 *)that);
    methodAddress += 8 * methodIndex;
    methodAddress = *((PRUint64 *)methodAddress);
    
    typedef PRUint32 (*Method)(PRUint64, PRUint64, PRUint64, PRUint64, PRUint64, PRUint64);
    PRUint32 result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5);
    return result;

#pragma clang diagnostic pop
}