* * On the PPC 601, unrolling the loops more doesn't seem to speed things * up at all. I'd be curious if other chips differed. */ #if __MWERKS__ < 0x800 #include "ppcasm.h" /* PowerPC assembler */ /* * MulN1 expects (*out, *in, len, k), count >= 1 * r3 r4 r5 r6 */ static const unsigned mulN1[] = { PPC_LWZ(7,4,0), /* Load first word of in in r7 */ PPC_MULLW(8,7,6), /* Low half of multiply in r8 */ PPC_MTCTR(5), /* Move len into CTR */ PPC_ADDIC(0,0,0), /* Clear carry bit for loop */ PPC_MULHWU(5,7,6), /* High half of multiply in r5 */ PPC_STW(8,3,0), PPC_BC(18,31,7), /* Branch to Label if --ctr == 0 */ /* Loop: */ PPC_LWZU(7,4,4), /* r7 = *++in */ PPC_MULLW(8,7,6), /* r8 = low word of product */ PPC_ADDE(8,8,5), /* Add carry word r5 and bit CF to r8 */ PPC_STWU(8,3,4), /* *++out = r8 */ PPC_MULHWU(5,7,6), /* r5 is high word of product, for carry word */ PPC_BC(16,31,-5), /* Branch to Loop if --ctr != 0 */ /* Label: */ PPC_ADDZE(5,5), /* Add carry flag to r5 */ PPC_STW(5,3,4), /* out[1] = r5 */ PPC_BLR()
/*
 * Emit the instruction sequence that implements an eBPF tail call.
 *
 * @image: JIT output buffer, used implicitly by the emit macros.
 * @ctx:   code generation state, also passed to the stack-offset and
 *         epilogue helpers below.
 * @out:   branch target taken whenever the tail call must not happen
 *         (index out of range, call limit exceeded, empty slot).
 *
 * Register contract at the point this sequence runs (set up by the eBPF
 * program itself):
 *   r3 / BPF_REG_1 - context pointer, handed unchanged to the next program
 *   r4 / BPF_REG_2 - pointer to the bpf_array
 *   r5 / BPF_REG_3 - index into the bpf_array
 *
 * Every statement below emits code, so their order is the order of the
 * generated instructions and must not be rearranged.
 */
static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	const int array_reg = b2p[BPF_REG_2];
	const int index_reg = b2p[BPF_REG_3];
	const int scratch = b2p[TMP_REG_1];

	/*
	 * Bounds check:
	 *   if (index >= array->map.max_entries)
	 *           goto out;
	 *
	 * The rlwinm (rotate by 0, mask bits 0..31) rewrites the index register
	 * in place before the word compare -- presumably to drop any stale
	 * upper bits; confirm against the ISA if this is touched.
	 */
	PPC_LWZ(scratch, array_reg, offsetof(struct bpf_array, map.max_entries));
	PPC_RLWINM(index_reg, index_reg, 0, 0, 31);
	PPC_CMPLW(index_reg, scratch);
	PPC_BCC(COND_GE, out);

	/*
	 * Call-depth limit:
	 *   if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *           goto out;
	 *
	 * NOTE(review): the counter is read with PPC_LD here but written back
	 * with PPC_BPF_STL below -- confirm the asymmetry is intentional.
	 */
	PPC_LD(scratch, 1, bpf_jit_stack_tailcallcnt(ctx));
	PPC_CMPLWI(scratch, MAX_TAIL_CALL_CNT);
	PPC_BCC(COND_GT, out);

	/* tail_call_cnt++, stored back to the same stack slot. */
	PPC_ADDI(scratch, scratch, 1);
	PPC_BPF_STL(scratch, 1, bpf_jit_stack_tailcallcnt(ctx));

	/*
	 * prog = array->ptrs[index];
	 *
	 * Computed as (index * 8 + array) and then loaded at the offset of
	 * the ptrs member.
	 */
	PPC_MULI(scratch, index_reg, 8);
	PPC_ADD(scratch, scratch, array_reg);
	PPC_LD(scratch, scratch, offsetof(struct bpf_array, ptrs));

	/*
	 * Empty slot:
	 *   if (prog == NULL)
	 *           goto out;
	 */
	PPC_CMPLDI(scratch, 0);
	PPC_BCC(COND_EQ, out);

	/*
	 * goto *(prog->bpf_func + prologue_size);
	 *
	 * The target address is adjusted by BPF_TAILCALL_PROLOGUE_SIZE so the
	 * jump lands past the callee's tail-call prologue.
	 */
	PPC_LD(scratch, scratch, offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* ELF ABI v1: additionally step over the function descriptor. */
	PPC_ADDI(scratch, scratch,
		 FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
#else
	PPC_ADDI(scratch, scratch, BPF_TAILCALL_PROLOGUE_SIZE);
#endif
	PPC_MTCTR(scratch);

	/* Tear down the stack frame and restore non-volatile registers ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	/* ... then jump to the address placed in CTR above. */
	PPC_BCTR();

	/* out: */
}