Example #1
0
 *
 * On the PPC 601, unrolling the loops more doesn't seem to speed things
 * up at all.  I'd be curious if other chips differed.
 */
#if __MWERKS__ < 0x800

#include "ppcasm.h"	/* PowerPC assembler */
 
/*
 * MulN1 expects (*out, *in, len, k), len >= 1
 *                r3    r4   r5   r6
 *
 * Multiplies the len-word number at in by the single word k and stores
 * the product at out.  Each step stores the low word of in[i]*k plus the
 * carry from the previous step; the high word becomes the next carry.
 * The final carry word is stored after the last product word, so
 * len+1 words of out are written in total.
 *
 * The branch displacements below are counted in instructions, so they
 * must be adjusted if any instruction is added or removed.
 */
static const unsigned mulN1[] = {
	PPC_LWZ(7,4,0), 	/* Load first word of `in` into r7 */
	PPC_MULLW(8,7,6),	/* Low half of multiply in r8 */
	PPC_MTCTR(5),		/* Move len into CTR (frees r5 for use below) */
	PPC_ADDIC(0,0,0),	/* Clear carry bit for loop */
	PPC_MULHWU(5,7,6),	/* High half of multiply in r5 (the carry word) */
	PPC_STW(8,3,0),		/* out[0] = r8, the low word of the first product */
	PPC_BC(18,31,7),	/* Branch to Label if --ctr == 0 (i.e. len was 1) */
/* Loop: */
	PPC_LWZU(7,4,4),	/* r7 = *++in */
	PPC_MULLW(8,7,6),	/* r8 = low word of product */
	PPC_ADDE(8,8,5),	/* Add carry word r5 and bit CF to r8 */
	PPC_STWU(8,3,4),	/* *++out = r8 */
	PPC_MULHWU(5,7,6),	/* r5 is high word of product, for carry word */
	PPC_BC(16,31,-5),	/* Branch to Loop if --ctr != 0 */
/* Label: */
	PPC_ADDZE(5,5),		/* Add carry flag to r5 */
	PPC_STW(5,3,4),		/* out[1] = r5 (relative to the advanced out pointer) */
	PPC_BLR()
Example #2
0
static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * Emit the instruction sequence for an eBPF tail call.
	 *
	 * Register state on entry, as already set up by the eBPF program:
	 *   r3/BPF_REG_1 - ctx pointer, handed unchanged to the next program
	 *   r4/BPF_REG_2 - pointer to the bpf_array
	 *   r5/BPF_REG_3 - index into the bpf_array
	 *
	 * 'out' is the branch target used whenever the tail call is not
	 * taken; it corresponds to the point just past the emitted sequence.
	 */
	int array_reg = b2p[BPF_REG_2];
	int index_reg = b2p[BPF_REG_3];
	int scratch_reg = b2p[TMP_REG_1];

	/*
	 * Bounds check (the index is first reduced to its low 32 bits):
	 *
	 *   if (index >= array->map.max_entries)
	 *           goto out;
	 */
	PPC_LWZ(scratch_reg, array_reg, offsetof(struct bpf_array, map.max_entries));
	PPC_RLWINM(index_reg, index_reg, 0, 0, 31);
	PPC_CMPLW(index_reg, scratch_reg);
	PPC_BCC(COND_GE, out);

	/*
	 * Tail call limit, counter kept in the stack frame (off r1):
	 *
	 *   if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *           goto out;
	 */
	PPC_LD(scratch_reg, 1, bpf_jit_stack_tailcallcnt(ctx));
	PPC_CMPLWI(scratch_reg, MAX_TAIL_CALL_CNT);
	PPC_BCC(COND_GT, out);

	/* tail_call_cnt++, written back to the same stack slot */
	PPC_ADDI(scratch_reg, scratch_reg, 1);
	PPC_BPF_STL(scratch_reg, 1, bpf_jit_stack_tailcallcnt(ctx));

	/*
	 * prog = array->ptrs[index];
	 * The index is scaled by 8 to form the byte offset of the entry.
	 */
	PPC_MULI(scratch_reg, index_reg, 8);
	PPC_ADD(scratch_reg, scratch_reg, array_reg);
	PPC_LD(scratch_reg, scratch_reg, offsetof(struct bpf_array, ptrs));

	/*
	 * Empty slot:
	 *
	 *   if (prog == NULL)
	 *           goto out;
	 */
	PPC_CMPLDI(scratch_reg, 0);
	PPC_BCC(COND_EQ, out);

	/*
	 * Compute the jump target:
	 *
	 *   goto *(prog->bpf_func + prologue_size);
	 */
	PPC_LD(scratch_reg, scratch_reg, offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* the function descriptor has to be skipped as well */
	PPC_ADDI(scratch_reg, scratch_reg,
		 FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
#else
	PPC_ADDI(scratch_reg, scratch_reg, BPF_TAILCALL_PROLOGUE_SIZE);
#endif
	PPC_MTCTR(scratch_reg);

	/* common epilogue: tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	/* jump to the target held in CTR */
	PPC_BCTR();
	/* out: */
}