/* Replicate the low byte of VALUE into all four bytes of the SImode
   register VALUE4WORD (e.g. 0xab -> 0xabababab) and return VALUE4WORD.
   VALUE is either a QImode rtx or a CONST_INT.  */
static rtx
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  if (CONST_INT_P (value))
    {
      /* Constant byte: fold the replication at expand time and emit a
	 single move of the resulting word.  */
      unsigned HOST_WIDE_INT byte = UINTVAL (value) & GET_MODE_MASK (QImode);
      emit_move_insn (value4word,
		      gen_int_mode (byte | (byte << 8)
					 | (byte << 16) | (byte << 24),
				    SImode));
      return value4word;
    }

  if (NDS32_EXT_DSP_P ())
    {
      /* ! prepare word
	 insb   $tmp, $value, 1         ! $tmp        <- 0x0000abab
	 pkbb16 $value4word, $tmp, $tmp ! $value4word <- 0xabababab  */
      rtx pair = gen_reg_rtx (SImode);
      convert_move (pair, value, true);
      emit_insn (gen_insvsi_internal (pair,
				      gen_int_mode (0x8, SImode), pair));
      emit_insn (gen_pkbbsi_1 (value4word, pair, pair));
    }
  else
    {
      /* ! prepare word
	 andi  $t1, $value, 0xff      ! $t1         <- 0x000000ab
	 slli  $t2, $t1, 8            ! $t2         <- 0x0000ab00
	 or    $t3, $t1, $t2          ! $t3         <- 0x0000abab
	 slli  $t4, $t3, 16           ! $t4         <- 0xabab0000
	 or    $value4word, $t3, $t4  ! $value4word <- 0xabababab  */
      rtx byte_lo = expand_binop (SImode, and_optab, value,
				  gen_int_mode (0xff, SImode),
				  NULL_RTX, 0, OPTAB_WIDEN);
      rtx byte_hi = expand_binop (SImode, ashl_optab, byte_lo,
				  gen_int_mode (8, SImode),
				  NULL_RTX, 0, OPTAB_WIDEN);
      rtx half = expand_binop (SImode, ior_optab, byte_lo, byte_hi,
			       NULL_RTX, 0, OPTAB_WIDEN);
      rtx half_hi = expand_binop (SImode, ashl_optab, half,
				  gen_int_mode (16, SImode),
				  NULL_RTX, 0, OPTAB_WIDEN);
      emit_insn (gen_iorsi3 (value4word, half, half_hi));
    }

  return value4word;
}
/* Expand an AArch64 AdvSIMD builtin(intrinsic).
   FCODE selects the builtin, EXP is the CALL_EXPR being expanded, and
   TARGET is a hint rtx for the result (may be ignored).  Returns the
   rtx holding the result, or const0_rtx for the lane-check pseudo
   builtin, which only performs a compile-time diagnostic.  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      /* Lane-bounds check pseudo builtin: args are total size, element
	 size, and lane index.  It emits diagnostics only, no RTL.  */
      rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
      if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
	  && UINTVAL (elementsize) != 0
	  && UINTVAL (totalsize) != 0)
	{
	  rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
	  if (CONST_INT_P (lane_idx))
	    /* Valid lanes are [0, totalsize/elementsize).  */
	    aarch64_simd_lane_bounds (lane_idx, 0,
				      UINTVAL (totalsize)
				       / UINTVAL (elementsize),
				      exp);
	  else
	    error ("%Klane index must be a constant immediate", exp);
	}
      else
	error ("%Ktotal size and element size must be a non-zero constant immediate", exp);
      /* Don't generate any RTL.  */
      return const0_rtx;
    }
  aarch64_simd_builtin_datum *d =
		&aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;
  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int num_args = insn_data[d->code].n_operands;
  int is_void = 0;
  int k;

  is_void = !!(d->qualifiers[0] & qualifier_void);

  num_args += is_void;

  /* Classify every argument so aarch64_simd_expand_args knows whether
     to keep it as an immediate, force it to a register, or treat it as
     a lane index.  */
  for (k = 1; k < num_args; k++)
    {
      /* We have four arrays of data, each indexed in a different fashion.
	 qualifiers - element 0 always describes the function return type.
	 operands - element 0 is either the operand for return value (if
	   the function has a non-void return type) or the operand for the
	   first argument.
	 expr_args - element 0 always holds the first argument.
	 args - element 0 is always used for the return type.  */
      int qualifiers_k = k;
      int operands_k = k - is_void;
      int expr_args_k = k - 1;

      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
	args[k] = SIMD_ARG_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
	args[k] = SIMD_ARG_CONSTANT;
      else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
	{
	  rtx arg = expand_normal (CALL_EXPR_ARG (exp, (expr_args_k)));
	  /* Handle constants only if the predicate allows it.  */
	  bool op_const_int_p =
	    (CONST_INT_P (arg)
	     && (*insn_data[icode].operand[operands_k].predicate)
		(arg, insn_data[icode].operand[operands_k].mode));
	  args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
	}
      else
	args[k] = SIMD_ARG_COPY_TO_REG;
    }

  /* Terminate the classification list for the consumer.  */
  args[k] = SIMD_ARG_STOP;

  /* The interface to aarch64_simd_expand_args expects a 0 if
     the function is void, and a 1 if it is not.  */
  return aarch64_simd_expand_args (target, icode, !is_void, exp, &args[1]);
}
/* Expand a setmem of a small, constant SIZE by loading the replicated
   byte VALUE into a run of registers and emitting store-multiple-word
   (smw) instructions, with single-byte stores for any tail.
   Return true if RTL was emitted, false if this strategy does not
   apply (non-constant or too-large size) and the caller must fall
   back to another expansion.  */
static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
			    rtx align ATTRIBUTE_UNUSED,
			    rtx expected_align ATTRIBUTE_UNUSED,
			    rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  /* With the reduced register set fewer registers are usable for the
     store-multiple block, so cap the unrolled size lower.  */
  if (TARGET_REDUCED_REGS)
    {
      maximum_regs  = 4;
      maximum_bytes = 64;
      start_regno   = 2;
    }
  else
    {
      maximum_regs  = 8;
      maximum_bytes = 128;
      start_regno   = 16;
    }

  /* Bug fix: check CONST_INT_P before reading UINTVAL.  The original
     code evaluated UINTVAL (size) unconditionally, which is invalid
     for a non-CONST_INT rtx (ICE under RTL checking).  */
  if (!CONST_INT_P (size))
    return false;

  real_size = UINTVAL (size) & GET_MODE_MASK (SImode);

  if (real_size > maximum_bytes)
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  /* Build an SImode value with VALUE replicated into all four bytes.  */
  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  fill_per_smw = prepare_regs * UNITS_PER_WORD;

  /* Fill registers start_regno .. start_regno + prepare_regs - 1 with
     the replicated word.  */
  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
	rtx reg0 = gen_rtx_REG (SImode, regno);
	rtx reg1 = gen_rtx_REG (SImode, regno + 1);
	unsigned last_regno = start_regno + prepare_regs - 1;

	emit_move_insn (reg0, value4word);
	emit_move_insn (reg1, value4word);
	rtx regd = gen_rtx_REG (DImode, regno);
	regno += 2;

	/* Try to utilize movd44: copy register pairs in DImode where
	   possible, falling back to a single SImode move for an odd
	   trailing register.  */
	while (regno <= last_regno)
	  {
	    if ((regno + 1) <= last_regno)
	      {
		rtx reg = gen_rtx_REG (DImode, regno);
		emit_move_insn (reg, regd);
		regno += 2;
	      }
	    else
	      {
		rtx reg = gen_rtx_REG (SImode, regno);
		emit_move_insn (reg, reg0);
		regno += 1;
	      }
	  }
	break;
      }
    case 1:
      {
	rtx reg = gen_rtx_REG (SImode, regno++);
	emit_move_insn (reg, value4word);
      }
      break;
    case 0:
      break;
    }

  /* Emit full-width store-multiple blocks while enough bytes remain.  */
  if (fill_per_smw)
    for (; remain_bytes >= fill_per_smw; remain_bytes -= fill_per_smw)
      {
	emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
						dst_base_reg, dstmem,
						true, &new_base_reg));
	dst_base_reg = new_base_reg;
	dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  /* A shorter store-multiple covers any remaining whole words.  */
  remain_words = remain_bytes / UNITS_PER_WORD;
  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
					      dst_base_reg, dstmem,
					      true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  /* Store trailing bytes (fewer than a word) one at a time.  */
  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);
  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
				   subreg_lowpart_offset (QImode, SImode));
      int offset = 0;
      for (; remain_bytes; --remain_bytes, ++offset)
	nds32_emit_load_store (value, dstmem, QImode, offset, false);
    }

  return true;
}
/* Expand a movmem (block copy) whose SIZE is a compile-time constant.
   DSTMEM and SRCMEM are the destination and source MEMs, ALIGNMENT
   their shared alignment (a CONST_INT).  Sizes below 8 get a
   byte-copy loop; multiples of 8 get a double-word (lmw/smw) loop;
   anything else is delegated to the unknown-size expander.
   Return true if RTL was emitted, false if the caller must fall back.  */
static bool
nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
				       rtx size, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx double_word_mode_loop, byte_mode_loop;
  rtx tmp;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);

  /* V3M cores cannot perform the unaligned word accesses the
     double-word loop would need.
     Fix: return false, not 0, from a bool function (consistent with
     the other return statements here).  */
  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return false;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));

  if (total_bytes < 8)
    {
      /* Emit total_bytes less than 8 loop version of movmem.
	 add     $dst_end, $dst, $size
	 move    $dst_itr, $dst
	 .Lbyte_mode_loop:
	 lbi.bi  $tmp, [$src_itr], #1
	 sbi.bi  $tmp, [$dst_itr], #1
	 ! Not reached upper bound. Loop.
	 bne     $dst_itr, $dst_end, .Lbyte_mode_loop  */

      /* add     $dst_end, $dst, $size  */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
			      NULL_RTX, 0, OPTAB_WIDEN);

      /* move    $dst_itr, $dst
	 move    $src_itr, $src  */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Lbyte_mode_loop:  */
      emit_label (byte_mode_loop);

      /* lbi.bi  $tmp, [$src_itr], #1  */
      nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);

      /* sbi.bi  $tmp, [$dst_itr], #1  */
      nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);

      /* ! Not reached upper bound. Loop.
	 bne     $dst_itr, $dst_end, .Lbyte_mode_loop
	 Consistency fix: compare the Pmode pointer registers in Pmode,
	 matching the double-word loop below (Pmode == SImode on nds32,
	 so behavior is unchanged).  */
      emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, Pmode, 1,
			       byte_mode_loop);
      return true;
    }
  else if (total_bytes % 8 == 0)
    {
      /* Emit multiple of 8 loop version of movmem.

	 add     $dst_end, $dst, $size
	 move    $dst_itr, $dst
	 move    $src_itr, $src

	 .Ldouble_word_mode_loop:
	 lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
	 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
	 ! move will delete after register allocation
	 move    $src_itr, $src_itr'
	 move    $dst_itr, $dst_itr'
	 ! Not reached upper bound. Loop.
	 bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop  */

      /* add     $dst_end, $dst, $size  */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
			      NULL_RTX, 0, OPTAB_WIDEN);

      /* move    $dst_itr, $dst
	 move    $src_itr, $src  */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Ldouble_word_mode_loop:  */
      emit_label (double_word_mode_loop);
      /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
	 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr  */
      src_itr_m = src_itr;
      dst_itr_m = dst_itr;
      srcmem_m = srcmem;
      dstmem_m = dstmem;
      nds32_emit_mem_move_block (start_regno, 2,
				 &dst_itr_m, &dstmem_m,
				 &src_itr_m, &srcmem_m,
				 true);
      /* move    $src_itr, $src_itr'
	 move    $dst_itr, $dst_itr'  */
      emit_move_insn (dst_itr, dst_itr_m);
      emit_move_insn (src_itr, src_itr_m);

      /* ! Not reached upper bound. Loop.
	 bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop  */
      emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, Pmode, 1,
			       double_word_mode_loop);
    }
  else
    {
      /* Handle size greater than 8, and not a multiple of 8.  */
      return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
						      size, alignment);
    }

  return true;
}