/* Emit one load-multiple reading from *SRC_MEM followed by one
   store-multiple writing to *DST_MEM.  BASE_REGNO and COUNT are handed
   unchanged to both expanders.

   When UPDATE_BASE_REG_P is true, the base register reported back by each
   expander is stored through SRC_BASE_REG / DST_BASE_REG, and *SRC_MEM /
   *DST_MEM are replaced by fresh SImode MEMs addressed by those new base
   registers.  When it is false, the four in/out operands are left
   untouched.  */
static void
nds32_emit_mem_move_block (int base_regno, int count,
                           rtx *dst_base_reg, rtx *dst_mem,
                           rtx *src_base_reg, rtx *src_mem,
                           bool update_base_reg_p)
{
  rtx updated_src_base;
  rtx updated_dst_base;

  /* Load side.  */
  rtx load_pattern
    = nds32_expand_load_multiple (base_regno, count,
                                  *src_base_reg, *src_mem,
                                  update_base_reg_p, &updated_src_base);
  emit_insn (load_pattern);

  if (update_base_reg_p)
    {
      *src_base_reg = updated_src_base;
      *src_mem = gen_rtx_MEM (SImode, updated_src_base);
    }

  /* Store side.  */
  rtx store_pattern
    = nds32_expand_store_multiple (base_regno, count,
                                   *dst_base_reg, *dst_mem,
                                   update_base_reg_p, &updated_dst_base);
  emit_insn (store_pattern);

  if (update_base_reg_p)
    {
      *dst_base_reg = updated_dst_base;
      *dst_mem = gen_rtx_MEM (SImode, updated_dst_base);
    }
}
/* Function to move block memory content by using load_multiple and
   store_multiple.  This is an auxiliary extern function that helps to
   create the rtx template.  Check the nds32-multiple.md file for the
   patterns.

   DSTMEM and SRCMEM are the destination and source MEMs, TOTAL_BYTES is
   the number of bytes to move and ALIGNMENT is the alignment operand.

   Return 1 if the load/store-multiple pair has been emitted.  Return 0,
   without emitting anything, if the request is not supported.  */
int
nds32_expand_movmemqi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
{
  HOST_WIDE_INT in_words, out_words;
  rtx dst_base_reg, src_base_reg;
  /* Output slot required by the expanders' interface.  No base-register
     update is requested here, so its value is never read.  */
  rtx new_base_reg;
  int maximum_bytes;

  /* Because the reduced register set has few registers
     (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31'
      cannot be used for register allocation),
     using 8 registers (32 bytes) for moving a memory block
     may easily consume all of them.
     It makes register allocation/spilling hard to work.
     So we only allow maximum=4 registers (16 bytes) for
     moving a memory block under the reduced register set.  */
  if (TARGET_REDUCED_REGS)
    maximum_bytes = 16;
  else
    maximum_bytes = 32;

  /* 1. Total_bytes must be a constant integer.
     2. Alignment must be a constant integer.
     3. Total_bytes must be positive.  A CONST_INT is a signed value, so
        a zero or negative size would otherwise slip through the upper
        bound check below and yield a non-positive word count.
     4. Maximum 4 or 8 registers, 4 * 4 = 16 bytes, 8 * 4 = 32 bytes.
     5. Requires (n * 4) block size.
     6. Requires 4-byte alignment.  */
  if (GET_CODE (total_bytes) != CONST_INT
      || GET_CODE (alignment) != CONST_INT
      || INTVAL (total_bytes) <= 0
      || INTVAL (total_bytes) > maximum_bytes
      || INTVAL (total_bytes) & 3
      || INTVAL (alignment) & 3)
    return 0;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));

  out_words = in_words = INTVAL (total_bytes) / UNITS_PER_WORD;

  /* Pass the full argument list, as the other callers in this file do;
     'false' means the base registers are not to be updated.  */
  emit_insn (nds32_expand_load_multiple (0, in_words, src_base_reg, srcmem,
                                         false, &new_base_reg));
  emit_insn (nds32_expand_store_multiple (0, out_words, dst_base_reg, dstmem,
                                          false, &new_base_reg));

  /* Successfully create patterns, return 1.  */
  return 1;
}
/* Expand a memory fill of SIZE bytes at DSTMEM with the byte VALUE as an
   unrolled sequence of store-multiple instructions followed by byte
   stores for any tail.

   SIZE must be a CONST_INT no larger than the limit for the current
   register configuration (64 bytes with TARGET_REDUCED_REGS, otherwise
   128 bytes); VALUE must be either QImode or a CONST_INT.  ALIGN,
   EXPECTED_ALIGN and EXPECTED_SIZE are unused.

   Return true if the sequence has been emitted, false if SIZE is not
   supported, in which case nothing is emitted.  */
static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
                            rtx align ATTRIBUTE_UNUSED,
                            rtx expected_align ATTRIBUTE_UNUSED,
                            rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  /* Pick how many registers may hold the fill pattern, the largest size
     handled here, and the first register number used for the pattern.  */
  if (TARGET_REDUCED_REGS)
    {
      maximum_regs = 4;
      maximum_bytes = 64;
      start_regno = 2;
    }
  else
    {
      maximum_regs = 8;
      maximum_bytes = 128;
      start_regno = 16;
    }

  /* Make sure SIZE really is a constant before reading its integer
     value; UINTVAL is only meaningful on a CONST_INT.  */
  if (!CONST_INT_P (size))
    return false;

  real_size = UINTVAL (size) & GET_MODE_MASK (SImode);

  if (real_size > maximum_bytes)
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  /* Number of bytes written by one full-width store-multiple.  */
  fill_per_smw = prepare_regs * UNITS_PER_WORD;

  /* Fill PREPARE_REGS consecutive registers, starting at START_REGNO,
     with the word-sized pattern.  */
  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
        rtx reg0 = gen_rtx_REG (SImode, regno);
        rtx reg1 = gen_rtx_REG (SImode, regno+1);
        unsigned last_regno = start_regno + prepare_regs - 1;

        emit_move_insn (reg0, value4word);
        emit_move_insn (reg1, value4word);
        rtx regd = gen_rtx_REG (DImode, regno);
        regno += 2;

        /* Try to utilize movd44!  Copy the first register pair into the
           following pairs; a trailing odd register is copied from REG0.  */
        while (regno <= last_regno)
          {
            if ((regno + 1) <= last_regno)
              {
                rtx reg = gen_rtx_REG (DImode, regno);
                emit_move_insn (reg, regd);
                regno += 2;
              }
            else
              {
                rtx reg = gen_rtx_REG (SImode, regno);
                emit_move_insn (reg, reg0);
                regno += 1;
              }
          }
        break;
      }
    case 1:
      {
        rtx reg = gen_rtx_REG (SImode, regno++);
        emit_move_insn (reg, value4word);
      }
      break;
    case 0:
      /* Fewer than UNITS_PER_WORD bytes: only the byte tail is stored.  */
      break;
    }

  /* Emit as many full-width store-multiples as fit, advancing the base
     register after each one.  */
  if (fill_per_smw)
    for (; remain_bytes >= fill_per_smw; remain_bytes -= fill_per_smw)
      {
        emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
                                                dst_base_reg, dstmem,
                                                true, &new_base_reg));
        dst_base_reg = new_base_reg;
        dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  /* Store the remaining whole words with one narrower store-multiple.  */
  remain_words = remain_bytes / UNITS_PER_WORD;

  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
                                              dst_base_reg, dstmem,
                                              true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  /* Store the remaining bytes one at a time at increasing offsets from
     the current destination.  */
  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
                                   subreg_lowpart_offset (QImode, SImode));
      int offset = 0;
      for (; remain_bytes; --remain_bytes, ++offset)
        {
          nds32_emit_load_store (value, dstmem, QImode, offset, false);
        }
    }

  return true;
}