static rtx
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK(QImode);
      rtx new_val = gen_int_mode (val | (val << 8)
				  | (val << 16) | (val << 24), SImode);
      /* Just calculate at here if it's constant value.  */
      emit_move_insn (value4word, new_val);
    }
  else
    {
      if (NDS32_EXT_DSP_P ())
	{
	  /* ! prepare word
	     insb    $tmp, $value, 1         ! $tmp  <- 0x0000abab
	     pkbb16  $tmp6, $tmp2, $tmp2   ! $value4word  <- 0xabababab */
	  rtx tmp = gen_reg_rtx (SImode);

	  convert_move (tmp, value, true);

	  emit_insn (
	    gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));

	  emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
	}
      else
	{
	  /* ! prepare word
	     andi    $tmp1, $value, 0xff       ! $tmp1  <- 0x000000ab
	     slli    $tmp2, $tmp1, 8           ! $tmp2  <- 0x0000ab00
	     or      $tmp3, $tmp1, $tmp2       ! $tmp3  <- 0x0000abab
	     slli    $tmp4, $tmp3, 16          ! $tmp4  <- 0xabab0000
	     or      $val4word, $tmp3, $tmp4   ! $value4word  <- 0xabababab  */

	  rtx tmp1, tmp2, tmp3, tmp4;
	  tmp1 = expand_binop (SImode, and_optab, value,
			       gen_int_mode (0xff, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp2 = expand_binop (SImode, ashl_optab, tmp1,
			       gen_int_mode (8, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp4 = expand_binop (SImode, ashl_optab, tmp3,
			       gen_int_mode (16, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
	}
    }

  return value4word;
}
Example #2
0
/* Expand an AArch64 AdvSIMD builtin(intrinsic).  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
      if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
	  && UINTVAL (elementsize) != 0
	  && UINTVAL (totalsize) != 0)
	{
	  rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
          if (CONST_INT_P (lane_idx))
	    aarch64_simd_lane_bounds (lane_idx, 0,
				      UINTVAL (totalsize)
				       / UINTVAL (elementsize),
				      exp);
          else
	    error ("%Klane index must be a constant immediate", exp);
	}
      else
	error ("%Ktotal size and element size must be a non-zero constant immediate", exp);
      /* Don't generate any RTL.  */
      return const0_rtx;
    }
  aarch64_simd_builtin_datum *d =
		&aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;
  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int num_args = insn_data[d->code].n_operands;
  int is_void = 0;
  int k;

  is_void = !!(d->qualifiers[0] & qualifier_void);

  num_args += is_void;

  for (k = 1; k < num_args; k++)
    {
      /* We have four arrays of data, each indexed in a different fashion.
	 qualifiers - element 0 always describes the function return type.
	 operands - element 0 is either the operand for return value (if
	   the function has a non-void return type) or the operand for the
	   first argument.
	 expr_args - element 0 always holds the first argument.
	 args - element 0 is always used for the return type.  */
      int qualifiers_k = k;
      int operands_k = k - is_void;
      int expr_args_k = k - 1;

      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
	args[k] = SIMD_ARG_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
	args[k] = SIMD_ARG_CONSTANT;
      else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
	{
	  rtx arg
	    = expand_normal (CALL_EXPR_ARG (exp,
					    (expr_args_k)));
	  /* Handle constants only if the predicate allows it.  */
	  bool op_const_int_p =
	    (CONST_INT_P (arg)
	     && (*insn_data[icode].operand[operands_k].predicate)
		(arg, insn_data[icode].operand[operands_k].mode));
	  args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
	}
      else
	args[k] = SIMD_ARG_COPY_TO_REG;

    }
  args[k] = SIMD_ARG_STOP;

  /* The interface to aarch64_simd_expand_args expects a 0 if
     the function is void, and a 1 if it is not.  */
  return aarch64_simd_expand_args
	  (target, icode, !is_void, exp, &args[1]);
}
static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
			    rtx align ATTRIBUTE_UNUSED,
			    rtx expected_align ATTRIBUTE_UNUSED,
			    rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  if (TARGET_REDUCED_REGS)
    {
      maximum_regs  = 4;
      maximum_bytes = 64;
      start_regno   = 2;
    }
  else
    {
      maximum_regs  = 8;
      maximum_bytes = 128;
      start_regno   = 16;
    }

  real_size = UINTVAL (size) & GET_MODE_MASK(SImode);

  if (!(CONST_INT_P (size) && real_size <= maximum_bytes))
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  fill_per_smw = prepare_regs * UNITS_PER_WORD;

  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
	rtx reg0 = gen_rtx_REG (SImode, regno);
	rtx reg1 = gen_rtx_REG (SImode, regno+1);
	unsigned last_regno = start_regno + prepare_regs - 1;

	emit_move_insn (reg0, value4word);
	emit_move_insn (reg1, value4word);
	rtx regd = gen_rtx_REG (DImode, regno);
	regno += 2;

	/* Try to utilize movd44!  */
	while (regno <= last_regno)
	  {
	    if ((regno + 1) <=last_regno)
	      {
		rtx reg = gen_rtx_REG (DImode, regno);
		emit_move_insn (reg, regd);
		regno += 2;
	      }
	    else
	      {
		rtx reg = gen_rtx_REG (SImode, regno);
		emit_move_insn (reg, reg0);
		regno += 1;
	      }
	  }
	break;
      }
    case 1:
      {
	rtx reg = gen_rtx_REG (SImode, regno++);
	emit_move_insn (reg, value4word);
      }
      break;
    case 0:
      break;
    }

  if (fill_per_smw)
    for (;remain_bytes >= fill_per_smw;remain_bytes -= fill_per_smw)
      {
	emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
						dst_base_reg, dstmem,
						true, &new_base_reg));
	dst_base_reg = new_base_reg;
	dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  remain_words = remain_bytes / UNITS_PER_WORD;

  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
					      dst_base_reg, dstmem,
					      true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
				   subreg_lowpart_offset(QImode, SImode));
      int offset = 0;
      for (;remain_bytes;--remain_bytes, ++offset)
	{
	  nds32_emit_load_store (value, dstmem, QImode, offset, false);
	}
    }

  return true;
}
static bool
nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
				       rtx size, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx double_word_mode_loop, byte_mode_loop;
  rtx tmp;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return 0;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));

  if (total_bytes < 8)
    {
      /* Emit total_bytes less than 8 loop version of movmem.
	add     $dst_end, $dst, $size
	move    $dst_itr, $dst
	.Lbyte_mode_loop:
	lbi.bi  $tmp, [$src_itr], #1
	sbi.bi  $tmp, [$dst_itr], #1
	! Not readch upper bound. Loop.
	bne     $dst_itr, $dst_end, .Lbyte_mode_loop */

      /* add     $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
			      NULL_RTX, 0, OPTAB_WIDEN);
      /* move    $dst_itr, $dst
	 move    $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Lbyte_mode_loop: */
      emit_label (byte_mode_loop);

      /* lbi.bi  $tmp, [$src_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);

      /* sbi.bi  $tmp, [$dst_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
      /* ! Not readch upper bound. Loop.
	 bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
      emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
			       SImode, 1, byte_mode_loop);
      return true;
    }
  else if (total_bytes % 8 == 0)
    {
      /* Emit multiple of 8 loop version of movmem.

	 add     $dst_end, $dst, $size
	 move    $dst_itr, $dst
	 move    $src_itr, $src

	.Ldouble_word_mode_loop:
	lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
	smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
	! move will delete after register allocation
	move    $src_itr, $src_itr'
	move    $dst_itr, $dst_itr'
	! Not readch upper bound. Loop.
	bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */

      /* add     $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
			      NULL_RTX, 0, OPTAB_WIDEN);

      /* move    $dst_itr, $dst
	 move    $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Ldouble_word_mode_loop: */
      emit_label (double_word_mode_loop);
      /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
	 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
      src_itr_m = src_itr;
      dst_itr_m = dst_itr;
      srcmem_m = srcmem;
      dstmem_m = dstmem;
      nds32_emit_mem_move_block (start_regno, 2,
				 &dst_itr_m, &dstmem_m,
				 &src_itr_m, &srcmem_m,
				 true);
      /* move    $src_itr, $src_itr'
	 move    $dst_itr, $dst_itr' */
      emit_move_insn (dst_itr, dst_itr_m);
      emit_move_insn (src_itr, src_itr_m);

      /* ! Not readch upper bound. Loop.
	 bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
      emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
			       Pmode, 1, double_word_mode_loop);
    }
  else
    {
      /* Handle size greater than 8, and not a multiple of 8.  */
      return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
						      size, alignment);
    }

  return true;
}