Example No. 1
static bool
nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
{
  rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  rtx need_align_bytes = gen_reg_rtx (SImode);
  rtx last_2_bit = gen_reg_rtx (SImode);
  rtx byte_loop_base = gen_reg_rtx (SImode);
  rtx byte_loop_size = gen_reg_rtx (SImode);
  rtx remain_size = gen_reg_rtx (SImode);
  rtx new_base_reg;
  rtx value4byte, value4doubleword;
  rtx byte_mode_size;
  rtx last_byte_loop_label = gen_label_rtx ();

  size = force_reg (SImode, size);

  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
				    subreg_lowpart_offset (QImode, DImode));

  emit_move_insn (byte_loop_size, size);
  emit_move_insn (byte_loop_base, base_reg);

  /* Jump to the last byte loop if size is not greater than 16.  */
  emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL,
			   SImode, 1, last_byte_loop_label);

  /* Make sure we align to a 4-byte boundary first, since v3m cannot
     do unaligned accesses.  */
  emit_insn (gen_andsi3 (last_2_bit,
			 base_reg,
			 gen_int_mode (0x3, SImode)));

  emit_insn (gen_subsi3 (need_align_bytes,
			 gen_int_mode (4, SImode),
			 last_2_bit));

  /* Align to a 4-byte boundary.  */
  new_base_reg = emit_setmem_byte_loop (base_reg,
					need_align_bytes,
					value4byte,
					true);

  /* Calculate the remaining size.  */
  emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));

  /* Set memory doubleword by doubleword.  */
  byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
						remain_size,
						value4doubleword);

  emit_move_insn (byte_loop_base, new_base_reg);
  emit_move_insn (byte_loop_size, byte_mode_size);

  emit_label (last_byte_loop_label);

  /* And set memory for the remaining bytes.  */
  emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false);
  return true;
}
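
For orientation, here is a minimal host-side C sketch of the byte accounting this expander emits (an illustration only, not part of the GCC sources; the loops produced by emit_setmem_byte_loop and emit_setmem_doubleword_loop are modelled with plain C loops):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Model of the v3m strategy above: small blocks take the byte loop
   only; larger blocks are first brought to a 4-byte boundary with a
   byte loop, then filled 8 bytes at a time, then finished byte by
   byte.  */
static void
setmem_v3m_model (unsigned char *dst, size_t size, unsigned char value)
{
  if (size <= 16)
    {
      /* Corresponds to the branch to last_byte_loop_label.  */
      while (size--)
        *dst++ = value;
      return;
    }

  /* Align to a 4-byte boundary; v3m cannot do unaligned accesses.  */
  size_t last_2_bit = (uintptr_t) dst & 0x3;
  size_t need_align_bytes = 4 - last_2_bit;
  size_t remain_size = size - need_align_bytes;
  while (need_align_bytes--)
    *dst++ = value;

  /* Doubleword loop: store 8 replicated bytes per iteration.  */
  uint64_t value4doubleword;
  memset (&value4doubleword, value, sizeof value4doubleword);
  while (remain_size >= 8)
    {
      memcpy (dst, &value4doubleword, 8);
      dst += 8;
      remain_size -= 8;
    }

  /* Final byte loop for the remainder.  */
  while (remain_size--)
    *dst++ = value;
}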
Example No. 2
static bool
propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
{
  rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  enum rtx_code code = GET_CODE (x);
  machine_mode mode = GET_MODE (x);
  machine_mode op_mode;
  bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  bool valid_ops = true;

  if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
    {
      /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
	 they have side effects or not).  */
      *px = (side_effects_p (x)
	     ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
	     : gen_rtx_SCRATCH (GET_MODE (x)));
      return false;
    }

  /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
     address, and we are *not* inside one.  */
  if (x == old_rtx)
    {
      *px = new_rtx;
      return can_appear;
    }

  /* If this is an expression, try recursive substitution.  */
  switch (GET_RTX_CLASS (code))
    {
    case RTX_UNARY:
      op0 = XEXP (x, 0);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0))
	return true;
      tem = simplify_gen_unary (code, mode, op0, op_mode);
      break;

    case RTX_BIN_ARITH:
    case RTX_COMM_ARITH:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	return true;
      tem = simplify_gen_binary (code, mode, op0, op1);
      break;

    case RTX_COMPARE:
    case RTX_COMM_COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	return true;
      tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
      break;

    case RTX_TERNARY:
    case RTX_BITFIELD_OPS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
	return true;
      if (op_mode == VOIDmode)
	op_mode = GET_MODE (op0);
      tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
      break;

    case RTX_EXTRA:
      /* The only case we try to handle is a SUBREG.  */
      if (code == SUBREG)
	{
          op0 = XEXP (x, 0);
	  valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0))
	    return true;
	  tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
				     SUBREG_BYTE (x));
	}
      break;

    case RTX_OBJ:
      if (code == MEM && x != new_rtx)
	{
	  rtx new_op0;
	  op0 = XEXP (x, 0);

	  /* There are some addresses that we cannot work on.  */
	  if (!can_simplify_addr (op0))
	    return true;

	  op0 = new_op0 = targetm.delegitimize_address (op0);
	  valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
					flags | PR_CAN_APPEAR);

	  /* Dismiss transformations that we do not want to carry out.  */
	  if (!valid_ops
	      || new_op0 == op0
	      || !(GET_MODE (new_op0) == GET_MODE (op0)
		   || GET_MODE (new_op0) == VOIDmode))
	    return true;

	  canonicalize_address (new_op0);

	  /* Copy propagations are always ok.  Otherwise check the costs.  */
	  if (!(REG_P (old_rtx) && REG_P (new_rtx))
	      && !should_replace_address (op0, new_op0, GET_MODE (x),
					  MEM_ADDR_SPACE (x),
					  flags & PR_OPTIMIZE_FOR_SPEED))
	    return true;

	  tem = replace_equiv_address_nv (x, new_op0);
	}

      else if (code == LO_SUM)
	{
          op0 = XEXP (x, 0);
          op1 = XEXP (x, 1);

	  /* The only simplification we do attempts to remove references to op0
	     or make it constant -- in both cases, op0's invalidity will not
	     make the result invalid.  */
	  propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
	  valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	    return true;

	  /* (lo_sum (high x) x) -> x  */
	  if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
	    tem = op1;
	  else
	    tem = gen_rtx_LO_SUM (mode, op0, op1);

	  /* OP1 is likely not a legitimate address; otherwise there would have
	     been no LO_SUM.  We want it to disappear if it is invalid, so
	     return false in that case.  */
	  return memory_address_p (mode, tem);
	}

      else if (code == REG)
	{
	  if (rtx_equal_p (x, old_rtx))
	    {
              *px = new_rtx;
              return can_appear;
	    }
	}
      break;

    default:
      break;
    }

  /* No change, no trouble.  */
  if (tem == NULL_RTX)
    return true;

  *px = tem;

  /* Allow replacements that simplify operations on a vector or complex
     value to a component.  The most prominent case is
     (subreg ([vec_]concat ...)).   */
  if (REG_P (tem) && !HARD_REGISTER_P (tem)
      && (VECTOR_MODE_P (GET_MODE (new_rtx))
	  || COMPLEX_MODE_P (GET_MODE (new_rtx)))
      && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx)))
    return true;

  /* The replacement we made so far is valid, if all of the recursive
     replacements were valid, or we could simplify everything to
     a constant.  */
  return valid_ops || can_appear || CONSTANT_P (tem);
}
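
Stripped of the RTL details, the substitute-then-resimplify shape of propagate_rtx_1 is roughly the toy expression rewriter below (hypothetical struct expr type and rebuild_binary helper, purely for illustration): recurse into the operands, AND the validity results together, and rebuild the parent only if some operand actually changed.

#include <stdbool.h>
#include <stdlib.h>

/* Toy expression node standing in for an rtx.  */
struct expr
{
  int code;            /* operator code, 0 for a leaf */
  struct expr *op[2];  /* operands; NULL when unused */
};

/* Minimal stand-in for simplify_gen_binary: just rebuild the node.
   A real simplifier would attempt constant folding here.  */
static struct expr *
rebuild_binary (int code, struct expr *op0, struct expr *op1)
{
  struct expr *e = (struct expr *) calloc (1, sizeof *e);
  e->code = code;
  e->op[0] = op0;
  e->op[1] = op1;
  return e;
}

/* Replace OLD with NEW_E inside *PX, rebuilding on the way back up.
   Returns false when a recursive replacement was not valid, mirroring
   the valid_ops bookkeeping above.  */
static bool
propagate_expr (struct expr **px, struct expr *old, struct expr *new_e)
{
  struct expr *x = *px;
  bool valid_ops = true;

  if (x == old)
    {
      *px = new_e;
      return true;
    }

  if (x->op[0] == NULL)
    return true;                /* leaf: nothing to substitute */

  struct expr *op0 = x->op[0];
  struct expr *op1 = x->op[1];
  valid_ops &= propagate_expr (&op0, old, new_e);
  if (op1)
    valid_ops &= propagate_expr (&op1, old, new_e);

  if (op0 == x->op[0] && op1 == x->op[1])
    return true;                /* no change, no trouble */

  *px = rebuild_binary (x->code, op0, op1);
  return valid_ops;
}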
Example No. 3
/* Scan X and replace any eliminable registers (such as fp) with a
   replacement (such as sp) if SUBST_P, plus an offset.  The offset is
   a change in the offset between the eliminable register and its
   substitution if UPDATE_P, or the full offset if FULL_P, or
   otherwise zero.  If FULL_P, we also use the SP offsets for
   elimination to SP.  If UPDATE_P, use UPDATE_SP_OFFSET for updating
   offsets of registers eliminable to SP.  If UPDATE_SP_OFFSET is
   non-zero, don't use the difference of the offset and the previous
   offset.

   MEM_MODE is the mode of an enclosing MEM.  We need this to know how
   much to adjust a register for, e.g., PRE_DEC.  Also, if we are
   inside a MEM, we are allowed to replace a sum of a hard register
   and the constant zero with the hard register, which we cannot do
   outside a MEM.  In addition, we need to record the fact that a
   hard register is referenced outside a MEM.

   If we make full substitution to SP for non-null INSN, add the insn
   sp offset.  */
rtx
lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
		      bool subst_p, bool update_p,
		      HOST_WIDE_INT update_sp_offset, bool full_p)
{
  enum rtx_code code = GET_CODE (x);
  struct lra_elim_table *ep;
  rtx new_rtx;
  int i, j;
  const char *fmt;
  int copied = 0;

  lra_assert (!update_p || !full_p);
  lra_assert (update_sp_offset == 0 || (!subst_p && update_p && !full_p));
  if (! current_function_decl)
    return x;

  switch (code)
    {
    CASE_CONST_ANY:
    case CONST:
    case SYMBOL_REF:
    case CODE_LABEL:
    case PC:
    case CC0:
    case ASM_INPUT:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
    case RETURN:
      return x;

    case REG:
      /* First handle the case where we encounter a bare hard register
	 that is eliminable.  Replace it with a PLUS.  */
      if ((ep = get_elimination (x)) != NULL)
	{
	  rtx to = subst_p ? ep->to_rtx : ep->from_rtx;

	  if (update_sp_offset != 0)
	    {
	      if (ep->to_rtx == stack_pointer_rtx)
		return plus_constant (Pmode, to, update_sp_offset);
	      return to;
	    }
	  else if (update_p)
	    return plus_constant (Pmode, to, ep->offset - ep->previous_offset);
	  else if (full_p)
	    return plus_constant (Pmode, to,
				  ep->offset
				  - (insn != NULL_RTX
				     && ep->to_rtx == stack_pointer_rtx
				     ? lra_get_insn_recog_data (insn)->sp_offset
				     : 0));
	  else
	    return to;
	}
      return x;

    case PLUS:
      /* If this is the sum of an eliminable register and a constant, rework
	 the sum.  */
      if (REG_P (XEXP (x, 0)) && CONSTANT_P (XEXP (x, 1)))
	{
	  if ((ep = get_elimination (XEXP (x, 0))) != NULL)
	    {
	      HOST_WIDE_INT offset;
	      rtx to = subst_p ? ep->to_rtx : ep->from_rtx;

	      if (! update_p && ! full_p)
		return gen_rtx_PLUS (Pmode, to, XEXP (x, 1));
	      
	      if (update_sp_offset != 0)
		offset = ep->to_rtx == stack_pointer_rtx ? update_sp_offset : 0;
	      else
		offset = (update_p
			  ? ep->offset - ep->previous_offset : ep->offset);
	      if (full_p && insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx)
		offset -= lra_get_insn_recog_data (insn)->sp_offset;
	      if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == -offset)
		return to;
	      else
		return gen_rtx_PLUS (Pmode, to,
				     plus_constant (Pmode,
						    XEXP (x, 1), offset));
	    }

	  /* If the hard register is not eliminable, we are done since
	     the other operand is a constant.  */
	  return x;
	}

      /* If this is part of an address, we want to bring any constant
	 to the outermost PLUS.  We will do this by doing hard
	 register replacement in our operands and seeing if a constant
	 shows up in one of them.

	 Note that there is no risk of modifying the structure of the
	 insn, since we only get called for its operands, thus we are
	 either modifying the address inside a MEM, or something like
	 an address operand of a load-address insn.  */

      {
	rtx new0 = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
					 subst_p, update_p,
					 update_sp_offset, full_p);
	rtx new1 = lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
					 subst_p, update_p,
					 update_sp_offset, full_p);

	new0 = move_plus_up (new0);
	new1 = move_plus_up (new1);
	if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1))
	  return form_sum (new0, new1);
      }
      return x;

    case MULT:
      /* If this is the product of an eliminable hard register and a
	 constant, apply the distribute law and move the constant out
	 so that we have (plus (mult ..) ..).  This is needed in order
	 to keep load-address insns valid.  This case is pathological.
	 We ignore the possibility of overflow here.  */
      if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))
	  && (ep = get_elimination (XEXP (x, 0))) != NULL)
	{
	  rtx to = subst_p ? ep->to_rtx : ep->from_rtx;

	  if (update_sp_offset != 0)
	    {
	      if (ep->to_rtx == stack_pointer_rtx)
		return plus_constant (Pmode,
				      gen_rtx_MULT (Pmode, to, XEXP (x, 1)),
				      update_sp_offset * INTVAL (XEXP (x, 1)));
	      return gen_rtx_MULT (Pmode, to, XEXP (x, 1));
	    }
	  else if (update_p)
	    return plus_constant (Pmode,
				  gen_rtx_MULT (Pmode, to, XEXP (x, 1)),
				  (ep->offset - ep->previous_offset)
				  * INTVAL (XEXP (x, 1)));
	  else if (full_p)
	    {
	      HOST_WIDE_INT offset = ep->offset;

	      if (insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx)
		offset -= lra_get_insn_recog_data (insn)->sp_offset;
	      return
		plus_constant (Pmode,
			       gen_rtx_MULT (Pmode, to, XEXP (x, 1)),
			       offset * INTVAL (XEXP (x, 1)));
	    }
	  else
	    return gen_rtx_MULT (Pmode, to, XEXP (x, 1));
	}

      /* fall through */

    case CALL:
    case COMPARE:
    /* See comments before PLUS about handling MINUS.  */
    case MINUS:
    case DIV:	   case UDIV:
    case MOD:	   case UMOD:
    case AND:	   case IOR:	  case XOR:
    case ROTATERT: case ROTATE:
    case ASHIFTRT: case LSHIFTRT: case ASHIFT:
    case NE:	   case EQ:
    case GE:	   case GT:	  case GEU:    case GTU:
    case LE:	   case LT:	  case LEU:    case LTU:
      {
	rtx new0 = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
					 subst_p, update_p, 
					 update_sp_offset, full_p);
	rtx new1 = XEXP (x, 1)
		   ? lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
					   subst_p, update_p,
					   update_sp_offset, full_p) : 0;

	if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1))
	  return gen_rtx_fmt_ee (code, GET_MODE (x), new0, new1);
      }
      return x;

    case EXPR_LIST:
      /* If we have something in XEXP (x, 0), the usual case,
	 eliminate it.	*/
      if (XEXP (x, 0))
	{
	  new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
					  subst_p, update_p,
					  update_sp_offset, full_p);
	  if (new_rtx != XEXP (x, 0))
	    {
	      /* If this is a REG_DEAD note, it is not valid anymore.
		 Using the eliminated version could result in creating a
		 REG_DEAD note for the stack or frame pointer.	*/
	      if (REG_NOTE_KIND (x) == REG_DEAD)
		return (XEXP (x, 1)
			? lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
						subst_p, update_p,
						update_sp_offset, full_p)
			: NULL_RTX);

	      x = alloc_reg_note (REG_NOTE_KIND (x), new_rtx, XEXP (x, 1));
	    }
	}

      /* fall through */

    case INSN_LIST:
    case INT_LIST:
      /* Now do eliminations in the rest of the chain.	If this was
	 an EXPR_LIST, this might result in allocating more memory than is
	 strictly needed, but it simplifies the code.  */
      if (XEXP (x, 1))
	{
	  new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
					  subst_p, update_p,
					  update_sp_offset, full_p);
	  if (new_rtx != XEXP (x, 1))
	    return
	      gen_rtx_fmt_ee (GET_CODE (x), GET_MODE (x),
			      XEXP (x, 0), new_rtx);
	}
      return x;

    case PRE_INC:
    case POST_INC:
    case PRE_DEC:
    case POST_DEC:
      /* We do not support elimination of a register that is modified.
	 elimination_effects has already made sure that this does not
	 happen.  */
      return x;

    case PRE_MODIFY:
    case POST_MODIFY:
      /* We do not support elimination of a hard register that is
	 modified.  LRA has already made sure that this does not
	 happen.  The only remaining case we need to consider here is
	 that the increment value may be an eliminable register.  */
      if (GET_CODE (XEXP (x, 1)) == PLUS
	  && XEXP (XEXP (x, 1), 0) == XEXP (x, 0))
	{
	  rtx new_rtx = lra_eliminate_regs_1 (insn, XEXP (XEXP (x, 1), 1),
					      mem_mode, subst_p, update_p,
					      update_sp_offset, full_p);

	  if (new_rtx != XEXP (XEXP (x, 1), 1))
	    return gen_rtx_fmt_ee (code, GET_MODE (x), XEXP (x, 0),
				   gen_rtx_PLUS (GET_MODE (x),
						 XEXP (x, 0), new_rtx));
	}
      return x;

    case STRICT_LOW_PART:
    case NEG:	       case NOT:
    case SIGN_EXTEND:  case ZERO_EXTEND:
    case TRUNCATE:     case FLOAT_EXTEND: case FLOAT_TRUNCATE:
    case FLOAT:	       case FIX:
    case UNSIGNED_FIX: case UNSIGNED_FLOAT:
    case ABS:
    case SQRT:
    case FFS:
    case CLZ:
    case CTZ:
    case POPCOUNT:
    case PARITY:
    case BSWAP:
      new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
				      subst_p, update_p,
				      update_sp_offset, full_p);
      if (new_rtx != XEXP (x, 0))
	return gen_rtx_fmt_e (code, GET_MODE (x), new_rtx);
      return x;

    case SUBREG:
      new_rtx = lra_eliminate_regs_1 (insn, SUBREG_REG (x), mem_mode,
				      subst_p, update_p,
				      update_sp_offset, full_p);

      if (new_rtx != SUBREG_REG (x))
	{
	  int x_size = GET_MODE_SIZE (GET_MODE (x));
	  int new_size = GET_MODE_SIZE (GET_MODE (new_rtx));

	  if (MEM_P (new_rtx) && x_size <= new_size)
	    {
	      SUBREG_REG (x) = new_rtx;
	      alter_subreg (&x, false);
	      return x;
	    }
	  else if (! subst_p)
	    {
	      /* LRA can transform subregs itself.  So don't call
		 simplify_gen_subreg until LRA transformations are
		 finished.  Function simplify_gen_subreg can do
		 non-trivial transformations (like truncation) which
		 might cause LRA's work to fail.  */
	      SUBREG_REG (x) = new_rtx;
	      return x;
	    }
	  else
	    return simplify_gen_subreg (GET_MODE (x), new_rtx,
					GET_MODE (new_rtx), SUBREG_BYTE (x));
	}

      return x;

    case MEM:
      /* Our only special processing is to pass the mode of the MEM to our
	 recursive call and copy the flags.  While we are here, handle this
	 case more efficiently.	 */
      return
	replace_equiv_address_nv
	(x,
	 lra_eliminate_regs_1 (insn, XEXP (x, 0), GET_MODE (x),
			       subst_p, update_p, update_sp_offset, full_p));

    case USE:
      /* Handle insn_list USE that a call to a pure function may generate.  */
      new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), VOIDmode,
				      subst_p, update_p, update_sp_offset, full_p);
      if (new_rtx != XEXP (x, 0))
	return gen_rtx_USE (GET_MODE (x), new_rtx);
      return x;

    case CLOBBER:
    case SET:
      gcc_unreachable ();

    default:
      break;
    }

  /* Process each of our operands recursively.	If any have changed, make a
     copy of the rtx.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = 0; i < GET_RTX_LENGTH (code); i++, fmt++)
    {
      if (*fmt == 'e')
	{
	  new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, i), mem_mode,
					  subst_p, update_p,
					  update_sp_offset, full_p);
	  if (new_rtx != XEXP (x, i) && ! copied)
	    {
	      x = shallow_copy_rtx (x);
	      copied = 1;
	    }
	  XEXP (x, i) = new_rtx;
	}
      else if (*fmt == 'E')
	{
	  int copied_vec = 0;
	  for (j = 0; j < XVECLEN (x, i); j++)
	    {
	      new_rtx = lra_eliminate_regs_1 (insn, XVECEXP (x, i, j), mem_mode,
					      subst_p, update_p,
					      update_sp_offset, full_p);
	      if (new_rtx != XVECEXP (x, i, j) && ! copied_vec)
		{
		  rtvec new_v = gen_rtvec_v (XVECLEN (x, i),
					     XVEC (x, i)->elem);
		  if (! copied)
		    {
		      x = shallow_copy_rtx (x);
		      copied = 1;
		    }
		  XVEC (x, i) = new_v;
		  copied_vec = 1;
		}
	      XVECEXP (x, i, j) = new_rtx;
	    }
	}
    }

  return x;
}
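
The arithmetic behind the REG and PLUS cases reduces to the small model below (a sketch with a hypothetical, flattened elimination record; modes, SP-offset tracking and copy-on-write are deliberately left out): FROM + CONST is rewritten as TO + (CONST + OFFSET), collapsing to plain TO when the constant exactly cancels the offset.

#include <stdio.h>

/* Hypothetical, flattened view of one lra_elim_table entry.  */
struct elim
{
  const char *from;   /* eliminated register, e.g. the frame pointer */
  const char *to;     /* replacement register, e.g. the stack pointer */
  long offset;        /* current offset between the two */
};

/* Model of the PLUS case above: (plus FROM CST) becomes
   (plus TO (CST + OFFSET)), or just TO when the sum is zero.  */
static void
eliminate_plus_const (const struct elim *ep, long cst)
{
  long adjusted = cst + ep->offset;
  if (adjusted == 0)
    printf ("(reg %s)\n", ep->to);
  else
    printf ("(plus (reg %s) (const_int %ld))\n", ep->to, adjusted);
}

int
main (void)
{
  /* Pretend the frame pointer is currently eliminated to sp + 16.  */
  struct elim ep = { "fp", "sp", 16 };

  eliminate_plus_const (&ep, 8);    /* (plus (reg sp) (const_int 24)) */
  eliminate_plus_const (&ep, -16);  /* (reg sp) */
  return 0;
}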
Example No. 4
static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
			    rtx align ATTRIBUTE_UNUSED,
			    rtx expected_align ATTRIBUTE_UNUSED,
			    rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  if (TARGET_REDUCED_REGS)
    {
      maximum_regs  = 4;
      maximum_bytes = 64;
      start_regno   = 2;
    }
  else
    {
      maximum_regs  = 8;
      maximum_bytes = 128;
      start_regno   = 16;
    }

  real_size = UINTVAL (size) & GET_MODE_MASK (SImode);

  if (!(CONST_INT_P (size) && real_size <= maximum_bytes))
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  fill_per_smw = prepare_regs * UNITS_PER_WORD;

  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
	rtx reg0 = gen_rtx_REG (SImode, regno);
	rtx reg1 = gen_rtx_REG (SImode, regno+1);
	unsigned last_regno = start_regno + prepare_regs - 1;

	emit_move_insn (reg0, value4word);
	emit_move_insn (reg1, value4word);
	rtx regd = gen_rtx_REG (DImode, regno);
	regno += 2;

	/* Try to utilize movd44!  */
	while (regno <= last_regno)
	  {
	    if ((regno + 1) <= last_regno)
	      {
		rtx reg = gen_rtx_REG (DImode, regno);
		emit_move_insn (reg, regd);
		regno += 2;
	      }
	    else
	      {
		rtx reg = gen_rtx_REG (SImode, regno);
		emit_move_insn (reg, reg0);
		regno += 1;
	      }
	  }
	break;
      }
    case 1:
      {
	rtx reg = gen_rtx_REG (SImode, regno++);
	emit_move_insn (reg, value4word);
      }
      break;
    case 0:
      break;
    }

  if (fill_per_smw)
    for (; remain_bytes >= fill_per_smw; remain_bytes -= fill_per_smw)
      {
	emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
						dst_base_reg, dstmem,
						true, &new_base_reg));
	dst_base_reg = new_base_reg;
	dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  remain_words = remain_bytes / UNITS_PER_WORD;

  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
					      dst_base_reg, dstmem,
					      true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
				   subreg_lowpart_offset (QImode, SImode));
      int offset = 0;
      for (; remain_bytes; --remain_bytes, ++offset)
	{
	  nds32_emit_load_store (value, dstmem, QImode, offset, false);
	}
    }

  return true;
}
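
Ignoring the RTL emission, the sizing decisions of this unrolled expander come down to the arithmetic below (a sketch; the register-count limits are the ones from the two branches above): how many registers one store-multiple covers, how many full-width store-multiples are needed, and what remains for a narrower store-multiple and the byte tail.

#include <stdio.h>

#define UNITS_PER_WORD 4   /* nds32 word size in bytes */

/* Model of the unroll sizing: PREPARE_REGS registers are filled with
   the replicated value, each smw then stores PREPARE_REGS words, and
   the leftovers fall to a narrower smw and a byte loop.  */
static void
plan_setmem_unroll (unsigned long size, unsigned long maximum_regs)
{
  unsigned long remain_bytes = size;
  unsigned long prepare_regs = remain_bytes / UNITS_PER_WORD;
  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  unsigned long fill_per_smw = prepare_regs * UNITS_PER_WORD;
  unsigned long full_iters = fill_per_smw ? remain_bytes / fill_per_smw : 0;
  remain_bytes -= full_iters * fill_per_smw;

  unsigned long remain_words = remain_bytes / UNITS_PER_WORD;
  remain_bytes -= remain_words * UNITS_PER_WORD;

  printf ("size=%lu: %lu smw of %lu bytes, then %lu words, then %lu bytes\n",
          size, full_iters, fill_per_smw, remain_words, remain_bytes);
}

int
main (void)
{
  plan_setmem_unroll (64, 4);   /* TARGET_REDUCED_REGS limits */
  plan_setmem_unroll (30, 8);   /* full register file */
  return 0;
}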
Example No. 5
static bool
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
{
  rtx value4doubleword;
  rtx value4byte;
  rtx dst;
  rtx byte_mode_size;

  /* Emit loop version of setmem.
     memset:
       ! prepare word
       andi    $tmp1, $val, 0xff               ! $tmp1  <- 0x000000ab
       slli    $tmp2, $tmp1, 8                 ! $tmp2  <- 0x0000ab00
       or      $tmp3, $tmp1, $tmp2             ! $tmp3  <- 0x0000abab
       slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000
       or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab

       and     $size_for_word, $size, #-4
       beqz    $size_for_word, .Lword_mode_end

       add     $word_mode_end, $dst, $size_for_word
       andi    $byte_mode_size, $size, 3

     .Lword_mode:
       ! word-mode set loop
       smw.bim $value4word, [$dst], $value4word, 0
       bne     $word_mode_end, $dst, .Lword_mode

     .Lword_mode_end:
       beqz    $byte_mode_size, .Lend
       add     $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi  $value4word, [$dst], 1
       bne     $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */

  dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  /* ! prepare word
     andi    $tmp1, $value, 0xff             ! $tmp1  <- 0x000000ab
     slli    $tmp2, $tmp1, 8                 ! $tmp2  <- 0x0000ab00
     or      $tmp3, $tmp1, $tmp2             ! $tmp3  <- 0x0000abab
     slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000
     or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab  */
  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);

  /*   and     $size_for_word, $size, #-4
       beqz    $size_for_word, .Lword_mode_end

       add     $word_mode_end, $dst, $size_for_word
       andi    $byte_mode_size, $size, 3

     .Lword_mode:
       ! word-mode set loop
       smw.bim $value4word, [$dst], $value4word, 0
       bne     $word_mode_end, $dst, .Lword_mode
     .Lword_mode_end:  */
  byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);

  /*   beqz    $byte_mode_size, .Lend
       add     $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi  $value, [$dst], 1
       bne     $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */

  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
				    subreg_lowpart_offset (QImode, DImode));

  emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);

  return true;
}
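
The commented assembly splits the byte count exactly as in this minimal C model (illustrative only; the real word loop uses smw.bim with post-increment addressing rather than memcpy):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* C model of the loop version sketched in the comments above:
   size & -4 bytes go through the word-mode loop, the low two bits
   of the size through the byte-mode loop.  */
static void
setmem_loop_model (unsigned char *dst, size_t size, unsigned char value)
{
  /* "prepare word": replicate the byte, 0x000000ab -> 0xabababab.  */
  uint32_t value4word = value;
  value4word |= value4word << 8;
  value4word |= value4word << 16;

  size_t size_for_word = size & ~(size_t) 3;  /* and  $size_for_word, $size, #-4 */
  size_t byte_mode_size = size & 3;           /* andi $byte_mode_size, $size, 3 */

  unsigned char *word_mode_end = dst + size_for_word;
  while (dst != word_mode_end)                /* .Lword_mode */
    {
      memcpy (dst, &value4word, 4);
      dst += 4;
    }

  unsigned char *byte_mode_end = dst + byte_mode_size;
  while (dst != byte_mode_end)                /* .Lbyte_mode */
    *dst++ = value;
}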