static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      return mode;

	  gcc_unreachable ();
	}
    }

  return mode;
}
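/* Worked example for type_natural_mode above (illustrative, not from the
   original source): given a 16-byte GNU C vector of four floats whose
   TYPE_MODE is not a supported vector mode, the loop starts at
   MIN_MODE_VECTOR_FLOAT and walks GET_MODE_WIDER_MODE until it reaches the
   vector mode with 4 units of SFmode, i.e. V4SFmode, which it returns.  */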
static tree
aarch64_lookup_simd_builtin_type (enum machine_mode mode,
				  enum aarch64_type_qualifiers q)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  /* Non-poly scalar modes map to standard types not in the table.  */
  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
    return aarch64_simd_builtin_std_type (mode, q);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == mode
	&& aarch64_simd_types[i].q == q)
      return aarch64_simd_types[i].itype;

  return NULL_TREE;
}
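/* Illustration (hypothetical lookups, not part of the original source):
   a request for (V4SImode, qualifier_none) would be answered from the
   aarch64_simd_types table (the int32x4_t entry), whereas a scalar request
   such as (SImode, qualifier_unsigned) never reaches the table and is
   resolved by aarch64_simd_builtin_std_type instead.  */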
static void
aarch64_init_simd_builtins (void)
{
  unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1;

  aarch64_init_simd_builtin_types ();

  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  It can be
     removed once all the intrinsics become strongly typed using the qualifier
     system.  */
  aarch64_init_simd_builtin_scalar_types ();

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
    {
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
      char namebuf[60];
      tree ftype = NULL;
      tree fndecl = NULL;

      d->fcode = fcode;

      /* We must track two variables here.  op_num is
	 the operand number as in the RTL pattern.  This is
	 required to access the mode (e.g. V4SF mode) of the
	 argument, from which the base type can be derived.
	 arg_num is an index in to the qualifiers data, which
	 gives qualifiers to the type (e.g. const unsigned).
	 The reason these two variables may differ by one is the
	 void return type.  While all return types take the 0th entry
	 in the qualifiers array, there is no operand for them in the
	 RTL pattern.  */
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void
		      ? op_num + 1
		      : op_num;
      tree return_type = void_type_node, args = void_list_node;
      tree eltype;

      /* Build a function type directly from the insn_data for this
	 builtin.  The build_function_type () function takes care of
	 removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
	{
	  machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
	  enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

	  if (qualifiers & qualifier_unsigned)
	    {
	      type_signature[arg_num] = 'u';
	      print_type_signature_p = true;
	    }
	  else if (qualifiers & qualifier_poly)
	    {
	      type_signature[arg_num] = 'p';
	      print_type_signature_p = true;
	    }
	  else
	    type_signature[arg_num] = 's';

	  /* Skip an internal operand for vget_{low, high}.  */
	  if (qualifiers & qualifier_internal)
	    continue;

	  /* Some builtins have different user-facing types
	     for certain arguments, encoded in d->mode.  */
	  if (qualifiers & qualifier_map_mode)
	    op_mode = d->mode;

	  /* For pointers, we want a pointer to the basic type
	     of the vector.  */
	  if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
	    op_mode = GET_MODE_INNER (op_mode);

	  eltype = aarch64_simd_builtin_type
		     (op_mode,
		      (qualifiers & qualifier_unsigned) != 0,
		      (qualifiers & qualifier_poly) != 0);
	  gcc_assert (eltype != NULL);

	  /* Add qualifiers.  */
	  if (qualifiers & qualifier_const)
	    eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);

	  if (qualifiers & qualifier_pointer)
	    eltype = build_pointer_type (eltype);

	  /* If we have reached arg_num == 0, we are at a non-void
	     return type.  Otherwise, we are still processing
	     arguments.  */
	  if (arg_num == 0)
	    return_type = eltype;
	  else
	    args = tree_cons (NULL_TREE, eltype, args);
	}

      ftype = build_function_type (return_type, args);
      gcc_assert (ftype != NULL);

      if (print_type_signature_p)
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
		  d->name, type_signature);
      else
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
		  d->name);

      fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD,
				     NULL, NULL_TREE);
      aarch64_builtin_decls[fcode] = fndecl;
    }
}
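/* Naming sketch (hypothetical example, not an actual builtin): a builtin
   whose operands are all signed keeps the plain "__builtin_aarch64_<name>"
   form.  One with, say, an unsigned return and two unsigned arguments sets
   print_type_signature_p and is registered as
   "__builtin_aarch64_<name>_uuu", the suffix being the 'u'/'p'/'s'
   characters collected per operand above.  */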
static bool
propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
{
  rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  enum rtx_code code = GET_CODE (x);
  machine_mode mode = GET_MODE (x);
  machine_mode op_mode;
  bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  bool valid_ops = true;

  if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
    {
      /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
	 they have side effects or not).  */
      *px = (side_effects_p (x)
	     ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
	     : gen_rtx_SCRATCH (GET_MODE (x)));
      return false;
    }

  /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
     address, and we are *not* inside one.  */
  if (x == old_rtx)
    {
      *px = new_rtx;
      return can_appear;
    }

  /* If this is an expression, try recursive substitution.  */
  switch (GET_RTX_CLASS (code))
    {
    case RTX_UNARY:
      op0 = XEXP (x, 0);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0))
	return true;
      tem = simplify_gen_unary (code, mode, op0, op_mode);
      break;

    case RTX_BIN_ARITH:
    case RTX_COMM_ARITH:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	return true;
      tem = simplify_gen_binary (code, mode, op0, op1);
      break;

    case RTX_COMPARE:
    case RTX_COMM_COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	return true;
      tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
      break;

    case RTX_TERNARY:
    case RTX_BITFIELD_OPS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
	return true;
      if (op_mode == VOIDmode)
	op_mode = GET_MODE (op0);
      tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
      break;

    case RTX_EXTRA:
      /* The only case we try to handle is a SUBREG.  */
      if (code == SUBREG)
	{
	  op0 = XEXP (x, 0);
	  valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
	  if (op0 == XEXP (x, 0))
	    return true;
	  tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
				     SUBREG_BYTE (x));
	}
      break;

    case RTX_OBJ:
      if (code == MEM && x != new_rtx)
	{
	  rtx new_op0;
	  op0 = XEXP (x, 0);

	  /* There are some addresses that we cannot work on.  */
	  if (!can_simplify_addr (op0))
	    return true;

	  op0 = new_op0 = targetm.delegitimize_address (op0);
	  valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
					flags | PR_CAN_APPEAR);

	  /* Dismiss transformation that we do not want to carry on.  */
	  if (!valid_ops
	      || new_op0 == op0
	      || !(GET_MODE (new_op0) == GET_MODE (op0)
		   || GET_MODE (new_op0) == VOIDmode))
	    return true;

	  canonicalize_address (new_op0);

	  /* Copy propagations are always ok.  Otherwise check the costs.  */
	  if (!(REG_P (old_rtx) && REG_P (new_rtx))
	      && !should_replace_address (op0, new_op0, GET_MODE (x),
					  MEM_ADDR_SPACE (x),
					  flags & PR_OPTIMIZE_FOR_SPEED))
	    return true;

	  tem = replace_equiv_address_nv (x, new_op0);
	}
      else if (code == LO_SUM)
	{
	  op0 = XEXP (x, 0);
	  op1 = XEXP (x, 1);

	  /* The only simplification we do attempts to remove references to op0
	     or make it constant -- in both cases, op0's invalidity will not
	     make the result invalid.  */
	  propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
	  valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
	  if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	    return true;

	  /* (lo_sum (high x) x) -> x  */
	  if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
	    tem = op1;
	  else
	    tem = gen_rtx_LO_SUM (mode, op0, op1);

	  /* OP1 is likely not a legitimate address, otherwise there would
	     have been no LO_SUM.  We want it to disappear if it is invalid,
	     return false in that case.  */
	  return memory_address_p (mode, tem);
	}
      else if (code == REG)
	{
	  if (rtx_equal_p (x, old_rtx))
	    {
	      *px = new_rtx;
	      return can_appear;
	    }
	}
      break;

    default:
      break;
    }

  /* No change, no trouble.  */
  if (tem == NULL_RTX)
    return true;

  *px = tem;

  /* Allow replacements that simplify operations on a vector or complex
     value to a component.  The most prominent case is
     (subreg ([vec_]concat ...)).  */
  if (REG_P (tem) && !HARD_REGISTER_P (tem)
      && (VECTOR_MODE_P (GET_MODE (new_rtx))
	  || COMPLEX_MODE_P (GET_MODE (new_rtx)))
      && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx)))
    return true;

  /* The replacement we made so far is valid, if all of the recursive
     replacements were valid, or we could simplify everything to
     a constant.  */
  return valid_ops || can_appear || CONSTANT_P (tem);
}
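/* Example of the vector-to-component exception above (illustrative RTL, not
   from the original source): substituting
   new_rtx = (vec_concat:V2SI (reg:SI r1) (reg:SI r2)) into
   (subreg:SI (reg:V2SI x) 0) simplifies to the pseudo (reg:SI r1).  SImode
   equals GET_MODE_INNER (V2SImode), so the replacement is accepted even
   when valid_ops is false.  */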
static void
vect_pattern_recog_1 (gimple (* vect_recog_func) (gimple, tree *, tree *),
		      gimple_stmt_iterator si)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info pattern_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      if (type_out)
	gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (pattern_stmt,
		      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
  STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;

  /* Patterns cannot be vectorized using SLP, because they change the order
     of computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
}
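/* Illustrative use (recognizer name from tree-vect-patterns.c, scenario
   hypothetical): a recognizer such as vect_recog_widen_mult_pattern can
   hand back a WIDEN_MULT_EXPR pattern_stmt for
   "int_c = (int) short_a * (int) short_b"; when type_in comes back as a
   scalar type, the else-branch above verifies via optab_for_tree_code that
   the target can vectorize that tree code before wiring the pattern_stmt
   to the original stmt.  */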
static void
vect_pattern_recog_1 (gimple (* vect_recog_func) (gimple, tree *, tree *),
		      gimple_stmt_iterator si)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info pattern_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;

  pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      pattern_vectype = get_vectype_for_scalar_type (type_in);
      if (!pattern_vectype)
	return;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, pattern_vectype, optab_default);
      vec_mode = TYPE_MODE (pattern_vectype);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)->insn_code)
	      == CODE_FOR_nothing
	  || (type_out
	      && (!get_vectype_for_scalar_type (type_out)
		  || (insn_data[icode].operand[0].mode
		      != TYPE_MODE (get_vectype_for_scalar_type (type_out))))))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (pattern_stmt,
		      new_stmt_vec_info (pattern_stmt, loop_vinfo));
  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
  STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;

  return;
}