/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be passed to us by the front-end, in which case we call it for large matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, const int *, const GFC_INTEGER_8 *, const GFC_INTEGER_8 *, const int *, const GFC_INTEGER_8 *, const int *, const GFC_INTEGER_8 *, GFC_INTEGER_8 *, const int *, int, int); #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray, gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_i8_avx128_fma3); void matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray, gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { const GFC_INTEGER_8 * restrict abase; const GFC_INTEGER_8 * restrict bbase; GFC_INTEGER_8 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; assert (GFC_DESCRIPTOR_RANK (a) == 2 || GFC_DESCRIPTOR_RANK (b) == 2);
/* PR target/36936 */
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -march=i386" } */
/* { dg-final { scan-assembler "cmov" } } */

/* The file as a whole is compiled with -march=i386, while foo carries a
   per-function target of arch=i686.  The assembler scan above looks for a
   cmov instruction in the generated output.  */
extern int foo (int) __attribute__((__target__("arch=i686")));

/* Return 1 when X is negative, otherwise return X unchanged.  This
   conditional assignment is the only conditional construct in the file,
   so it is presumably what the assembler scan is aimed at.  */
int
foo (int x)
{
  if (x < 0)
    x = 1;

  return x;
}
/* PR target/69969 */
/* { dg-do compile } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8" } */

/* Baseline: identity function built with the file-level options only.  */
int bar (int x) { return x; }

/* Same body as bar, but with the "no-vsx" target option applied to this one
   function.  The directive at the end of the definition line states that the
   quoted diagnostic must not be issued; it applies to the source line it
   sits on, so keep it on the same line as the definition.  */
__attribute__((__target__("no-vsx"))) int foo (int x) { return x; } /* { dg-bogus "-mallow-movmisalign requires -mvsx" } */
/* { dg-do run } */ /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */ /* { dg-require-effective-target avx512f } */ /* { dg-skip-if "no M_PI" { vxworks_kernel } } */ #define __NO_MATH_INLINES #include <math.h> #include "avx512f-check.h" #define NUM 64 static void __attribute__((__target__("fpmath=sse"))) init_src (float *src) { int i, sign = 1; float f = rand (); for (i = 0; i < NUM; i++) { src[i] = (i + 1) * f * M_PI * sign; if (i < (NUM / 2)) { if ((i % 6) == 0) f = f * src[i]; } else if (i == (NUM / 2)) f = rand (); else if ((i % 6) == 0) f = 1 / (f * (i + 1) * src[i] * M_PI * sign); sign = -sign;
/* Test whether all of the 32-bit function specific options are accepted
   without error.  */
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */

/* Each declaration below attaches exactly one target option to a function
   that is only declared, never defined or called, in the visible part of
   the file.  The first group uses the plain option names.  */
extern void test_abm (void) __attribute__((__target__("abm")));
extern void test_aes (void) __attribute__((__target__("aes")));
extern void test_bmi (void) __attribute__((__target__("bmi")));
extern void test_mmx (void) __attribute__((__target__("mmx")));
extern void test_pclmul (void) __attribute__((__target__("pclmul")));
extern void test_popcnt (void) __attribute__((__target__("popcnt")));
extern void test_recip (void) __attribute__((__target__("recip")));
extern void test_sse (void) __attribute__((__target__("sse")));
extern void test_sse2 (void) __attribute__((__target__("sse2")));
extern void test_sse3 (void) __attribute__((__target__("sse3")));
extern void test_sse4 (void) __attribute__((__target__("sse4")));
extern void test_sse4_1 (void) __attribute__((__target__("sse4.1")));
extern void test_sse4_2 (void) __attribute__((__target__("sse4.2")));
extern void test_sse4a (void) __attribute__((__target__("sse4a")));
extern void test_fma4 (void) __attribute__((__target__("fma4")));
extern void test_ssse3 (void) __attribute__((__target__("ssse3")));
extern void test_tbm (void) __attribute__((__target__("tbm")));

/* Second group: the same option names with a "no-" prefix.  The visible
   list stops at no-sse; further declarations may follow outside this
   view.  */
extern void test_no_abm (void) __attribute__((__target__("no-abm")));
extern void test_no_aes (void) __attribute__((__target__("no-aes")));
extern void test_no_bmi (void) __attribute__((__target__("no-bmi")));
extern void test_no_mmx (void) __attribute__((__target__("no-mmx")));
extern void test_no_pclmul (void) __attribute__((__target__("no-pclmul")));
extern void test_no_popcnt (void) __attribute__((__target__("no-popcnt")));
extern void test_no_recip (void) __attribute__((__target__("no-recip")));
extern void test_no_sse (void) __attribute__((__target__("no-sse")));
for (size_t index = 0; index < haystack.size(); ++index) { if (s.contains(haystack[index])) { return index; } } return StringPiece::npos; } #if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6) template <bool HAYSTACK_ALIGNED> inline size_t scanHaystackBlock(const StringPiece& haystack, const StringPiece& needles, int64_t idx) // inline is okay because it's only called from other sse4.2 functions __attribute__ ((__target__("sse4.2"))); // Scans a 16-byte block of haystack (starting at blockStartIdx) to find first // needle. If HAYSTACK_ALIGNED, then haystack must be 16byte aligned. // If !HAYSTACK_ALIGNED, then caller must ensure that it is safe to load the // block. template <bool HAYSTACK_ALIGNED> inline size_t scanHaystackBlock(const StringPiece& haystack, const StringPiece& needles, int64_t blockStartIdx) { DCHECK_GT(needles.size(), 16); // should handled by *needles16() method DCHECK(blockStartIdx + 16 <= haystack.size() || (PAGE_FOR(haystack.data() + blockStartIdx) == PAGE_FOR(haystack.data() + blockStartIdx + 15))); __v16qi arr1;
* GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Kvazaar. If not, see <http://www.gnu.org/licenses/>. ****************************************************************************/ /* * \file */ #include "../strategyselector.h" #include "../picture.h" #include <immintrin.h> #include <assert.h> #ifdef __GNUC__ __attribute__ ((__target__ ("sse2,sse4.1"))) #endif static unsigned reg_sad_sse41(const pixel * const data1, const pixel * const data2, const int width, const int height, const unsigned stride1, const unsigned stride2) { int y, x; unsigned sad = 0; __m128i sse_inc = _mm_setzero_si128 (); long long int sse_inc_array[2]; for (y = 0; y < height; ++y) { for (x = 0; x <= width-16; x+=16) { const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]); const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]); sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b)); }
/* Test whether all of the 64-bit function specific options are accepted
   without error.  */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */

/* Each declaration below attaches exactly one target option to a function
   that is only declared, never defined or called, in the visible part of
   the file.  The first group uses the plain option names.  */
extern void test_abm (void) __attribute__((__target__("abm")));
extern void test_aes (void) __attribute__((__target__("aes")));
extern void test_mmx (void) __attribute__((__target__("mmx")));
extern void test_pclmul (void) __attribute__((__target__("pclmul")));
extern void test_popcnt (void) __attribute__((__target__("popcnt")));
extern void test_recip (void) __attribute__((__target__("recip")));
extern void test_sse (void) __attribute__((__target__("sse")));
extern void test_sse2 (void) __attribute__((__target__("sse2")));
extern void test_sse3 (void) __attribute__((__target__("sse3")));
extern void test_sse4 (void) __attribute__((__target__("sse4")));
extern void test_sse4_1 (void) __attribute__((__target__("sse4.1")));
extern void test_sse4_2 (void) __attribute__((__target__("sse4.2")));
extern void test_sse4a (void) __attribute__((__target__("sse4a")));
extern void test_fma4 (void) __attribute__((__target__("fma4")));
extern void test_ssse3 (void) __attribute__((__target__("ssse3")));

/* Second group: option names with a "no-" prefix.  The visible list stops
   at no-sse4; further declarations may follow outside this view.  */
extern void test_no_abm (void) __attribute__((__target__("no-abm")));
extern void test_no_aes (void) __attribute__((__target__("no-aes")));
extern void test_no_mmx (void) __attribute__((__target__("no-mmx")));
extern void test_no_pclmul (void) __attribute__((__target__("no-pclmul")));
extern void test_no_popcnt (void) __attribute__((__target__("no-popcnt")));
extern void test_no_recip (void) __attribute__((__target__("no-recip")));
extern void test_no_sse (void) __attribute__((__target__("no-sse")));
extern void test_no_sse2 (void) __attribute__((__target__("no-sse2")));
extern void test_no_sse3 (void) __attribute__((__target__("no-sse3")));
extern void test_no_sse4 (void) __attribute__((__target__("no-sse4")));
// RUN: %clang_cc1 -fsyntax-only -ffreestanding %s -verify // RUN: %clang_cc1 -fsyntax-only -ffreestanding -fno-lax-vector-conversions %s -verify // RUN: %clang_cc1 -fsyntax-only -ffreestanding -x c++ %s -verify // expected-no-diagnostics #if defined(i386) || defined(__x86_64__) // Include the metaheader that includes all x86 intrinsic headers. #include <x86intrin.h> void __attribute__((__target__("mmx"))) mm_empty_wrap(void) { _mm_empty(); } __m128 __attribute__((__target__("sse"))) mm_add_ss_wrap(__m128 a, __m128 b) { return _mm_add_ss(a, b); } __m128d __attribute__((__target__("sse2"))) mm_sqrt_sd_wrap(__m128d a, __m128d b) { return _mm_sqrt_sd(a, b); } void __attribute__((__target__("sse3"))) mm_mwait_wrap(int a) { _mm_mwait(0, 0); } __m64 __attribute__((__target__("ssse3"))) mm_abs_pi8_wrap(__m64 a) { return _mm_abs_pi8(a); } __m128i __attribute__((__target__("sse4.1"))) mm_minpos_epu16_wrap(__m128i v) {
/* Test whether all of the 64-bit function specific options are accepted
   without error.  */
/* { dg-do compile { target { ! ia32 } } } */

/* NOTE(review): the include presumably carries the declarations for the
   accepted options; its contents are not visible here.  */
#include "funcspec-56.inc"

/* Negative cases: "foo" is used as the value for arch= and tune=, and each
   declaration is annotated with a "bad value" error on its own line.  The
   annotations apply to the line they sit on, so keep each one on the same
   line as its declaration.  */
extern void test_arch_foo (void) __attribute__((__target__("arch=foo"))); /* { dg-error "bad value" } */
extern void test_tune_foo (void) __attribute__((__target__("tune=foo"))); /* { dg-error "bad value" } */
/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */ /* { dg-options "-O2 -ffast-math -mcpu=power5 -mabi=no-altivec" } */ /* { dg-final { scan-assembler-times "fabs" 3 } } */ /* { dg-final { scan-assembler-times "fnabs" 3 } } */ /* { dg-final { scan-assembler-times "fsel" 3 } } */ /* { dg-final { scan-assembler-times "fcpsgn" 4 } } */ /* { dg-final { scan-assembler-not "xscpsgndp" } } */ /* Like ppc-target-1.c, but do not enable the altivec abi on 32-bit, so the power7 code should generate fcpsgn and not xscpsgndp. */ double normal1 (double, double); double power5 (double, double) __attribute__((__target__("cpu=power5"))); double power6 (double, double) __attribute__((__target__("cpu=power6"))); double power6x (double, double) __attribute__((__target__("cpu=power6x"))); double power7 (double, double) __attribute__((__target__("cpu=power7"))); double power7n (double, double) __attribute__((__target__("cpu=power7,no-vsx"))); double normal2 (double, double); /* fabs/fnabs/fsel */ double normal1 (double a, double b) { return __builtin_copysign (a, b); } /* fabs/fnabs/fsel */ double power5 (double a, double b) {
// RUN: %clang_cc1 -fsyntax-only -ffreestanding %s -verify // RUN: %clang_cc1 -fsyntax-only -ffreestanding -x c++ %s -verify // expected-no-diagnostics #if defined(i386) || defined(__x86_64__) #include <pmmintrin.h> int __attribute__((__target__(("sse3")))) foo(int a) { _mm_mwait(0, 0); return 4; } #endif