My little initializer script
This commit is contained in:
134
clangd/lib/clang/18/include/ppc_wrappers/bmi2intrin.h
Normal file
134
clangd/lib/clang/18/include/ppc_wrappers/bmi2intrin.h
Normal file
@@ -0,0 +1,134 @@
|
||||
/*===---- bmiintrin.h - Implementation of BMI2 intrinsics on PowerPC -------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined X86GPRINTRIN_H_
|
||||
#error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef BMI2INTRIN_H_
|
||||
#define BMI2INTRIN_H_
|
||||
|
||||
extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _bzhi_u32(unsigned int __X, unsigned int __Y) {
  /* BZHI: zero all bits of __X at positions >= index.  Per the x86
     definition only bits [7:0] of the index are used, and an index of
     the operand width or more leaves the source unchanged.  The former
     shift-based expression ((__X << (32-__Y)) >> (32-__Y)) was undefined
     C for __Y == 0 (shift by 32) and for __Y > 32. */
  unsigned int __index = __Y & 0xFF;
  if (__index >= 32)
    return __X;
  return __X & ((1u << __index) - 1u);
}
|
||||
|
||||
extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
  /* MULX: 32x32 -> 64-bit unsigned multiply.  The high half of the
     product is stored through __P, the low half is returned. */
  const unsigned long long __product =
      (unsigned long long)__X * (unsigned long long)__Y;
  *__P = (unsigned int)(__product >> 32);
  return (unsigned int)__product;
}
|
||||
|
||||
#ifdef __PPC64__
|
||||
extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _bzhi_u64(unsigned long long __X, unsigned long long __Y) {
  /* BZHI: zero all bits of __X at positions >= index.  Only bits [7:0]
     of the index are architecturally significant; an index >= 64 keeps
     the whole source.  The former double-shift was undefined C for
     __Y == 0 (shift by 64) and for __Y > 64. */
  unsigned long long __index = __Y & 0xFF;
  if (__index >= 64)
    return __X;
  return __X & ((1ULL << __index) - 1ULL);
}
|
||||
|
||||
/* __int128 requires base 64-bit. */
|
||||
extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mulx_u64(unsigned long long __X, unsigned long long __Y,
              unsigned long long *__P) {
  /* MULX: 64x64 -> 128-bit unsigned multiply (needs base 64-bit for
     __int128).  High half goes through __P, low half is returned. */
  const unsigned __int128 __product = (unsigned __int128)__X * __Y;
  *__P = (unsigned long long)(__product >> 64);
  return (unsigned long long)__product;
}
|
||||
|
||||
#ifdef _ARCH_PWR7
|
||||
/* popcount and bpermd require power7 minimum. */
|
||||
extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _pdep_u64(unsigned long long __X, unsigned long long __M) {
  /* Software PDEP: scatter the low popcount(__M) bits of __X to the
     set-bit positions of __M, low source bit to lowest mask bit. */
  const unsigned long __msb = 0x8000000000000000UL;
  unsigned long __deposited = 0x0UL;
  unsigned long __mask_left = __M;
  unsigned long __lz, __aligned;
  unsigned long __shift;

  /* popcount(__M) source bits are consumed; this shift lines the
     matching source bit up under each mask bit, highest first. */
  __shift = 64 - __builtin_popcountl(__M);

  /* Peel one set mask bit per iteration, most significant first,
     clearing it from the working mask as we go. */
  while (__mask_left != 0) {
    __lz = __builtin_clzl(__mask_left);
    __aligned = __X << (__shift - __lz);
    __mask_left ^= (__msb >> __lz);
    __deposited |= (__aligned & (__msb >> __lz));
    __shift++;
  }
  return __deposited;
}
|
||||
|
||||
extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _pext_u64(unsigned long long __X, unsigned long long __M) {
  /* Software PEXT: gather the bits of __X selected by __M into the
     low-order bits of the result. */
  const unsigned long __msb = 0x8000000000000000UL;
  unsigned long __ctrl = 0x4040404040404040UL; /* initial bit-permute control */
  unsigned long __mask_left = __M;
  unsigned long __lz;
  unsigned long __gathered;

  /* A constant mask with at most 8 set bits compiles down to a single
     Power8 bpermd: the control-building loop below folds away at
     compile time. */
  if (__builtin_constant_p(__M) && (__builtin_popcountl(__M) <= 8)) {
    long __i;
    for (__i = 0; __i < __builtin_popcountl(__M); __i++) {
      __lz = __builtin_clzl(__mask_left);
      __ctrl = (__ctrl << 8) | __lz;
      __mask_left ^= (__msb >> __lz);
    }
    __gathered = __builtin_bpermd(__ctrl, __X);
  } else {
    /* General case: pull one selected bit per iteration, most
       significant mask bit first.  A while loop (not for) keeps the
       setup compact and lets the compiler common the mask-clearing
       xor with the (__mask_left != 0) test. */
    __ctrl = 64 - __builtin_popcountl(__M);
    __gathered = 0;
    while (__mask_left != 0) {
      unsigned long __bit;
      __lz = __builtin_clzl(__mask_left);
      __bit = (__X & (__msb >> __lz)) >> (__ctrl - __lz);
      __mask_left ^= (__msb >> __lz);
      __gathered |= (__bit);
      __ctrl++;
    }
  }
  return __gathered;
}
|
||||
|
||||
/* these 32-bit implementations depend on 64-bit pdep/pext
|
||||
which depend on _ARCH_PWR7. */
|
||||
/* 32-bit PDEP/PEXT: zero-extend and reuse the 64-bit implementations
   (which in turn require _ARCH_PWR7). */
extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _pdep_u32(unsigned int __X, unsigned int __Y) {
  return _pdep_u64(__X, __Y);
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _pext_u32(unsigned int __X, unsigned int __Y) {
  return _pext_u64(__X, __Y);
}
|
||||
#endif /* _ARCH_PWR7 */
|
||||
#endif /* __PPC64__ */
|
||||
|
||||
#endif /* BMI2INTRIN_H_ */
|
||||
165
clangd/lib/clang/18/include/ppc_wrappers/bmiintrin.h
Normal file
165
clangd/lib/clang/18/include/ppc_wrappers/bmiintrin.h
Normal file
@@ -0,0 +1,165 @@
|
||||
/*===---- bmiintrin.h - Implementation of BMI intrinsics on PowerPC --------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined X86GPRINTRIN_H_
|
||||
#error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef BMIINTRIN_H_
|
||||
#define BMIINTRIN_H_
|
||||
|
||||
extern __inline unsigned short
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __tzcnt_u16(unsigned short __X) {
  /* TZCNT is architecturally defined for a zero source: it returns the
     operand width (16).  __builtin_ctz(0) is undefined behavior, so the
     zero case must be handled explicitly. */
  return __X == 0 ? 16 : __builtin_ctz(__X);
}
|
||||
|
||||
/* 32-bit BMI1 helpers in plain integer arithmetic.  BEXTR and TZCNT are
   given their architected results for degenerate inputs (zero length /
   zero source) instead of relying on C-undefined shifts. */

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __andn_u32(unsigned int __X, unsigned int __Y) {
  /* ANDN: __Y with every bit that is set in __X cleared. */
  return (~__X & __Y);
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _bextr_u32(unsigned int __X, unsigned int __P, unsigned int __L) {
  /* BEXTR: extract __L bits of __X starting at bit __P.  The x86
     instruction yields 0 for a zero length or out-of-range start and
     reads bits above position 31 as zero; the former double-shift
     ((__X << (32-(__L+__P))) >> (32-__L)) was undefined C in exactly
     those cases. */
  if (__L == 0 || __P >= 32)
    return 0;
  __X >>= __P;
  if (__L < 32)
    __X &= ((1u << __L) - 1u);
  return __X;
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __bextr_u32(unsigned int __X, unsigned int __Y) {
  /* Packed-control form: start in bits [7:0], length in bits [15:8]. */
  unsigned int __P, __L;
  __P = __Y & 0xFF;
  __L = (__Y >> 8) & 0xFF;
  return (_bextr_u32(__X, __P, __L));
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __blsi_u32(unsigned int __X) {
  /* BLSI: isolate the lowest set bit (0 stays 0). */
  return (__X & -__X);
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _blsi_u32(unsigned int __X) {
  return __blsi_u32(__X);
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __blsmsk_u32(unsigned int __X) {
  /* BLSMSK: mask of bits up to and including the lowest set bit. */
  return (__X ^ (__X - 1));
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _blsmsk_u32(unsigned int __X) {
  return __blsmsk_u32(__X);
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __blsr_u32(unsigned int __X) {
  /* BLSR: clear the lowest set bit. */
  return (__X & (__X - 1));
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _blsr_u32(unsigned int __X) {
  return __blsr_u32(__X);
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __tzcnt_u32(unsigned int __X) {
  /* TZCNT returns the operand width for a zero source; __builtin_ctz(0)
     is undefined, so guard it. */
  return __X == 0 ? 32 : __builtin_ctz(__X);
}

extern __inline unsigned int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _tzcnt_u32(unsigned int __X) {
  return __X == 0 ? 32 : __builtin_ctz(__X);
}
|
||||
|
||||
/* use the 64-bit shift, rotate, and count leading zeros instructions
|
||||
for long long. */
|
||||
#ifdef __PPC64__
|
||||
/* 64-bit BMI1 helpers (use the 64-bit shift/rotate/count-leading-zeros
   instructions).  BEXTR and TZCNT handle their architected degenerate
   inputs explicitly rather than via C-undefined shifts. */

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __andn_u64(unsigned long long __X, unsigned long long __Y) {
  /* ANDN: __Y with every bit that is set in __X cleared. */
  return (~__X & __Y);
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _bextr_u64(unsigned long long __X, unsigned int __P, unsigned int __L) {
  /* BEXTR: extract __L bits of __X starting at bit __P.  Zero length or
     out-of-range start yields 0; bits above position 63 read as zero.
     The former double-shift was undefined C for those inputs. */
  if (__L == 0 || __P >= 64)
    return 0;
  __X >>= __P;
  if (__L < 64)
    __X &= ((1ULL << __L) - 1ULL);
  return __X;
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __bextr_u64(unsigned long long __X, unsigned long long __Y) {
  /* Packed-control form: start in bits [7:0], length in bits [15:8]. */
  unsigned int __P, __L;
  __P = __Y & 0xFF;
  __L = (__Y & 0xFF00) >> 8;
  return (_bextr_u64(__X, __P, __L));
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __blsi_u64(unsigned long long __X) {
  /* BLSI: isolate the lowest set bit (0 stays 0). */
  return __X & -__X;
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _blsi_u64(unsigned long long __X) {
  return __blsi_u64(__X);
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __blsmsk_u64(unsigned long long __X) {
  /* BLSMSK: mask of bits up to and including the lowest set bit. */
  return (__X ^ (__X - 1));
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _blsmsk_u64(unsigned long long __X) {
  return __blsmsk_u64(__X);
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __blsr_u64(unsigned long long __X) {
  /* BLSR: clear the lowest set bit. */
  return (__X & (__X - 1));
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _blsr_u64(unsigned long long __X) {
  return __blsr_u64(__X);
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    __tzcnt_u64(unsigned long long __X) {
  /* TZCNT returns the operand width (64) for a zero source;
     __builtin_ctzll(0) is undefined, so guard it. */
  return __X == 0 ? 64 : __builtin_ctzll(__X);
}

extern __inline unsigned long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _tzcnt_u64(unsigned long long __X) {
  return __X == 0 ? 64 : __builtin_ctzll(__X);
}
|
||||
#endif /* __PPC64__ */
|
||||
|
||||
#endif /* BMIINTRIN_H_ */
|
||||
2269
clangd/lib/clang/18/include/ppc_wrappers/emmintrin.h
Normal file
2269
clangd/lib/clang/18/include/ppc_wrappers/emmintrin.h
Normal file
File diff suppressed because it is too large
Load Diff
27
clangd/lib/clang/18/include/ppc_wrappers/immintrin.h
Normal file
27
clangd/lib/clang/18/include/ppc_wrappers/immintrin.h
Normal file
@@ -0,0 +1,27 @@
|
||||
/*===---- immintrin.h - Implementation of Intel intrinsics on PowerPC ------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef IMMINTRIN_H_
|
||||
#define IMMINTRIN_H_
|
||||
|
||||
#include <x86gprintrin.h>
|
||||
|
||||
#include <mmintrin.h>
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include <pmmintrin.h>
|
||||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#endif /* IMMINTRIN_H_ */
|
||||
45
clangd/lib/clang/18/include/ppc_wrappers/mm_malloc.h
Normal file
45
clangd/lib/clang/18/include/ppc_wrappers/mm_malloc.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _MM_MALLOC_H_INCLUDED
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
|
||||
#if defined(__powerpc64__) && \
|
||||
(defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* We can't depend on <stdlib.h> since the prototype of posix_memalign
|
||||
may not be visible. */
|
||||
#ifndef __cplusplus
|
||||
extern int posix_memalign(void **, size_t, size_t);
|
||||
#else
|
||||
extern "C" int posix_memalign(void **, size_t, size_t);
|
||||
#endif
|
||||
|
||||
static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
|
||||
/* PowerPC64 ELF V2 ABI requires quadword alignment. */
|
||||
size_t __vec_align = sizeof(__vector float);
|
||||
void *__ptr;
|
||||
|
||||
if (__alignment < __vec_align)
|
||||
__alignment = __vec_align;
|
||||
if (posix_memalign(&__ptr, __alignment, __size) == 0)
|
||||
return __ptr;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Release storage obtained from _mm_malloc (posix_memalign memory is
   freed with plain free). */
static __inline void _mm_free(void *__ptr) {
  free(__ptr);
}
|
||||
|
||||
#else
|
||||
#include_next <mm_malloc.h>
|
||||
#endif
|
||||
|
||||
#endif /* _MM_MALLOC_H_INCLUDED */
|
||||
1453
clangd/lib/clang/18/include/ppc_wrappers/mmintrin.h
Normal file
1453
clangd/lib/clang/18/include/ppc_wrappers/mmintrin.h
Normal file
File diff suppressed because it is too large
Load Diff
26
clangd/lib/clang/18/include/ppc_wrappers/nmmintrin.h
Normal file
26
clangd/lib/clang/18/include/ppc_wrappers/nmmintrin.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*===---- nmmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerpc64le.
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
Note that much code that uses Intel intrinsics can be rewritten in
|
||||
standard C or GNU C extensions, which are more portable and better
|
||||
optimized across multiple targets. */
|
||||
#endif
|
||||
|
||||
#ifndef NMMINTRIN_H_
|
||||
#define NMMINTRIN_H_
|
||||
|
||||
/* We just include SSE4.1 header file. */
|
||||
#include <smmintrin.h>
|
||||
|
||||
#endif /* NMMINTRIN_H_ */
|
||||
145
clangd/lib/clang/18/include/ppc_wrappers/pmmintrin.h
Normal file
145
clangd/lib/clang/18/include/ppc_wrappers/pmmintrin.h
Normal file
@@ -0,0 +1,145 @@
|
||||
/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.0. */
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerpc64le.
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
Note that much code that uses Intel intrinsics can be rewritten in
|
||||
standard C or GNU C extensions, which are more portable and better
|
||||
optimized across multiple targets.
|
||||
|
||||
In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
|
||||
is a good match for most SIMD operations. However the Horizontal
|
||||
add/sub requires the data pairs be permuted into a separate
|
||||
registers with vertical even/odd alignment for the operation.
|
||||
And the addsub operation requires the sign of only the even numbered
|
||||
elements be flipped (xored with -0.0).
|
||||
For larger blocks of code using these intrinsic implementations,
|
||||
the compiler be should be able to schedule instructions to avoid
|
||||
additional latency.
|
||||
|
||||
In the specific case of the monitor and mwait instructions there are
|
||||
no direct equivalent in the PowerISA at this time. So those
|
||||
intrinsics are not implemented. */
|
||||
#error \
|
||||
"Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
|
||||
#endif
|
||||
|
||||
#ifndef PMMINTRIN_H_
|
||||
#define PMMINTRIN_H_
|
||||
|
||||
#if defined(__powerpc64__) && \
|
||||
(defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
|
||||
|
||||
/* We need definitions from the SSE2 and SSE header files*/
|
||||
#include <emmintrin.h>
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_addsub_ps(__m128 __X, __m128 __Y) {
|
||||
const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0};
|
||||
__v4sf __even_neg_Y = vec_xor(__Y, __even_n0);
|
||||
return (__m128)vec_add(__X, __even_neg_Y);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_addsub_pd(__m128d __X, __m128d __Y) {
|
||||
const __v2df __even_n0 = {-0.0, 0.0};
|
||||
__v2df __even_neg_Y = vec_xor(__Y, __even_n0);
|
||||
return (__m128d)vec_add(__X, __even_neg_Y);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_ps(__m128 __X, __m128 __Y) {
|
||||
__vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09,
|
||||
0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13,
|
||||
0x18, 0x19, 0x1A, 0x1B};
|
||||
__vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D,
|
||||
0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17,
|
||||
0x1C, 0x1D, 0x1E, 0x1F};
|
||||
return (__m128)vec_add(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2),
|
||||
vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1));
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_ps(__m128 __X, __m128 __Y) {
|
||||
__vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09,
|
||||
0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13,
|
||||
0x18, 0x19, 0x1A, 0x1B};
|
||||
__vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D,
|
||||
0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17,
|
||||
0x1C, 0x1D, 0x1E, 0x1F};
|
||||
return (__m128)vec_sub(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2),
|
||||
vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1));
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_pd(__m128d __X, __m128d __Y) {
|
||||
return (__m128d)vec_add(vec_mergeh((__v2df)__X, (__v2df)__Y),
|
||||
vec_mergel((__v2df)__X, (__v2df)__Y));
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_pd(__m128d __X, __m128d __Y) {
|
||||
return (__m128d)vec_sub(vec_mergeh((__v2df)__X, (__v2df)__Y),
|
||||
vec_mergel((__v2df)__X, (__v2df)__Y));
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_movehdup_ps(__m128 __X) {
|
||||
return (__m128)vec_mergeo((__v4su)__X, (__v4su)__X);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _ARCH_PWR8
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_moveldup_ps(__m128 __X) {
|
||||
return (__m128)vec_mergee((__v4su)__X, (__v4su)__X);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_loaddup_pd(double const *__P) {
|
||||
return (__m128d)vec_splats(*__P);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_movedup_pd(__m128d __X) {
|
||||
return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0));
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_lddqu_si128(__m128i const *__P) {
|
||||
return (__m128i)(vec_vsx_ld(0, (signed int const *)__P));
|
||||
}
|
||||
|
||||
/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_wait. */
|
||||
|
||||
#else
|
||||
#include_next <pmmintrin.h>
|
||||
#endif /* defined(__powerpc64__) && \
|
||||
* (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */
|
||||
|
||||
#endif /* PMMINTRIN_H_ */
|
||||
683
clangd/lib/clang/18/include/ppc_wrappers/smmintrin.h
Normal file
683
clangd/lib/clang/18/include/ppc_wrappers/smmintrin.h
Normal file
@@ -0,0 +1,683 @@
|
||||
/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.0.
|
||||
|
||||
NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerpc64/powerpc64le.
|
||||
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
|
||||
Note that much code that uses Intel intrinsics can be rewritten in
|
||||
standard C or GNU C extensions, which are more portable and better
|
||||
optimized across multiple targets. */
|
||||
#error \
|
||||
"Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
||||
#endif
|
||||
|
||||
#ifndef SMMINTRIN_H_
|
||||
#define SMMINTRIN_H_
|
||||
|
||||
#if defined(__powerpc64__) && \
|
||||
(defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
|
||||
|
||||
#include <altivec.h>
|
||||
#include <tmmintrin.h>
|
||||
|
||||
/* Rounding mode macros. */
|
||||
#define _MM_FROUND_TO_NEAREST_INT 0x00
|
||||
#define _MM_FROUND_TO_ZERO 0x01
|
||||
#define _MM_FROUND_TO_POS_INF 0x02
|
||||
#define _MM_FROUND_TO_NEG_INF 0x03
|
||||
#define _MM_FROUND_CUR_DIRECTION 0x04
|
||||
|
||||
#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
|
||||
|
||||
#define _MM_FROUND_RAISE_EXC 0x00
|
||||
#define _MM_FROUND_NO_EXC 0x08
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_round_pd(__m128d __A, int __rounding) {
|
||||
__v2df __r;
|
||||
union {
|
||||
double __fr;
|
||||
long long __fpscr;
|
||||
} __enables_save, __fpscr_save;
|
||||
|
||||
if (__rounding & _MM_FROUND_NO_EXC) {
|
||||
/* Save enabled exceptions, disable all exceptions,
|
||||
and preserve the rounding mode. */
|
||||
#ifdef _ARCH_PWR9
|
||||
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
__fpscr_save.__fpscr &= ~0xf8;
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
#endif
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
a read/use of the variable before the FPSCR is modified, above.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : "+wa"(__A));
|
||||
}
|
||||
|
||||
switch (__rounding) {
|
||||
case _MM_FROUND_TO_NEAREST_INT:
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__attribute__((fallthrough));
|
||||
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
|
||||
__builtin_ppc_set_fpscr_rn(0b00);
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
a read/use of the variable before the FPSCR is modified, above.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : "+wa"(__A));
|
||||
|
||||
__r = vec_rint((__v2df)__A);
|
||||
|
||||
/* Insert an artificial "read" reference to the variable written
|
||||
above, to ensure the compiler does not schedule the computation
|
||||
of the value after the manipulation of the FPSCR, below.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
__builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
|
||||
break;
|
||||
case _MM_FROUND_TO_NEG_INF:
|
||||
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
|
||||
__r = vec_floor((__v2df)__A);
|
||||
break;
|
||||
case _MM_FROUND_TO_POS_INF:
|
||||
case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
|
||||
__r = vec_ceil((__v2df)__A);
|
||||
break;
|
||||
case _MM_FROUND_TO_ZERO:
|
||||
case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
|
||||
__r = vec_trunc((__v2df)__A);
|
||||
break;
|
||||
case _MM_FROUND_CUR_DIRECTION:
|
||||
__r = vec_rint((__v2df)__A);
|
||||
break;
|
||||
}
|
||||
if (__rounding & _MM_FROUND_NO_EXC) {
|
||||
/* Insert an artificial "read" reference to the variable written
|
||||
above, to ensure the compiler does not schedule the computation
|
||||
of the value after the manipulation of the FPSCR, below.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
/* Restore enabled exceptions. */
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__fpscr_save.__fpscr |= __enables_save.__fpscr;
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
}
|
||||
return (__m128d)__r;
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_round_sd(__m128d __A, __m128d __B, int __rounding) {
|
||||
__B = _mm_round_pd(__B, __rounding);
|
||||
__v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]};
|
||||
return (__m128d)__r;
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_round_ps(__m128 __A, int __rounding) {
|
||||
__v4sf __r;
|
||||
union {
|
||||
double __fr;
|
||||
long long __fpscr;
|
||||
} __enables_save, __fpscr_save;
|
||||
|
||||
if (__rounding & _MM_FROUND_NO_EXC) {
|
||||
/* Save enabled exceptions, disable all exceptions,
|
||||
and preserve the rounding mode. */
|
||||
#ifdef _ARCH_PWR9
|
||||
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
__fpscr_save.__fpscr &= ~0xf8;
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
#endif
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
a read/use of the variable before the FPSCR is modified, above.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : "+wa"(__A));
|
||||
}
|
||||
|
||||
switch (__rounding) {
|
||||
case _MM_FROUND_TO_NEAREST_INT:
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__attribute__((fallthrough));
|
||||
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
|
||||
__builtin_ppc_set_fpscr_rn(0b00);
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
a read/use of the variable before the FPSCR is modified, above.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : "+wa"(__A));
|
||||
|
||||
__r = vec_rint((__v4sf)__A);
|
||||
|
||||
/* Insert an artificial "read" reference to the variable written
|
||||
above, to ensure the compiler does not schedule the computation
|
||||
of the value after the manipulation of the FPSCR, below.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
__builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
|
||||
break;
|
||||
case _MM_FROUND_TO_NEG_INF:
|
||||
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
|
||||
__r = vec_floor((__v4sf)__A);
|
||||
break;
|
||||
case _MM_FROUND_TO_POS_INF:
|
||||
case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
|
||||
__r = vec_ceil((__v4sf)__A);
|
||||
break;
|
||||
case _MM_FROUND_TO_ZERO:
|
||||
case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
|
||||
__r = vec_trunc((__v4sf)__A);
|
||||
break;
|
||||
case _MM_FROUND_CUR_DIRECTION:
|
||||
__r = vec_rint((__v4sf)__A);
|
||||
break;
|
||||
}
|
||||
if (__rounding & _MM_FROUND_NO_EXC) {
|
||||
/* Insert an artificial "read" reference to the variable written
|
||||
above, to ensure the compiler does not schedule the computation
|
||||
of the value after the manipulation of the FPSCR, below.
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
/* Restore enabled exceptions. */
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__fpscr_save.__fpscr |= __enables_save.__fpscr;
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
}
|
||||
return (__m128)__r;
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_round_ss(__m128 __A, __m128 __B, int __rounding) {
|
||||
__B = _mm_round_ps(__B, __rounding);
|
||||
__v4sf __r = (__v4sf)__A;
|
||||
__r[0] = ((__v4sf)__B)[0];
|
||||
return (__m128)__r;
|
||||
}
|
||||
|
||||
/* SSE4.1 ceil/floor are the corresponding _mm_round_* operation with a
   fixed rounding mode (toward +infinity / toward -infinity).  */
#define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
|
||||
__v16qi __result = (__v16qi)__A;
|
||||
|
||||
__result[__N & 0xf] = __D;
|
||||
|
||||
return (__m128i)__result;
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
|
||||
__v4si __result = (__v4si)__A;
|
||||
|
||||
__result[__N & 3] = __D;
|
||||
|
||||
return (__m128i)__result;
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
|
||||
__v2di __result = (__v2di)__A;
|
||||
|
||||
__result[__N & 1] = __D;
|
||||
|
||||
return (__m128i)__result;
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extract_epi8(__m128i __X, const int __N) {
|
||||
return (unsigned char)((__v16qi)__X)[__N & 15];
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extract_epi32(__m128i __X, const int __N) {
|
||||
return ((__v4si)__X)[__N & 3];
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extract_epi64(__m128i __X, const int __N) {
|
||||
return ((__v2di)__X)[__N & 1];
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extract_ps(__m128 __X, const int __N) {
|
||||
return ((__v4si)__X)[__N & 3];
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
/* SSE4.1 pblendw: select 16-bit lanes from __B where the corresponding
   bit of the 8-bit immediate is set, else from __A.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
  /* vec_gb expands each mask bit of __imm8 into a full byte; unpack
     widens those bytes into 16-bit select masks.  */
  __v16qu __charmask = vec_splats((unsigned char)__imm8);
  __charmask = vec_gb(__charmask);
  __v8hu __shortmask = (__v8hu)vec_unpackh((__v16qi)__charmask);
#ifdef __BIG_ENDIAN__
  /* Lane order differs on BE; reverse so mask bit i maps to lane i.  */
  __shortmask = vec_reve(__shortmask);
#endif
  return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
}
#endif

/* SSE4.1 pblendvb: per byte, take __B where the sign bit of __mask is
   set, else __A.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
#ifdef _ARCH_PWR10
  /* ISA 3.1 has a direct blend-by-sign-bit instruction.  */
  return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask);
#else
  /* Replicate the sign bit across each byte, then select.  */
  const __v16qu __seven = vec_splats((unsigned char)0x07);
  __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
  return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask);
#endif
}
|
||||
|
||||
/* SSE4.1 blendps: select float lanes from __B where the corresponding
   bit of the 4-bit immediate is set, else from __A.  Implemented as a
   permute with one precomputed control vector per immediate value
   (indices 0-15 pick bytes of __A, 16-31 pick bytes of __B).  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
  };
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128)__r;
}
|
||||
|
||||
/* SSE4.1 blendvps: per 32-bit lane, take __B where the sign bit of
   __mask is set, else __A.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) {
#ifdef _ARCH_PWR10
  /* ISA 3.1 blend selects directly on the sign bit.  */
  return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask);
#else
  /* signed-less-than-zero is true exactly when the sign bit is set.  */
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero);
  return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask);
#endif
}
|
||||
|
||||
/* SSE4.1 blendpd: select double lanes from __B where the corresponding
   bit of the 2-bit immediate is set, else from __A — one permute
   control vector per immediate value.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128d)__r;
}

#ifdef _ARCH_PWR8
/* SSE4.1 blendvpd: per 64-bit lane, take __B where the sign bit of
   __mask is set, else __A.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) {
#ifdef _ARCH_PWR10
  return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask);
#else
  /* signed compare against zero extracts the sign bit as a full-lane
     boolean mask.  */
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask =
      vec_cmplt((__v2di)__mask, __zero);
  return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask);
#endif
}
#endif
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_testz_si128(__m128i __A, __m128i __B) {
|
||||
/* Note: This implementation does NOT set "zero" or "carry" flags. */
|
||||
const __v16qu __zero = {0};
|
||||
return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero);
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_testc_si128(__m128i __A, __m128i __B) {
|
||||
/* Note: This implementation does NOT set "zero" or "carry" flags. */
|
||||
const __v16qu __zero = {0};
|
||||
const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A);
|
||||
return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero);
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_testnzc_si128(__m128i __A, __m128i __B) {
|
||||
/* Note: This implementation does NOT set "zero" or "carry" flags. */
|
||||
return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0;
|
||||
}
|
||||
|
||||
#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))
|
||||
|
||||
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))
|
||||
|
||||
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
|
||||
|
||||
#ifdef _ARCH_PWR8
/* SSE4.1 pcmpeqq: per 64-bit lane, all-ones where equal, zero where
   not.  Requires POWER8 (vcmpequd).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
  __v2di __eq = (__v2di)vec_cmpeq((__v2di)__X, (__v2di)__Y);
  return (__m128i)__eq;
}
#endif
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epi8(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epu16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epi32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_min((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epu32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_min((__v4su)__X, (__v4su)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epi8(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epu16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epi32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_max((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epu32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_max((__v4su)__X, (__v4su)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mullo_epi32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y);
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mul_epi32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi8_epi16(__m128i __A) {
|
||||
return (__m128i)vec_unpackh((__v16qi)__A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi8_epi32(__m128i __A) {
|
||||
__A = (__m128i)vec_unpackh((__v16qi)__A);
|
||||
return (__m128i)vec_unpackh((__v8hi)__A);
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi8_epi64(__m128i __A) {
|
||||
__A = (__m128i)vec_unpackh((__v16qi)__A);
|
||||
__A = (__m128i)vec_unpackh((__v8hi)__A);
|
||||
return (__m128i)vec_unpackh((__v4si)__A);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi16_epi32(__m128i __A) {
|
||||
return (__m128i)vec_unpackh((__v8hi)__A);
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi16_epi64(__m128i __A) {
|
||||
__A = (__m128i)vec_unpackh((__v8hi)__A);
|
||||
return (__m128i)vec_unpackh((__v4si)__A);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _ARCH_PWR8
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi32_epi64(__m128i __A) {
|
||||
return (__m128i)vec_unpackh((__v4si)__A);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* SSE4.1 pmovzxbw: zero-extend the low 8 bytes to 16-bit lanes by
   interleaving with zeros.  The zero half goes on the opposite side
   depending on endianness so the value lands in the low-order bits.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi16(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

/* SSE4.1 pmovzxbd: zero-extend the low 4 bytes to 32-bit lanes (two
   zero-interleave steps: bytes -> shorts -> ints).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi32(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

/* SSE4.1 pmovzxbq: zero-extend the low 2 bytes to 64-bit lanes (three
   zero-interleave steps).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi64(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

/* SSE4.1 pmovzxwd: zero-extend the low 4 shorts to 32-bit lanes.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi32(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

/* SSE4.1 pmovzxwq: zero-extend the low 2 shorts to 64-bit lanes.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi64(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

/* SSE4.1 pmovzxdq: zero-extend the low 2 ints to 64-bit lanes.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu32_epi64(__m128i __A) {
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v4su)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}
|
||||
|
||||
/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively (SSE4.1 phminposuw).  Scalar loop:
   there is no direct VMX equivalent.  Ties resolve to the lowest
   index, matching Intel's definition.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_minpos_epu16(__m128i __A) {
  /* Union view lets us index the vector as 8 unsigned shorts.  */
  union __u {
    __m128i __m;
    __v8hu __uh;
  };
  union __u __u = {.__m = __A}, __r = {.__m = {0}};
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++) {
    /* Strict '<' keeps the first occurrence on ties.  */
    if (__u.__uh[__i] < __rmin) {
      __rmin = __u.__uh[__i];
      __ridx = __i;
    }
  }
  /* Lane 0 = minimum value, lane 1 = its index; upper lanes stay 0.  */
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_packus_epi32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
#ifdef _ARCH_PWR8
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
#include_next <smmintrin.h>
|
||||
#endif /* defined(__powerpc64__) && \
|
||||
* (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */
|
||||
|
||||
#endif /* SMMINTRIN_H_ */
|
||||
453
clangd/lib/clang/18/include/ppc_wrappers/tmmintrin.h
Normal file
453
clangd/lib/clang/18/include/ppc_wrappers/tmmintrin.h
Normal file
@@ -0,0 +1,453 @@
|
||||
/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.0. */
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerpc64le.
|
||||
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
|
||||
Note that much code that uses Intel intrinsics can be rewritten in
|
||||
standard C or GNU C extensions, which are more portable and better
|
||||
optimized across multiple targets. */
|
||||
#endif
|
||||
|
||||
#ifndef TMMINTRIN_H_
|
||||
#define TMMINTRIN_H_
|
||||
|
||||
#if defined(__powerpc64__) && \
|
||||
(defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
|
||||
|
||||
#include <altivec.h>
|
||||
|
||||
/* We need definitions from the SSE header files. */
|
||||
#include <pmmintrin.h>
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_abs_epi16(__m128i __A) {
|
||||
return (__m128i)vec_abs((__v8hi)__A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_abs_epi32(__m128i __A) {
|
||||
return (__m128i)vec_abs((__v4si)__A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_abs_epi8(__m128i __A) {
|
||||
return (__m128i)vec_abs((__v16qi)__A);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_abs_pi16(__m64 __A) {
|
||||
__v8hi __B = (__v8hi)(__v2du){__A, __A};
|
||||
return (__m64)((__v2du)vec_abs(__B))[0];
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_abs_pi32(__m64 __A) {
|
||||
__v4si __B = (__v4si)(__v2du){__A, __A};
|
||||
return (__m64)((__v2du)vec_abs(__B))[0];
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_abs_pi8(__m64 __A) {
|
||||
__v16qi __B = (__v16qi)(__v2du){__A, __A};
|
||||
return (__m64)((__v2du)vec_abs(__B))[0];
|
||||
}
|
||||
|
||||
/* SSSE3 palignr: concatenate __A:__B (A high) and extract 16 bytes
   starting __count bytes up from the bottom.  Fast path uses vec_sld
   when __count is a compile-time constant (vec_sld requires a literal
   shift); otherwise fall back to octet shifts and OR.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
  if (__builtin_constant_p(__count) && __count < 16) {
#ifdef __LITTLE_ENDIAN__
    /* vec_sld is defined in BE element order; byte-reverse the inputs
       so the same shift works on LE, then reverse the result back.  */
    __A = (__m128i)vec_reve((__v16qu)__A);
    __B = (__m128i)vec_reve((__v16qu)__B);
#endif
    __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
#endif
    return __A;
  }

  /* Variable-count path.  */
  if (__count == 0)
    return __B;

  if (__count >= 16) {
    if (__count >= 32) {
      /* Shifted entirely past both operands: result is zero.  */
      const __v16qu __zero = {0};
      return (__m128i)__zero;
    } else {
      /* Only bytes of __A remain; shift it down by (__count-16).  */
      const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
      return (__m128i)vec_sro((__v16qu)__A, __shift);
#else
      return (__m128i)vec_slo((__v16qu)__A, __shift);
#endif
    }
  } else {
    /* General case: low bytes from __B shifted down, high bytes from
       __A shifted up, OR-ed together.  */
    const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
    const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
#else
    __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
#endif
    return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
  }
}
|
||||
|
||||
/* SSSE3 MMX palignr: place __A:__B in one vector and shift down by
   __count bytes; counts of 16 or more yield zero.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
  if (__count < 16) {
    __v2du __C = {__B, __A};
#ifdef __LITTLE_ENDIAN__
    /* Octet shift takes the bit count in the low/high octet of the
       shift vector depending on endianness.  */
    const __v4su __shift = {__count << 3, 0, 0, 0};
    __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
#else
    const __v4su __shift = {0, 0, 0, __count << 3};
    __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
#endif
    return (__m64)__C[0];
  } else {
    const __m64 __zero = {0};
    return __zero;
  }
}
|
||||
|
||||
/* SSSE3 phaddw: horizontal add of adjacent 16-bit pairs across
   __A:__B.  __P gathers the even (first) members of each pair, __Q the
   odd (second) members; their sum is the packed pairwise sum.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

/* SSSE3 phaddd: horizontal add of adjacent 32-bit pairs across
   __A:__B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

/* SSSE3 MMX phaddw: both 64-bit inputs packed in one vector; result
   taken from the upper doubleword.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

/* SSSE3 MMX phaddd.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}
|
||||
|
||||
/* SSSE3 phaddsw: horizontal add of adjacent 16-bit pairs with signed
   saturation.  vec_sum4s adds pairs into 32-bit sums; vec_packs packs
   back to 16-bit with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_epi16(__m128i __A, __m128i __B) {
  __v4si __C = {0}, __D = {0};
  __C = vec_sum4s((__v8hi)__A, __C);
  __D = vec_sum4s((__v8hi)__B, __D);
  __C = (__v4si)vec_packs(__C, __D);
  return (__m128i)__C;
}

/* SSSE3 MMX phaddsw; result taken from the upper doubleword.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_pi16(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v4si __D = vec_sum4s(__C, __zero);
  __C = vec_packs(__D, __D);
  return (__m64)((__v2du)__C)[1];
}
|
||||
|
||||
/* SSSE3 phsubw: horizontal subtract of adjacent 16-bit pairs across
   __A:__B (first member minus second; __P gathers the first members,
   __Q the second).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

/* SSSE3 phsubd: horizontal subtract of adjacent 32-bit pairs.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

/* SSSE3 MMX phsubw; result taken from the upper doubleword.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

/* SSSE3 MMX phsubd.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi32(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __C = (__v4si)(__v2du){__A, __B};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}
|
||||
|
||||
/* SSSE3 phsubsw: horizontal subtract of adjacent 16-bit pairs with
   signed saturation (vec_subs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_subs(__C, __D);
}

/* SSSE3 MMX phsubsw; result taken from the upper doubleword.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __P);
  __v8hi __E = vec_perm(__C, __C, __Q);
  __C = vec_subs(__D, __E);
  return (__m64)((__v2du)__C)[1];
}
|
||||
|
||||
/* SSSE3 pshufb: permute bytes of __A by the low nibbles of __B; lanes
   whose __B byte has the sign bit set are zeroed.  vec_perm only looks
   at the low 5 bits of each control byte, so the zeroing is done with
   a separate select on the sign-bit compare.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
  __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
  return (__m128i)vec_sel(__C, __zero, __select);
}

/* SSSE3 MMX pshufb: same, on 64-bit operands splatted into a full
   vector.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
  __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
  __C = vec_sel(__C, __zero, __select);
  return (__m64)((__v2du)(__C))[0];
}
|
||||
|
||||
#ifdef _ARCH_PWR8
/* SSSE3 psignb: negate lanes of __A where __B is negative, zero them
   where __B is zero, pass through where __B is positive.  Implemented
   by building a {-1, 0, +1} multiplier from sign compares: the
   negative compare contributes -1 (all-ones), the negated positive
   compare contributes +1, and a zero lane contributes neither.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero);
  __v16qi __selectpos =
      (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero));
  __v16qi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv);
}
#endif

#ifdef _ARCH_PWR8
/* SSSE3 psignw: 16-bit variant of the same {-1, 0, +1} multiply.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __zero = {0};
  __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero);
  __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero));
  __v8hi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv);
}
#endif

#ifdef _ARCH_PWR8
/* SSSE3 psignd: 32-bit variant.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __zero = {0};
  __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero);
  __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero));
  __v4si __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv);
}
#endif

#ifdef _ARCH_PWR8
/* SSSE3 MMX psignb: splat operands into full vectors and reuse the
   128-bit implementation.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
/* SSSE3 MMX psignw.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  const __v8hi __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
/* SSSE3 MMX psignd.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif
|
||||
|
||||
/* SSSE3 _mm_maddubs_epi16 (pmaddubsw): multiplies bytes of __A (treated
   as unsigned) by the corresponding signed bytes of __B, then adds each
   adjacent pair of 16-bit products with signed saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_epi16(__m128i __A, __m128i __B) {
  /* 0x00ff mask: after sign-extending bytes to shorts, AND-ing with this
     yields the original byte value zero-extended, i.e. unsigned.  */
  __v8hi __unsigned = vec_splats((signed short)0x00ff);
  /* Widen both halves of __A to shorts and force unsigned byte values.  */
  __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
  __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
  /* Widen __B with sign extension (bytes of __B are signed).  */
  __v8hi __E = vec_unpackh((__v16qi)__B);
  __v8hi __F = vec_unpackl((__v16qi)__B);
  /* Elementwise 16-bit products (each fits: max 255*128 < 2^15*2).  */
  __C = vec_mul(__C, __E);
  __D = vec_mul(__D, __F);
  /* Byte-level permute patterns selecting alternating 16-bit lanes from
     the concatenation of __C and __D, so that __E/__F below hold the two
     members of each adjacent product pair.  */
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __E = vec_perm(__C, __D, __odds);
  __F = vec_perm(__C, __D, __evens);
  /* Saturating signed add of the paired products.  */
  return (__m128i)vec_adds(__E, __F);
}
|
||||
|
||||
/* MMX variant of _mm_maddubs_epi16: unsigned bytes of __A times signed
   bytes of __B, adjacent 16-bit products added with signed saturation;
   only the low 64 bits of the vector computation are returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_pi16(__m64 __A, __m64 __B) {
  /* Splat __A into both doublewords, then widen one half to shorts.  */
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __C = vec_unpackl((__v16qi)__C);
  /* Mask to 0x00ff so the widened bytes read as unsigned values.  */
  const __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __C = vec_and(__C, __unsigned);
  /* Splat and sign-extend __B's bytes to shorts.  */
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __D = vec_unpackl((__v16qi)__D);
  /* Elementwise 16-bit products.  */
  __D = vec_mul(__C, __D);
  /* Permute patterns picking alternating 16-bit lanes so each pair of
     adjacent products lands in matching positions of __C and __D.  */
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __C = vec_perm(__D, __D, __odds);
  __D = vec_perm(__D, __D, __evens);
  /* Saturating signed add, then return the low doubleword.  */
  __C = vec_adds(__C, __D);
  return (__m64)((__v2du)(__C))[0];
}
|
||||
|
||||
/* SSSE3 _mm_mulhrs_epi16 (pmulhrsw): per 16-bit lane, the full 32-bit
   product of __A and __B is shifted right 14, incremented, shifted right
   one more bit, and the low 16 bits kept — i.e. a rounded high multiply
   ((a*b >> 14) + 1) >> 1.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
  /* Widen both operands to 32 bits and form full products; the high and
     low halves of the vectors are processed separately.  */
  __v4si __C = vec_unpackh((__v8hi)__A);
  __v4si __D = vec_unpackh((__v8hi)__B);
  __C = vec_mul(__C, __D);
  __D = vec_unpackl((__v8hi)__A);
  __v4si __E = vec_unpackl((__v8hi)__B);
  __D = vec_mul(__D, __E);
  /* Shift products right by 14 bits.  */
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  __D = vec_sr(__D, __shift);
  /* Add the rounding bit, then shift right once more (total >> 15).  */
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __D = vec_add(__D, __ones);
  __D = vec_sr(__D, (__v4su)__ones);
  /* Narrow the two 32-bit halves back to one vector of shorts.  */
  return (__m128i)vec_pack(__C, __D);
}
|
||||
|
||||
/* MMX variant of _mm_mulhrs_epi16: rounded high multiply
   ((a*b >> 14) + 1) >> 1 on four 16-bit lanes; only the low 64 bits of
   the packed result are returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
  /* Splat each 64-bit operand and widen one half to 32-bit lanes.  */
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __C = vec_unpackh((__v8hi)__C);
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __D = vec_unpackh((__v8hi)__D);
  /* Full 32-bit products.  */
  __C = vec_mul(__C, __D);
  /* Shift right 14, add the rounding bit, shift right once more.  */
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  /* Pack back to shorts; __D's half of the pack is don't-care since only
     the low doubleword is returned.  */
  __v8hi __E = vec_pack(__C, __D);
  return (__m64)((__v2du)(__E))[0];
}
|
||||
|
||||
#else
|
||||
#include_next <tmmintrin.h>
|
||||
#endif /* defined(__powerpc64__) && \
|
||||
* (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */
|
||||
|
||||
#endif /* TMMINTRIN_H_ */
|
||||
17
clangd/lib/clang/18/include/ppc_wrappers/x86gprintrin.h
Normal file
17
clangd/lib/clang/18/include/ppc_wrappers/x86gprintrin.h
Normal file
@@ -0,0 +1,17 @@
|
||||
/*===--- x86gprintrin.h - Implementation of X86 GPR intrinsics on PowerPC --===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef X86GPRINTRIN_H_
|
||||
#define X86GPRINTRIN_H_
|
||||
|
||||
#include <bmiintrin.h>
|
||||
|
||||
#include <bmi2intrin.h>
|
||||
|
||||
#endif /* X86GPRINTRIN_H_ */
|
||||
28
clangd/lib/clang/18/include/ppc_wrappers/x86intrin.h
Normal file
28
clangd/lib/clang/18/include/ppc_wrappers/x86intrin.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*===---- x86intrin.h - Implementation of X86 intrinsics on PowerPC --------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerpc64le.
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
Note that much code that uses Intel intrinsics can be rewritten in
|
||||
standard C or GNU C extensions, which are more portable and better
|
||||
optimized across multiple targets. */
|
||||
#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
||||
#endif
|
||||
|
||||
#ifndef X86INTRIN_H_
|
||||
#define X86INTRIN_H_
|
||||
|
||||
#ifdef __ALTIVEC__
|
||||
#include <immintrin.h>
|
||||
#endif /* __ALTIVEC__ */
|
||||
|
||||
#endif /* X86INTRIN_H_ */
|
||||
1827
clangd/lib/clang/18/include/ppc_wrappers/xmmintrin.h
Normal file
1827
clangd/lib/clang/18/include/ppc_wrappers/xmmintrin.h
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user