My initializer stuff

This commit is contained in:
2024-09-17 15:09:41 -03:00
commit 17aae61838
3644 changed files with 556522 additions and 0 deletions

View File

@@ -0,0 +1,105 @@
/*===- __clang_openmp_device_functions.h - OpenMP device function declares -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
#define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#ifdef __cplusplus
extern "C" {
#endif
// NVPTX region: everything up to the matching `end declare variant` is placed
// in an OpenMP variant scope selected for the nvptx and nvptx64 device
// architectures. __CUDA__ and __OPENMP_NVPTX__ are defined only while the two
// CUDA headers below are being included and are removed again right after.
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
#define __CUDA__
#define __OPENMP_NVPTX__
/// Include declarations for libdevice functions.
#include <__clang_cuda_libdevice_declares.h>
/// Provide definitions for these functions.
#include <__clang_cuda_device_functions.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
#pragma omp end declare variant
// AMDGCN region: only processed when __AMDGCN__ is defined, and placed in an
// OpenMP variant scope selected for the amdgcn device architecture.
#ifdef __AMDGCN__
#pragma omp begin declare variant match(device = {arch(amdgcn)})
// Import types which will be used by __clang_hip_libdevice_declares.h
#ifndef __cplusplus
#include <stdint.h>
#endif
// __OPENMP_AMDGCN__ is visible only while the HIP declarations are included.
#define __OPENMP_AMDGCN__
// Temporarily redefine __device__ to expand to nothing for the include below;
// the previous definition (if any) is restored by the pop_macro afterwards.
#pragma push_macro("__device__")
#define __device__
/// Include declarations for libdevice functions.
#include <__clang_hip_libdevice_declares.h>
#pragma pop_macro("__device__")
#undef __OPENMP_AMDGCN__
#pragma omp end declare variant
#endif
#ifdef __cplusplus
} // extern "C"
#endif
// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
// need to `include <new>` in C++ mode.
#ifdef __cplusplus
// We require malloc/free.
#include <cstdlib>
// OPENMP_NOEXCEPT expands to `noexcept` from C++11 on and to nothing before
// that. Any outer definition is saved here and restored at the end.
#pragma push_macro("OPENMP_NOEXCEPT")
#if __cplusplus < 201103L
#define OPENMP_NOEXCEPT
#else
#define OPENMP_NOEXCEPT noexcept
#endif

// Global, non-placement allocation and deallocation functions implemented on
// top of malloc/free.

// Scalar new: a zero-byte request is widened to one byte before malloc.
inline void *operator new(__SIZE_TYPE__ size) {
  return ::malloc(size == 0 ? 1 : size);
}

// Array new: forwards to the scalar form.
inline void *operator new[](__SIZE_TYPE__ size) {
  return ::operator new(size);
}

// Scalar delete: hands the pointer straight to free.
inline void operator delete(void *ptr) OPENMP_NOEXCEPT {
  ::free(ptr);
}

// Array delete: forwards to the scalar form.
inline void operator delete[](void *ptr) OPENMP_NOEXCEPT {
  ::operator delete(ptr);
}

// Sized deallocation exists from C++14 on; the size is not needed and both
// forms forward to the unsized scalar delete.
#if __cplusplus >= 201402L
inline void operator delete(void *ptr, __SIZE_TYPE__ size) OPENMP_NOEXCEPT {
  ::operator delete(ptr);
}

inline void operator delete[](void *ptr, __SIZE_TYPE__ size) OPENMP_NOEXCEPT {
  ::operator delete(ptr);
}
#endif
#pragma pop_macro("OPENMP_NOEXCEPT")
#endif
#endif

View File

@@ -0,0 +1,132 @@
/*===-- cmath --- OpenMP cmath wrapper for target regions ----------- c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_CMATH_H__
#define __CLANG_OPENMP_CMATH_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#include_next <cmath>
// Make sure we include our math.h overlay; it has probably happened already,
// but we need to be sure.
#include <math.h>
// We (might) need cstdlib because __clang_cuda_cmath.h below declares `abs`
// which might live in cstdlib.
#include <cstdlib>
// We need limits because __clang_cuda_cmath.h below uses `std::numeric_limits`.
#include <limits>
// NVPTX region of the cmath wrapper: an OpenMP variant scope selected for the
// nvptx and nvptx64 device architectures. It first pulls in
// __clang_cuda_cmath.h (with __CUDA__ and __OPENMP_NVPTX__ defined only for
// the duration of that include) and then adds float overloads of the
// type-generic math names, each forwarding to the `f`-suffixed global function.
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any, allow_templates)})
#define __CUDA__
#define __OPENMP_NVPTX__
#include <__clang_cuda_cmath.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
// Overloads not provided by the CUDA wrappers but by the CUDA system headers.
// Since we do not include the latter we define them ourselves.
// __DEVICE__ is the common declaration prefix for the overloads below and is
// undefined again once they have been declared.
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
__DEVICE__ float erf(float __x) { return ::erff(__x); }
__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
__DEVICE__ long long int llrint(float __x) { return ::llrintf(__x); }
__DEVICE__ long long int llround(float __x) { return ::llroundf(__x); }
__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
__DEVICE__ float log2(float __x) { return ::log2f(__x); }
__DEVICE__ float logb(float __x) { return ::logbf(__x); }
__DEVICE__ long int lrint(float __x) { return ::lrintf(__x); }
__DEVICE__ long int lround(float __x) { return ::lroundf(__x); }
__DEVICE__ float nextafter(float __x, float __y) {
return ::nextafterf(__x, __y);
}
__DEVICE__ float remainder(float __x, float __y) {
return ::remainderf(__x, __y);
}
__DEVICE__ float scalbln(float __x, long int __y) {
return ::scalblnf(__x, __y);
}
__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
#undef __DEVICE__
#pragma omp end declare variant
// AMDGCN region of the cmath wrapper: only processed when __AMDGCN__ is
// defined, and placed in an OpenMP variant scope selected for the amdgcn
// device architecture. It includes __clang_hip_cmath.h and then adds float
// overloads that forward to the `f`-suffixed global functions.
#ifdef __AMDGCN__
#pragma omp begin declare variant match(device = {arch(amdgcn)})
// __constant__ is redefined as an attribute only for the include below; the
// previous definition (if any) is restored by the pop_macro that follows it.
#pragma push_macro("__constant__")
#define __constant__ __attribute__((constant))
#define __OPENMP_AMDGCN__
#include <__clang_hip_cmath.h>
#pragma pop_macro("__constant__")
#undef __OPENMP_AMDGCN__
// Define overloads otherwise which are absent
// __DEVICE__ is the common declaration prefix for the overloads below and is
// undefined again once they have been declared.
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float erf(float __x) { return ::erff(__x); }
__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
__DEVICE__ float ldexp(float __arg, int __exp) {
return ::ldexpf(__arg, __exp);
}
__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
__DEVICE__ float logb(float __x) { return ::logbf(__x); }
__DEVICE__ float nextafter(float __x, float __y) {
return ::nextafterf(__x, __y);
}
__DEVICE__ float remainder(float __x, float __y) {
return ::remainderf(__x, __y);
}
__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
#undef __DEVICE__
#pragma omp end declare variant
#endif // __AMDGCN__
#endif

View File

@@ -0,0 +1,55 @@
/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_COMPLEX__
#define __CLANG_OPENMP_COMPLEX__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
// We require std::math functions in the complex builtins below.
#include <cmath>
// Pull in the shared complex builtins header for whichever device target is
// being compiled. The same header is used for both targets; the marker macro
// (__OPENMP_NVPTX__ or __OPENMP_AMDGCN__) is defined only around its include.
#ifdef __NVPTX__
#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
#undef __OPENMP_NVPTX__
#endif // __NVPTX__
#ifdef __AMDGCN__
#define __OPENMP_AMDGCN__
#include <__clang_cuda_complex_builtins.h>
#undef __OPENMP_AMDGCN__
#endif // __AMDGCN__
#endif
// Grab the host header too.
#include_next <complex>
// If we are compiling against libc++, the macro _LIBCPP_STD_VER should be set
// after including <cmath> above. Since the complex header we use is a
// simplified version of the libc++, we don't need it in this case. If we
// compile against libstdc++, or any other standard library, we will overload
// the (hopefully template) functions in the <complex> header with the ones we
// got from libc++ which decomposes math functions, like `std::sin`, into
// arithmetic and calls to non-complex functions, all of which we can then
// handle.
// Only when _LIBCPP_STD_VER is not defined: include complex_cmath.h inside an
// OpenMP variant scope selected for the amdgcn, nvptx and nvptx64 device
// architectures.
#ifndef _LIBCPP_STD_VER
#pragma omp begin declare variant match( \
device = {arch(amdgcn, nvptx, nvptx64)}, \
implementation = {extension(match_any, allow_templates)})
#include <complex_cmath.h>
#pragma omp end declare variant
#endif // _LIBCPP_STD_VER

View File

@@ -0,0 +1,35 @@
/*===-- complex.h --- OpenMP complex.h wrapper for target regions ---- c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_COMPLEX_H__
#define __CLANG_OPENMP_COMPLEX_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
// We require math functions in the complex builtins below.
#include <math.h>
// Pull in the shared complex builtins header for whichever device target is
// being compiled. The marker macro (__OPENMP_NVPTX__ or __OPENMP_AMDGCN__) is
// defined only around the include and removed right after it.
#ifdef __NVPTX__
#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
#undef __OPENMP_NVPTX__
#endif
#ifdef __AMDGCN__
#define __OPENMP_AMDGCN__
#include <__clang_cuda_complex_builtins.h>
#undef __OPENMP_AMDGCN__
#endif
#endif
// Grab the host header too.
#include_next <complex.h>

View File

@@ -0,0 +1,388 @@
//===-------------------------- complex_cmath.h ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// std::complex header copied from the libcxx source and simplified for use in
// OpenMP target offload regions.
//
//===----------------------------------------------------------------------===//
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#ifndef __cplusplus
#error "This file is for C++ compilation only."
#endif
#ifndef _LIBCPP_COMPLEX
#define _LIBCPP_COMPLEX
#include <cmath>
#include <type_traits>
#define __DEVICE__ static constexpr __attribute__((nothrow))
namespace std {
// abs
// Magnitude of __c, computed as hypot(real part, imaginary part).
template <class _Tp> __DEVICE__ _Tp abs(const std::complex<_Tp> &__c) {
  const _Tp __re = __c.real();
  const _Tp __im = __c.imag();
  return hypot(__re, __im);
}
// arg
// Phase angle of __c, computed as atan2(imaginary part, real part).
template <class _Tp> __DEVICE__ _Tp arg(const std::complex<_Tp> &__c) {
  const _Tp __im = __c.imag();
  const _Tp __re = __c.real();
  return atan2(__im, __re);
}
// arg for a real argument. This overload participates only when _Tp is an
// integral type or double; the result type is double and the value is
// atan2(0., __re).
template <class _Tp>
typename enable_if<is_integral<_Tp>::value || is_same<_Tp, double>::value,
double>::type
arg(_Tp __re) {
return atan2(0., __re);
}
// arg for a real float argument. This overload participates only when _Tp is
// float; the result type is float and the value is atan2f(0.F, __re).
template <class _Tp>
typename enable_if<is_same<_Tp, float>::value, float>::type arg(_Tp __re) {
return atan2f(0.F, __re);
}
// norm
// Squared magnitude of __c. If a component is infinite, the absolute value of
// that component is returned instead (the real part is checked first);
// otherwise the result is real^2 + imag^2.
template <class _Tp> __DEVICE__ _Tp norm(const std::complex<_Tp> &__c) {
  const _Tp __re = __c.real();
  const _Tp __im = __c.imag();
  if (std::isinf(__re))
    return abs(__re);
  if (std::isinf(__im))
    return abs(__im);
  return __re * __re + __im * __im;
}
// conj
// Complex conjugate: same real part, negated imaginary part.
template <class _Tp> std::complex<_Tp> conj(const std::complex<_Tp> &__c) {
  const _Tp __negated_imag = -__c.imag();
  return std::complex<_Tp>(__c.real(), __negated_imag);
}
// proj
// Projection of __c: a value with no infinite component is returned
// unchanged; otherwise the result is (INFINITY, zero carrying the sign of the
// imaginary part).
template <class _Tp> std::complex<_Tp> proj(const std::complex<_Tp> &__c) {
  const bool __has_inf_part = std::isinf(__c.real()) || std::isinf(__c.imag());
  if (!__has_inf_part)
    return __c;
  return std::complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag()));
}
// polar
// Builds a complex number from magnitude __rho and angle __theta (which
// defaults to a value-initialized _Tp). The special cases are checked in order
// before the general formula (rho*cos(theta), rho*sin(theta)).
template <class _Tp>
complex<_Tp> polar(const _Tp &__rho, const _Tp &__theta = _Tp()) {
// A NaN magnitude, or one with its sign bit set, yields (NaN, NaN).
if (std::isnan(__rho) || signbit(__rho))
return std::complex<_Tp>(_Tp(NAN), _Tp(NAN));
// NaN angle: (rho, theta) for an infinite magnitude, else (theta, theta).
if (std::isnan(__theta)) {
if (std::isinf(__rho))
return std::complex<_Tp>(__rho, __theta);
return std::complex<_Tp>(__theta, __theta);
}
// Infinite angle: (rho, NaN) for an infinite magnitude, else (NaN, NaN).
if (std::isinf(__theta)) {
if (std::isinf(__rho))
return std::complex<_Tp>(__rho, _Tp(NAN));
return std::complex<_Tp>(_Tp(NAN), _Tp(NAN));
}
// General case; a component whose product evaluates to NaN is replaced by 0.
_Tp __x = __rho * cos(__theta);
if (std::isnan(__x))
__x = 0;
_Tp __y = __rho * sin(__theta);
if (std::isnan(__y))
__y = 0;
return std::complex<_Tp>(__x, __y);
}
// log
// Complex natural logarithm: (log of the magnitude, phase angle).
template <class _Tp> std::complex<_Tp> log(const std::complex<_Tp> &__x) {
  const _Tp __modulus = abs(__x);
  const _Tp __phase = arg(__x);
  return std::complex<_Tp>(log(__modulus), __phase);
}
// log10
// Complex base-10 logarithm: the complex natural log of __x divided by the
// real natural log of 10.
template <class _Tp> std::complex<_Tp> log10(const std::complex<_Tp> &__x) {
  const _Tp __ln10 = log(_Tp(10));
  return log(__x) / __ln10;
}
// sqrt
// Complex square root. Infinite components are handled explicitly first; every
// other input goes through polar(sqrt(abs(x)), arg(x) / 2).
template <class _Tp>
__DEVICE__ std::complex<_Tp> sqrt(const std::complex<_Tp> &__x) {
// An infinite imaginary part gives (INFINITY, imag) whatever the real part is.
if (std::isinf(__x.imag()))
return std::complex<_Tp>(_Tp(INFINITY), __x.imag());
if (std::isinf(__x.real())) {
// Positive infinite real part: (real, zero with imag's sign), except that a
// NaN imaginary part is passed through unchanged.
if (__x.real() > _Tp(0))
return std::complex<_Tp>(__x.real(), std::isnan(__x.imag())
? __x.imag()
: copysign(_Tp(0), __x.imag()));
// Negative infinite real part: the real result is 0 (or the NaN imaginary
// part), the imaginary result is the real part with imag's sign applied.
return std::complex<_Tp>(std::isnan(__x.imag()) ? __x.imag() : _Tp(0),
copysign(__x.real(), __x.imag()));
}
return polar(sqrt(abs(__x)), arg(__x) / _Tp(2));
}
// exp
// Complex exponential. __i starts as the imaginary part and may be replaced
// for special inputs before the general formula
// (e^real * cos(__i), e^real * sin(__i)) is evaluated.
template <class _Tp>
__DEVICE__ std::complex<_Tp> exp(const std::complex<_Tp> &__x) {
_Tp __i = __x.imag();
if (std::isinf(__x.real())) {
if (__x.real() < _Tp(0)) {
// Negative infinite real part with a non-finite imaginary part: use 1 as
// the angle in the general formula below.
if (!std::isfinite(__i))
__i = _Tp(1);
} else if (__i == 0 || !std::isfinite(__i)) {
// Positive infinite real part with a zero or non-finite imaginary part:
// return (real, __i), where an infinite __i is first turned into NaN.
if (std::isinf(__i))
__i = _Tp(NAN);
return std::complex<_Tp>(__x.real(), __i);
}
} else if (std::isnan(__x.real()) && __x.imag() == 0)
// NaN real part with a zero imaginary part: the input is returned unchanged.
return __x;
_Tp __e = exp(__x.real());
return std::complex<_Tp>(__e * cos(__i), __e * sin(__i));
}
// pow
// Complex power, evaluated as exp(__y * log(__x)).
template <class _Tp>
std::complex<_Tp> pow(const std::complex<_Tp> &__x,
                      const std::complex<_Tp> &__y) {
  const std::complex<_Tp> __scaled_log = __y * log(__x);
  return exp(__scaled_log);
}
// __sqr, computes pow(x, 2)
// Square of __x: real part (re - im) * (re + im), imaginary part 2 * re * im.
template <class _Tp> std::complex<_Tp> __sqr(const std::complex<_Tp> &__x) {
  const _Tp __re = __x.real();
  const _Tp __im = __x.imag();
  const _Tp __real_part = (__re - __im) * (__re + __im);
  const _Tp __imag_part = _Tp(2) * __re * __im;
  return std::complex<_Tp>(__real_part, __imag_part);
}
// asinh
// Complex inverse hyperbolic sine. Infinite and NaN components are handled
// first; all other inputs use log(x + sqrt(x^2 + 1)) with the signs of the
// result components copied from the corresponding components of x.
template <class _Tp>
__DEVICE__ std::complex<_Tp> asinh(const std::complex<_Tp> &__x) {
// __pi is obtained as atan2(+0., -0.).
const _Tp __pi(atan2(+0., -0.));
if (std::isinf(__x.real())) {
// Infinite real part: x itself when the imaginary part is NaN,
// (real, pi/4 signed like imag) when it is infinite, else (real, signed 0).
if (std::isnan(__x.imag()))
return __x;
if (std::isinf(__x.imag()))
return std::complex<_Tp>(__x.real(),
copysign(__pi * _Tp(0.25), __x.imag()));
return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (std::isnan(__x.real())) {
// NaN real part: (imag, real) for an infinite imaginary part, x itself for
// a zero imaginary part, else (real, real).
if (std::isinf(__x.imag()))
return std::complex<_Tp>(__x.imag(), __x.real());
if (__x.imag() == 0)
return __x;
return std::complex<_Tp>(__x.real(), __x.real());
}
// Remaining real part with an infinite imaginary part: (imag with real's
// sign, pi/2 with imag's sign).
if (std::isinf(__x.imag()))
return std::complex<_Tp>(copysign(__x.imag(), __x.real()),
copysign(__pi / _Tp(2), __x.imag()));
std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) + _Tp(1)));
return std::complex<_Tp>(copysign(__z.real(), __x.real()),
copysign(__z.imag(), __x.imag()));
}
// acosh
// Complex inverse hyperbolic cosine. Infinite and NaN components are handled
// first; all other inputs use log(x + sqrt(x^2 - 1)), returning the real part
// with the sign of +0 and the imaginary part with the sign of imag(x).
template <class _Tp>
__DEVICE__ std::complex<_Tp> acosh(const std::complex<_Tp> &__x) {
// __pi is obtained as atan2(+0., -0.).
const _Tp __pi(atan2(+0., -0.));
if (std::isinf(__x.real())) {
// Infinite real part with NaN imaginary part: (|real|, imag).
if (std::isnan(__x.imag()))
return std::complex<_Tp>(abs(__x.real()), __x.imag());
// Both parts infinite: the angle is pi/4 for a positive real part and 3pi/4
// otherwise, signed like imag; the real result is the real part or its
// negation respectively.
if (std::isinf(__x.imag())) {
if (__x.real() > 0)
return std::complex<_Tp>(__x.real(),
copysign(__pi * _Tp(0.25), __x.imag()));
else
return std::complex<_Tp>(-__x.real(),
copysign(__pi * _Tp(0.75), __x.imag()));
}
// Infinite real part, finite imaginary part: angle pi for a negative real
// part, 0 otherwise, each signed like imag.
if (__x.real() < 0)
return std::complex<_Tp>(-__x.real(), copysign(__pi, __x.imag()));
return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (std::isnan(__x.real())) {
// NaN real part: (|imag|, real) for an infinite imaginary part, else
// (real, real).
if (std::isinf(__x.imag()))
return std::complex<_Tp>(abs(__x.imag()), __x.real());
return std::complex<_Tp>(__x.real(), __x.real());
}
// Remaining real part with an infinite imaginary part: (|imag|, pi/2 signed
// like imag).
if (std::isinf(__x.imag()))
return std::complex<_Tp>(abs(__x.imag()),
copysign(__pi / _Tp(2), __x.imag()));
std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
return std::complex<_Tp>(copysign(__z.real(), _Tp(0)),
copysign(__z.imag(), __x.imag()));
}
// atanh
// Complex inverse hyperbolic tangent. Special values are checked in order;
// all other inputs use log((1 + x) / (1 - x)) / 2 with the signs of the result
// components copied from the corresponding components of x.
template <class _Tp>
__DEVICE__ std::complex<_Tp> atanh(const std::complex<_Tp> &__x) {
// __pi is obtained as atan2(+0., -0.).
const _Tp __pi(atan2(+0., -0.));
// Infinite imaginary part: (0 signed like real, pi/2 signed like imag).
if (std::isinf(__x.imag())) {
return std::complex<_Tp>(copysign(_Tp(0), __x.real()),
copysign(__pi / _Tp(2), __x.imag()));
}
// NaN imaginary part: (0 signed like real, imag) when the real part is
// infinite or zero, else (imag, imag).
if (std::isnan(__x.imag())) {
if (std::isinf(__x.real()) || __x.real() == 0)
return std::complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag());
return std::complex<_Tp>(__x.imag(), __x.imag());
}
// NaN real part: (real, real).
if (std::isnan(__x.real())) {
return std::complex<_Tp>(__x.real(), __x.real());
}
// Infinite real part: (0 signed like real, pi/2 signed like imag).
if (std::isinf(__x.real())) {
return std::complex<_Tp>(copysign(_Tp(0), __x.real()),
copysign(__pi / _Tp(2), __x.imag()));
}
// |real| == 1 with a zero imaginary part: (INFINITY signed like real, 0
// signed like imag).
if (abs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0)) {
return std::complex<_Tp>(copysign(_Tp(INFINITY), __x.real()),
copysign(_Tp(0), __x.imag()));
}
std::complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2);
return std::complex<_Tp>(copysign(__z.real(), __x.real()),
copysign(__z.imag(), __x.imag()));
}
// sinh
// Complex hyperbolic sine. Three special cases are returned directly; all
// other inputs use (sinh(re) * cos(im), cosh(re) * sin(im)).
template <class _Tp>
__DEVICE__ std::complex<_Tp> sinh(const std::complex<_Tp> &__x) {
// Infinite real part with a non-finite imaginary part: (real, NaN).
if (std::isinf(__x.real()) && !std::isfinite(__x.imag()))
return std::complex<_Tp>(__x.real(), _Tp(NAN));
// Zero real part with a non-finite imaginary part: (real, NaN).
if (__x.real() == 0 && !std::isfinite(__x.imag()))
return std::complex<_Tp>(__x.real(), _Tp(NAN));
// Zero imaginary part with a non-finite real part: x is returned unchanged.
if (__x.imag() == 0 && !std::isfinite(__x.real()))
return __x;
return std::complex<_Tp>(sinh(__x.real()) * cos(__x.imag()),
cosh(__x.real()) * sin(__x.imag()));
}
// cosh
// Complex hyperbolic cosine. Four special cases are returned directly; all
// other inputs use (cosh(re) * cos(im), sinh(re) * sin(im)).
template <class _Tp>
__DEVICE__ std::complex<_Tp> cosh(const std::complex<_Tp> &__x) {
// Infinite real part with a non-finite imaginary part: (|real|, NaN).
if (std::isinf(__x.real()) && !std::isfinite(__x.imag()))
return std::complex<_Tp>(abs(__x.real()), _Tp(NAN));
// Zero real part with a non-finite imaginary part: (NaN, real).
if (__x.real() == 0 && !std::isfinite(__x.imag()))
return std::complex<_Tp>(_Tp(NAN), __x.real());
// Both parts zero: (1, imag).
if (__x.real() == 0 && __x.imag() == 0)
return std::complex<_Tp>(_Tp(1), __x.imag());
// Zero imaginary part with a non-finite real part: (|real|, imag).
if (__x.imag() == 0 && !std::isfinite(__x.real()))
return std::complex<_Tp>(abs(__x.real()), __x.imag());
return std::complex<_Tp>(cosh(__x.real()) * cos(__x.imag()),
sinh(__x.real()) * sin(__x.imag()));
}
// tanh
// Complex hyperbolic tangent.
//
// Special cases, checked in order:
//  * infinite real part: the real result is 1 carrying the sign of the real
//    part; the imaginary result is 0 when the imaginary part is not finite,
//    and otherwise a zero carrying the sign of sin(2 * imag);
//  * NaN real part with a zero imaginary part: x is returned unchanged;
//  * sinh(2 * re) and the denominator both overflow to infinity: the result
//    is (+/-1, +/-0) chosen from the signs of sinh(2 * re) and 2 * im.
// Everything else evaluates (sinh(2re) / d, sin(2im) / d) with
// d = cosh(2re) + cos(2im).
template <class _Tp>
__DEVICE__ std::complex<_Tp> tanh(const std::complex<_Tp> &__x) {
  if (std::isinf(__x.real())) {
    // tanh is an odd function, so the limit must follow the sign of the real
    // part: tanh(-inf + iy) has real part -1, not +1.
    const _Tp __limit = copysign(_Tp(1), __x.real());
    if (!std::isfinite(__x.imag()))
      return std::complex<_Tp>(__limit, _Tp(0));
    return std::complex<_Tp>(__limit,
                             copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
  }
  if (std::isnan(__x.real()) && __x.imag() == 0)
    return __x;
  _Tp __2r(_Tp(2) * __x.real());
  _Tp __2i(_Tp(2) * __x.imag());
  _Tp __d(cosh(__2r) + cos(__2i));
  _Tp __2rsh(sinh(__2r));
  // Avoid inf / inf when both numerator and denominator have overflowed.
  if (std::isinf(__2rsh) && std::isinf(__d))
    return std::complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1),
                             __2i > _Tp(0) ? _Tp(0) : _Tp(-0.));
  return std::complex<_Tp>(__2rsh / __d, sin(__2i) / __d);
}
// asin
// Complex arc sine, evaluated through asinh: with w = asinh((-im, re)), the
// result is (imag(w), -real(w)).
template <class _Tp>
__DEVICE__ std::complex<_Tp> asin(const std::complex<_Tp> &__x) {
  const std::complex<_Tp> __rotated(-__x.imag(), __x.real());
  const std::complex<_Tp> __w = asinh(__rotated);
  return std::complex<_Tp>(__w.imag(), -__w.real());
}
// acos
// Complex arc cosine. Infinite and NaN components are handled first; all
// other inputs use z = log(x + sqrt(x^2 - 1)) and return
// (|imag(z)|, +/-|real(z)|), where the imaginary result is positive when the
// sign bit of imag(x) is set and negative otherwise.
template <class _Tp>
__DEVICE__ std::complex<_Tp> acos(const std::complex<_Tp> &__x) {
  // __pi is obtained as atan2(+0., -0.).
  const _Tp __pi(atan2(+0., -0.));
  if (std::isinf(__x.real())) {
    // Infinite real part with NaN imaginary part: (imag, real).
    if (std::isnan(__x.imag()))
      return std::complex<_Tp>(__x.imag(), __x.real());
    // Both parts infinite: 3pi/4 for a negative real part, pi/4 otherwise; the
    // imaginary result is the negated imaginary part.
    if (std::isinf(__x.imag())) {
      if (__x.real() < _Tp(0))
        return std::complex<_Tp>(_Tp(0.75) * __pi, -__x.imag());
      return std::complex<_Tp>(_Tp(0.25) * __pi, -__x.imag());
    }
    // Infinite real part, finite imaginary part: the real result is pi for a
    // negative real part and 0 otherwise; the imaginary result is the real
    // part or its negation, selected by the sign bit of imag(x).
    if (__x.real() < _Tp(0))
      return std::complex<_Tp>(__pi,
                               signbit(__x.imag()) ? -__x.real() : __x.real());
    return std::complex<_Tp>(_Tp(0),
                             signbit(__x.imag()) ? __x.real() : -__x.real());
  }
  if (std::isnan(__x.real())) {
    // NaN real part: (real, -imag) for an infinite imaginary part, else
    // (real, real).
    if (std::isinf(__x.imag()))
      return std::complex<_Tp>(__x.real(), -__x.imag());
    return std::complex<_Tp>(__x.real(), __x.real());
  }
  // Remaining real part with an infinite imaginary part: (pi/2, -imag).
  if (std::isinf(__x.imag()))
    return std::complex<_Tp>(__pi / _Tp(2), -__x.imag());
  // Zero real part with a zero or NaN imaginary part: (pi/2, -imag).
  // std::isnan is qualified here like every other classification call in this
  // function.
  if (__x.real() == 0 && (__x.imag() == 0 || std::isnan(__x.imag())))
    return std::complex<_Tp>(__pi / _Tp(2), -__x.imag());
  std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
  if (signbit(__x.imag()))
    return std::complex<_Tp>(abs(__z.imag()), abs(__z.real()));
  return std::complex<_Tp>(abs(__z.imag()), -abs(__z.real()));
}
// atan
// Complex arc tangent, evaluated through atanh: with w = atanh((-im, re)), the
// result is (imag(w), -real(w)).
template <class _Tp>
__DEVICE__ std::complex<_Tp> atan(const std::complex<_Tp> &__x) {
  const std::complex<_Tp> __rotated(-__x.imag(), __x.real());
  const std::complex<_Tp> __w = atanh(__rotated);
  return std::complex<_Tp>(__w.imag(), -__w.real());
}
// sin
// Complex sine, evaluated through sinh: with w = sinh((-im, re)), the result
// is (imag(w), -real(w)).
template <class _Tp>
__DEVICE__ std::complex<_Tp> sin(const std::complex<_Tp> &__x) {
  const std::complex<_Tp> __rotated(-__x.imag(), __x.real());
  const std::complex<_Tp> __w = sinh(__rotated);
  return std::complex<_Tp>(__w.imag(), -__w.real());
}
// cos
template <class _Tp> std::complex<_Tp> cos(const std::complex<_Tp> &__x) {
return cosh(complex<_Tp>(-__x.imag(), __x.real()));
}
// tan
// Complex tangent, evaluated through tanh: with z = tanh((-im, re)), the
// result is (imag(z), -real(z)).
template <class _Tp>
__DEVICE__ std::complex<_Tp> tan(const std::complex<_Tp> &__x) {
  std::complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real()));
  return std::complex<_Tp>(__z.imag(), -__z.real());
}
// __DEVICE__ is a helper macro private to this header and is not used past
// this point; remove it so the definition does not leak into including files.
#undef __DEVICE__
} // namespace std
#endif

View File

@@ -0,0 +1,61 @@
/*===---- openmp_wrapper/math.h -------- OpenMP math.h intercept ------ c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
// If we are in C++ mode and include <math.h> (not <cmath>) first, we still need
// to make sure <cmath> is read first. The problem otherwise is that we haven't
// seen the declarations of the math.h functions when the system math.h includes
// our cmath overlay. However, our cmath overlay, or better the underlying
// overlay, e.g. CUDA, uses the math.h functions. Since we haven't declared them
// yet we get errors. CUDA avoids this by eagerly declaring all math functions
// (in the __device__ space) but we cannot do this. Instead we break the
// dependence by forcing cmath to go first. While our cmath will in turn include
// this file, the cmath guards will prevent recursion.
#ifdef __cplusplus
#include <cmath>
#endif
#ifndef __CLANG_OPENMP_MATH_H__
#define __CLANG_OPENMP_MATH_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#include_next <math.h>
// We need limits.h for __clang_cuda_math.h below and because it should not hurt
// we include it eagerly here.
#include <limits.h>
// We need stdlib.h because (for now) __clang_cuda_math.h below declares `abs`
// which should live in stdlib.h.
#include <stdlib.h>
// NVPTX region of the math.h wrapper: __clang_cuda_math.h is included inside
// an OpenMP variant scope selected for the nvptx and nvptx64 device
// architectures. __CUDA__ and __OPENMP_NVPTX__ are defined only around the
// include and removed right after it.
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
#define __CUDA__
#define __OPENMP_NVPTX__
#include <__clang_cuda_math.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
#pragma omp end declare variant
// AMDGCN region of the math.h wrapper: only processed when __AMDGCN__ is
// defined. __clang_hip_math.h is included inside an OpenMP variant scope
// selected for the amdgcn device architecture, with __OPENMP_AMDGCN__ defined
// only around the include.
#ifdef __AMDGCN__
#pragma omp begin declare variant match(device = {arch(amdgcn)})
#define __OPENMP_AMDGCN__
#include <__clang_hip_math.h>
#undef __OPENMP_AMDGCN__
#pragma omp end declare variant
#endif
#endif

View File

@@ -0,0 +1,48 @@
//===--------- new - OPENMP wrapper for <new> ------------------------------===
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------------===
#ifndef __CLANG_OPENMP_WRAPPERS_NEW
#define __CLANG_OPENMP_WRAPPERS_NEW
// We need the system <new> for the std::nothrow_t. The new/delete operators
// which do not use nothrow_t are provided without the <new> header.
#include_next <new>
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(_OPENMP)
#include <cstdlib>
// OPENMP_NOEXCEPT expands to `noexcept` from C++11 on and to nothing before
// that. Any outer definition is saved here and restored at the end.
#pragma push_macro("OPENMP_NOEXCEPT")
#if __cplusplus >= 201103L
#define OPENMP_NOEXCEPT noexcept
#else
#define OPENMP_NOEXCEPT
#endif

// std::nothrow_t overloads of the global allocation and deallocation
// functions. Each one simply forwards to the corresponding plain (non-nothrow)
// scalar operator. All four carry OPENMP_NOEXCEPT so that their exception
// specification matches the declarations in <new>.

// Nothrow scalar new: forwards to ::operator new(size).
inline void *operator new(__SIZE_TYPE__ size,
                          const std::nothrow_t &) OPENMP_NOEXCEPT {
  return ::operator new(size);
}

// Nothrow array new: forwards to ::operator new(size).
inline void *operator new[](__SIZE_TYPE__ size,
                            const std::nothrow_t &) OPENMP_NOEXCEPT {
  return ::operator new(size);
}

// Nothrow scalar delete: forwards to ::operator delete(ptr).
inline void operator delete(void *ptr, const std::nothrow_t &) OPENMP_NOEXCEPT {
  ::operator delete(ptr);
}

// Nothrow array delete: forwards to ::operator delete(ptr).
inline void operator delete[](void *ptr,
                              const std::nothrow_t &) OPENMP_NOEXCEPT {
  ::operator delete(ptr);
}
#pragma pop_macro("OPENMP_NOEXCEPT")
#endif
#endif // include guard