2008-08-21 04:08:38 +08:00
|
|
|
// This file is part of Eigen, a lightweight C++ template library
|
2009-05-23 02:25:33 +08:00
|
|
|
// for linear algebra.
|
2008-08-21 04:08:38 +08:00
|
|
|
//
|
2010-06-25 05:21:58 +08:00
|
|
|
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
2008-11-24 21:40:43 +08:00
|
|
|
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
2008-08-21 04:08:38 +08:00
|
|
|
//
|
2012-07-14 02:42:47 +08:00
|
|
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
|
|
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
|
|
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
2008-08-21 04:08:38 +08:00
|
|
|
|
2020-01-11 18:31:21 +08:00
|
|
|
#include "packetmath_test_shared.h"
|
2020-06-26 05:31:16 +08:00
|
|
|
#include "random_without_cast_overflow.h"
|
2019-06-21 02:47:49 +08:00
|
|
|
|
2020-05-12 04:23:31 +08:00
|
|
|
// Scalar reference implementation of addition: returns `a + b`.
// The bool overload is provided separately as an explicit specialization.
template <typename T>
inline T REF_ADD(const T& a, const T& b) {
  const T sum = a + b;
  return sum;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
// Scalar reference implementation of subtraction: returns `a - b`.
// The bool overload is provided separately as an explicit specialization.
template <typename T>
inline T REF_SUB(const T& a, const T& b) {
  const T difference = a - b;
  return difference;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
// Scalar reference implementation of multiplication: returns `a * b`.
// The bool overload is provided separately as an explicit specialization.
template <typename T>
inline T REF_MUL(const T& a, const T& b) {
  const T product = a * b;
  return product;
}
|
2025-03-23 09:05:53 +08:00
|
|
|
|
|
|
|
|
template <typename Scalar, typename EnableIf = void>
|
|
|
|
|
struct madd_impl {
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar madd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
|
|
|
return a * b + c;
|
|
|
|
|
}
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar msub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
|
|
|
return a * b - c;
|
|
|
|
|
}
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
|
|
|
return c - a * b;
|
|
|
|
|
}
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
|
|
|
|
return Scalar(0) - (a * b + c);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename Scalar>
|
|
|
|
|
struct madd_impl<Scalar,
|
|
|
|
|
std::enable_if_t<Eigen::internal::is_scalar<Scalar>::value && Eigen::NumTraits<Scalar>::IsSigned>> {
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar madd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
2025-08-29 05:40:19 +08:00
|
|
|
return numext::madd(a, b, c);
|
2025-03-23 09:05:53 +08:00
|
|
|
}
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar msub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
2025-08-29 05:40:19 +08:00
|
|
|
return numext::madd(a, b, Scalar(-c));
|
2025-03-23 09:05:53 +08:00
|
|
|
}
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
|
2025-08-29 05:40:19 +08:00
|
|
|
return numext::madd(Scalar(-a), b, c);
|
2025-03-23 09:05:53 +08:00
|
|
|
}
|
|
|
|
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
|
2025-08-29 05:40:19 +08:00
|
|
|
return -Scalar(numext::madd(a, b, c));
|
2025-03-23 09:05:53 +08:00
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2020-05-12 04:23:31 +08:00
|
|
|
template <typename T>
|
2022-01-26 12:25:41 +08:00
|
|
|
inline T REF_MADD(const T& a, const T& b, const T& c) {
|
2025-03-23 09:05:53 +08:00
|
|
|
return madd_impl<T>::madd(a, b, c);
|
2022-01-26 12:25:41 +08:00
|
|
|
}
|
|
|
|
|
template <typename T>
|
|
|
|
|
inline T REF_MSUB(const T& a, const T& b, const T& c) {
|
2025-03-23 09:05:53 +08:00
|
|
|
return madd_impl<T>::msub(a, b, c);
|
2022-01-26 12:25:41 +08:00
|
|
|
}
|
|
|
|
|
template <typename T>
|
|
|
|
|
inline T REF_NMADD(const T& a, const T& b, const T& c) {
|
2025-03-23 09:05:53 +08:00
|
|
|
return madd_impl<T>::nmadd(a, b, c);
|
2022-01-26 12:25:41 +08:00
|
|
|
}
|
|
|
|
|
template <typename T>
|
|
|
|
|
inline T REF_NMSUB(const T& a, const T& b, const T& c) {
|
2025-03-23 09:05:53 +08:00
|
|
|
return madd_impl<T>::nmsub(a, b, c);
|
2022-01-26 12:25:41 +08:00
|
|
|
}
|
|
|
|
|
// Scalar reference implementation of division: returns `a / b`.
// No check is made on `b`. The bool overload is provided separately as an
// explicit specialization.
template <typename T>
inline T REF_DIV(const T& a, const T& b) {
  const T quotient = a / b;
  return quotient;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
// Scalar reference implementation of the reciprocal: returns `T(1) / a`.
// The bool overload is provided separately as an explicit specialization.
template <typename T>
inline T REF_RECIPROCAL(const T& a) {
  const T one(1);
  return one / a;
}
|
|
|
|
|
// Scalar reference implementation of the absolute difference |a - b|.
// The larger operand is always the minuend, so the subtraction never goes
// below zero for unsigned types.
template <typename T>
inline T REF_ABS_DIFF(const T& a, const T& b) {
  if (a > b) {
    return a - b;
  }
  return b - a;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
|
2020-05-20 07:21:56 +08:00
|
|
|
// Specializations for bool.
|
2020-05-12 04:23:31 +08:00
|
|
|
template <>
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
inline bool REF_ADD(const bool& a, const bool& b) {
|
|
|
|
|
return a || b;
|
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
template <>
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
inline bool REF_SUB(const bool& a, const bool& b) {
|
|
|
|
|
return a ^ b;
|
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
template <>
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
inline bool REF_MUL(const bool& a, const bool& b) {
|
|
|
|
|
return a && b;
|
|
|
|
|
}
|
2022-01-26 12:25:41 +08:00
|
|
|
template <>
|
|
|
|
|
inline bool REF_MADD(const bool& a, const bool& b, const bool& c) {
|
|
|
|
|
return (a && b) || c;
|
|
|
|
|
}
|
2025-03-23 09:05:53 +08:00
|
|
|
template <>
|
|
|
|
|
inline bool REF_DIV(const bool& a, const bool& b) {
|
|
|
|
|
return a && b;
|
|
|
|
|
}
|
|
|
|
|
template <>
|
|
|
|
|
inline bool REF_RECIPROCAL(const bool& a) {
|
|
|
|
|
return a;
|
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
|
2021-01-21 11:00:09 +08:00
|
|
|
template <typename T>
|
|
|
|
|
inline T REF_FREXP(const T& x, T& exp) {
|
2021-12-09 01:57:23 +08:00
|
|
|
int iexp = 0;
|
2021-01-21 11:00:09 +08:00
|
|
|
EIGEN_USING_STD(frexp)
|
|
|
|
|
const T out = static_cast<T>(frexp(x, &iexp));
|
|
|
|
|
exp = static_cast<T>(iexp);
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2022-02-25 06:16:37 +08:00
|
|
|
// The exponent value is unspecified if the input is inf or NaN, but MSVC
|
|
|
|
|
// seems to set it to 1. We need to set it back to zero for consistency.
|
|
|
|
|
if (!(numext::isfinite)(x)) {
|
|
|
|
|
exp = T(0);
|
|
|
|
|
}
|
2021-01-21 11:00:09 +08:00
|
|
|
return out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
inline T REF_LDEXP(const T& x, const T& exp) {
|
|
|
|
|
EIGEN_USING_STD(ldexp)
|
|
|
|
|
return static_cast<T>(ldexp(x, static_cast<int>(exp)));
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-13 01:06:32 +08:00
|
|
|
// provides a convenient function to take the absolute value of each component of a complex number to prevent
|
|
|
|
|
// catastrophic cancellation in randomly generated complex numbers
|
|
|
|
|
template <typename T, bool IsComplex = NumTraits<T>::IsComplex>
|
|
|
|
|
struct abs_helper_impl {
|
|
|
|
|
static T run(T x) { return numext::abs(x); }
|
|
|
|
|
};
|
|
|
|
|
template <typename T>
|
|
|
|
|
struct abs_helper_impl<T, true> {
|
|
|
|
|
static T run(T x) {
|
|
|
|
|
T res = x;
|
|
|
|
|
numext::real_ref(res) = numext::abs(numext::real(res));
|
|
|
|
|
numext::imag_ref(res) = numext::abs(numext::imag(res));
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
template <typename T>
|
|
|
|
|
T abs_helper(T x) {
|
|
|
|
|
return abs_helper_impl<T>::run(x);
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-29 01:41:28 +08:00
|
|
|
// Uses pcast to cast from one array to another.
//
// Only declared here; the definitions are the partial specializations on the
// (SrcCoeffRatio, TgtCoeffRatio) pair that follow.
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
struct pcast_array;
|
|
|
|
|
|
|
|
|
|
template <typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
|
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
|
|
|
|
|
typedef typename internal::unpacket_traits<SrcPacket>::type SrcScalar;
|
|
|
|
|
typedef typename internal::unpacket_traits<TgtPacket>::type TgtScalar;
|
|
|
|
|
static void cast(const SrcScalar* src, size_t size, TgtScalar* dst) {
|
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
2020-05-29 01:41:28 +08:00
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
size_t i;
|
|
|
|
|
for (i = 0; i < size && i + SrcPacketSize <= size; i += TgtPacketSize) {
|
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(internal::ploadu<SrcPacket>(src + i)));
|
|
|
|
|
}
|
|
|
|
|
// Leftovers that cannot be loaded into a packet.
|
|
|
|
|
for (; i < size; ++i) {
|
|
|
|
|
dst[i] = static_cast<TgtScalar>(src[i]);
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 2, 1> {
|
|
|
|
|
static void cast(const typename internal::unpacket_traits<SrcPacket>::type* src, size_t size,
|
|
|
|
|
typename internal::unpacket_traits<TgtPacket>::type* dst) {
|
2020-05-29 01:41:28 +08:00
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (size_t i = 0; i < size; i += TgtPacketSize) {
|
|
|
|
|
SrcPacket a = internal::ploadu<SrcPacket>(src + i);
|
|
|
|
|
SrcPacket b = internal::ploadu<SrcPacket>(src + i + SrcPacketSize);
|
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(a, b));
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 4, 1> {
|
|
|
|
|
static void cast(const typename internal::unpacket_traits<SrcPacket>::type* src, size_t size,
|
|
|
|
|
typename internal::unpacket_traits<TgtPacket>::type* dst) {
|
2020-05-29 01:41:28 +08:00
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (size_t i = 0; i < size; i += TgtPacketSize) {
|
|
|
|
|
SrcPacket a = internal::ploadu<SrcPacket>(src + i);
|
|
|
|
|
SrcPacket b = internal::ploadu<SrcPacket>(src + i + SrcPacketSize);
|
|
|
|
|
SrcPacket c = internal::ploadu<SrcPacket>(src + i + 2 * SrcPacketSize);
|
|
|
|
|
SrcPacket d = internal::ploadu<SrcPacket>(src + i + 3 * SrcPacketSize);
|
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(a, b, c, d));
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 8, 1> {
|
|
|
|
|
static void cast(const typename internal::unpacket_traits<SrcPacket>::type* src, size_t size,
|
|
|
|
|
typename internal::unpacket_traits<TgtPacket>::type* dst) {
|
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
|
|
|
|
for (size_t i = 0; i < size; i += TgtPacketSize) {
|
|
|
|
|
SrcPacket a = internal::ploadu<SrcPacket>(src + i);
|
|
|
|
|
SrcPacket b = internal::ploadu<SrcPacket>(src + i + SrcPacketSize);
|
|
|
|
|
SrcPacket c = internal::ploadu<SrcPacket>(src + i + 2 * SrcPacketSize);
|
|
|
|
|
SrcPacket d = internal::ploadu<SrcPacket>(src + i + 3 * SrcPacketSize);
|
|
|
|
|
SrcPacket e = internal::ploadu<SrcPacket>(src + i + 4 * SrcPacketSize);
|
|
|
|
|
SrcPacket f = internal::ploadu<SrcPacket>(src + i + 5 * SrcPacketSize);
|
|
|
|
|
SrcPacket g = internal::ploadu<SrcPacket>(src + i + 6 * SrcPacketSize);
|
|
|
|
|
SrcPacket h = internal::ploadu<SrcPacket>(src + i + 7 * SrcPacketSize);
|
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(a, b, c, d, e, f, g, h));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Runs the packet cast test for one (SrcPacket, TgtPacket) pair.
// Only declared here; `CanCast` selects between the two partial
// specializations and defaults to false.
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio, bool CanCast = false>
struct test_cast_helper;
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
|
2020-05-29 01:41:28 +08:00
|
|
|
struct test_cast_helper<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio, false> {
|
2020-03-27 04:18:19 +08:00
|
|
|
static void run() {}
|
|
|
|
|
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
|
2020-05-29 01:41:28 +08:00
|
|
|
struct test_cast_helper<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio, true> {
|
2020-03-27 04:18:19 +08:00
|
|
|
static void run() {
|
2020-05-29 01:41:28 +08:00
|
|
|
typedef typename internal::unpacket_traits<SrcPacket>::type SrcScalar;
|
|
|
|
|
typedef typename internal::unpacket_traits<TgtPacket>::type TgtScalar;
|
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
static const int BlockSize = SrcPacketSize * SrcCoeffRatio;
|
|
|
|
|
eigen_assert(BlockSize == TgtPacketSize * TgtCoeffRatio && "Packet sizes and cast ratios are mismatched.");
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
static const int DataSize = 10 * BlockSize;
|
2020-05-29 01:41:28 +08:00
|
|
|
EIGEN_ALIGN_MAX SrcScalar data1[DataSize];
|
|
|
|
|
EIGEN_ALIGN_MAX TgtScalar data2[DataSize];
|
|
|
|
|
EIGEN_ALIGN_MAX TgtScalar ref[DataSize];
|
2020-03-27 04:18:19 +08:00
|
|
|
|
|
|
|
|
// Construct a packet of scalars that will not overflow when casting
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < DataSize; ++i) {
|
2020-06-26 05:31:16 +08:00
|
|
|
data1[i] = internal::random_without_cast_overflow<SrcScalar, TgtScalar>::value();
|
2020-03-27 04:18:19 +08:00
|
|
|
}
|
|
|
|
|
|
2020-06-26 05:31:16 +08:00
|
|
|
for (int i = 0; i < DataSize; ++i) {
|
|
|
|
|
ref[i] = static_cast<const TgtScalar>(data1[i]);
|
|
|
|
|
}
|
2020-03-27 04:18:19 +08:00
|
|
|
|
2020-05-29 01:41:28 +08:00
|
|
|
pcast_array<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio>::cast(data1, DataSize, data2);
|
|
|
|
|
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, DataSize) && "internal::pcast<>");
|
2024-11-26 14:27:26 +08:00
|
|
|
|
|
|
|
|
// Test that pcast<SrcScalar, TgtScalar> generates the same result.
|
|
|
|
|
for (int i = 0; i < DataSize; ++i) {
|
|
|
|
|
data2[i] = internal::pcast<SrcScalar, TgtScalar>(data1[i]);
|
|
|
|
|
}
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, DataSize) && "internal::pcast<>");
|
2020-03-27 04:18:19 +08:00
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
|
struct test_cast {
|
|
|
|
|
static void run() {
|
|
|
|
|
typedef typename internal::unpacket_traits<SrcPacket>::type SrcScalar;
|
|
|
|
|
typedef typename internal::unpacket_traits<TgtPacket>::type TgtScalar;
|
|
|
|
|
typedef typename internal::type_casting_traits<SrcScalar, TgtScalar> TypeCastingTraits;
|
|
|
|
|
static const int SrcCoeffRatio = TypeCastingTraits::SrcCoeffRatio;
|
|
|
|
|
static const int TgtCoeffRatio = TypeCastingTraits::TgtCoeffRatio;
|
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
|
|
|
|
static const bool HasCast =
|
|
|
|
|
internal::unpacket_traits<SrcPacket>::vectorizable && internal::unpacket_traits<TgtPacket>::vectorizable &&
|
|
|
|
|
TypeCastingTraits::VectorizedCast && (SrcPacketSize * SrcCoeffRatio == TgtPacketSize * TgtCoeffRatio);
|
|
|
|
|
test_cast_helper<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio, HasCast>::run();
|
|
|
|
|
}
|
|
|
|
|
};
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtScalar,
|
|
|
|
|
typename TgtPacket = typename internal::packet_traits<TgtScalar>::type,
|
|
|
|
|
bool Vectorized = internal::packet_traits<TgtScalar>::Vectorizable,
|
|
|
|
|
bool HasHalf = !internal::is_same<typename internal::unpacket_traits<TgtPacket>::half, TgtPacket>::value>
|
|
|
|
|
struct test_cast_runner;
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtScalar, typename TgtPacket>
|
|
|
|
|
struct test_cast_runner<SrcPacket, TgtScalar, TgtPacket, true, false> {
|
|
|
|
|
static void run() { test_cast<SrcPacket, TgtPacket>::run(); }
|
|
|
|
|
};
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtScalar, typename TgtPacket>
|
|
|
|
|
struct test_cast_runner<SrcPacket, TgtScalar, TgtPacket, true, true> {
|
|
|
|
|
static void run() {
|
|
|
|
|
test_cast<SrcPacket, TgtPacket>::run();
|
|
|
|
|
test_cast_runner<SrcPacket, TgtScalar, typename internal::unpacket_traits<TgtPacket>::half>::run();
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename SrcPacket, typename TgtScalar, typename TgtPacket>
|
|
|
|
|
struct test_cast_runner<SrcPacket, TgtScalar, TgtPacket, false, false> {
|
|
|
|
|
static void run() {}
|
|
|
|
|
};
|
|
|
|
|
|
2020-06-26 05:31:16 +08:00
|
|
|
template <typename Scalar, typename Packet, typename EnableIf = void>
|
|
|
|
|
struct packetmath_pcast_ops_runner {
|
|
|
|
|
static void run() {
|
|
|
|
|
test_cast_runner<Packet, float>::run();
|
|
|
|
|
test_cast_runner<Packet, double>::run();
|
|
|
|
|
test_cast_runner<Packet, int8_t>::run();
|
|
|
|
|
test_cast_runner<Packet, uint8_t>::run();
|
|
|
|
|
test_cast_runner<Packet, int16_t>::run();
|
|
|
|
|
test_cast_runner<Packet, uint16_t>::run();
|
|
|
|
|
test_cast_runner<Packet, int32_t>::run();
|
|
|
|
|
test_cast_runner<Packet, uint32_t>::run();
|
|
|
|
|
test_cast_runner<Packet, int64_t>::run();
|
|
|
|
|
test_cast_runner<Packet, uint64_t>::run();
|
|
|
|
|
test_cast_runner<Packet, bool>::run();
|
2020-07-02 02:41:59 +08:00
|
|
|
test_cast_runner<Packet, std::complex<float>>::run();
|
|
|
|
|
test_cast_runner<Packet, std::complex<double>>::run();
|
2020-06-26 05:31:16 +08:00
|
|
|
test_cast_runner<Packet, half>::run();
|
|
|
|
|
test_cast_runner<Packet, bfloat16>::run();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Only some types support cast from std::complex<>.
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
2022-03-17 00:43:40 +08:00
|
|
|
struct packetmath_pcast_ops_runner<Scalar, Packet, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
|
2020-06-26 05:31:16 +08:00
|
|
|
static void run() {
|
2020-07-02 02:41:59 +08:00
|
|
|
test_cast_runner<Packet, std::complex<float>>::run();
|
|
|
|
|
test_cast_runner<Packet, std::complex<double>>::run();
|
2020-06-26 05:31:16 +08:00
|
|
|
test_cast_runner<Packet, half>::run();
|
|
|
|
|
test_cast_runner<Packet, bfloat16>::run();
|
|
|
|
|
}
|
|
|
|
|
};
|
2020-03-27 04:18:19 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void packetmath_boolean_mask_ops() {
|
2024-03-05 01:44:38 +08:00
|
|
|
using RealScalar = typename NumTraits<Scalar>::Real;
|
2020-04-21 04:16:28 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = 2 * PacketSize;
|
2020-04-21 04:16:28 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-04-21 04:16:28 +08:00
|
|
|
data1[i] = internal::random<Scalar>();
|
|
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE1_MASK(internal::ptrue, internal::ptrue);
|
2020-05-12 04:23:31 +08:00
|
|
|
CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2024-03-05 01:44:38 +08:00
|
|
|
data1[i] = Scalar(RealScalar(i));
|
2020-05-12 04:23:31 +08:00
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2020-10-29 04:15:09 +08:00
|
|
|
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
|
2020-06-25 04:27:26 +08:00
|
|
|
|
|
|
|
|
// Test (-0) == (0) for signed operations
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = Scalar(-0.0);
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
|
2020-06-25 04:27:26 +08:00
|
|
|
|
|
|
|
|
// Test NaN
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[i] = NumTraits<Scalar>::quiet_NaN();
|
2020-06-25 04:27:26 +08:00
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
|
2020-05-12 04:23:31 +08:00
|
|
|
}
|
|
|
|
|
|
2021-04-26 04:58:56 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void packetmath_boolean_mask_ops_real() {
|
|
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
|
const int size = 2 * PacketSize;
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = internal::random<Scalar>();
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
|
|
|
|
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
|
2021-04-26 04:58:56 +08:00
|
|
|
|
|
|
|
|
// Test (-0) <=/< (0) for signed operations
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = Scalar(-0.0);
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
|
2021-04-26 04:58:56 +08:00
|
|
|
|
|
|
|
|
// Test NaN
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = NumTraits<Scalar>::quiet_NaN();
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
|
2021-04-26 04:58:56 +08:00
|
|
|
}
|
|
|
|
|
|
2023-04-18 07:33:59 +08:00
|
|
|
// Fallback used when no specialization's enable_if condition holds: run()
// does nothing.
template <typename Scalar, typename Packet, typename EnableIf = void>
struct packetmath_boolean_mask_ops_notcomplex_test {
  static void run() {
    // Intentionally empty.
  }
};
|
|
|
|
|
|
2021-04-24 03:51:43 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
2023-04-18 07:33:59 +08:00
|
|
|
struct packetmath_boolean_mask_ops_notcomplex_test<
|
|
|
|
|
Scalar, Packet,
|
|
|
|
|
std::enable_if_t<internal::packet_traits<Scalar>::HasCmp && !internal::is_same<Scalar, bool>::value>> {
|
|
|
|
|
static void run() {
|
|
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
|
const int size = 2 * PacketSize;
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
2021-04-24 03:51:43 +08:00
|
|
|
|
2023-04-18 07:33:59 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = internal::random<Scalar>();
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2021-04-24 03:51:43 +08:00
|
|
|
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_le, internal::pcmp_le);
|
|
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_lt, internal::pcmp_lt);
|
2021-04-24 03:51:43 +08:00
|
|
|
|
|
|
|
|
// Test (-0) <=/< (0) for signed operations
|
2023-04-18 07:33:59 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = Scalar(-0.0);
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_le, internal::pcmp_le);
|
|
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_lt, internal::pcmp_lt);
|
2021-04-24 03:51:43 +08:00
|
|
|
|
|
|
|
|
// Test NaN
|
2023-04-18 07:33:59 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = NumTraits<Scalar>::quiet_NaN();
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_le, internal::pcmp_le);
|
|
|
|
|
CHECK_CWISE2_MASK(internal::pcmp_lt, internal::pcmp_lt);
|
2021-04-24 03:51:43 +08:00
|
|
|
}
|
2023-04-18 07:33:59 +08:00
|
|
|
};
|
2021-04-24 03:51:43 +08:00
|
|
|
|
2023-04-18 07:33:59 +08:00
|
|
|
// Fallback used when no specialization's enable_if condition holds: run()
// does nothing.
template <typename Scalar, typename Packet, typename EnableIf = void>
struct packetmath_minus_zero_add_test {
  static void run() {
    // Intentionally empty.
  }
};
|
2020-04-21 04:16:28 +08:00
|
|
|
|
2020-06-25 04:27:26 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
2023-04-18 07:33:59 +08:00
|
|
|
struct packetmath_minus_zero_add_test<Scalar, Packet, std::enable_if_t<!NumTraits<Scalar>::IsInteger>> {
|
|
|
|
|
static void run() {
|
|
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
|
const int size = 2 * PacketSize;
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size] = {};
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size] = {};
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[size] = {};
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = Scalar(-0.0);
|
|
|
|
|
data1[i + PacketSize] = Scalar(-0.0);
|
|
|
|
|
}
|
|
|
|
|
CHECK_CWISE2_IF(internal::packet_traits<Scalar>::HasAdd, REF_ADD, internal::padd);
|
2020-06-25 04:27:26 +08:00
|
|
|
}
|
2023-04-18 07:33:59 +08:00
|
|
|
};
|
2020-06-25 04:27:26 +08:00
|
|
|
|
2021-03-04 11:22:15 +08:00
|
|
|
// Checks that the optimization barrier compiles and leaves its operand
// unchanged.
// The barrier only applies to raw types, so it will not work for std::complex,
// Eigen::half or Eigen::bfloat16; those would need the barrier applied to an
// underlying storage element instead. This primary template is the fallback
// for such types and does nothing.
template <typename Packet, typename EnableIf = void>
struct eigen_optimization_barrier_test {
  static void run() {
    // Intentionally empty.
  }
};
|
|
|
|
|
|
|
|
|
|
template <typename Packet>
|
2022-03-17 00:43:40 +08:00
|
|
|
struct eigen_optimization_barrier_test<
|
2021-03-04 11:22:15 +08:00
|
|
|
Packet, std::enable_if_t<!NumTraits<Packet>::IsComplex && !internal::is_same<Packet, Eigen::half>::value &&
|
|
|
|
|
!internal::is_same<Packet, Eigen::bfloat16>::value>> {
|
|
|
|
|
static void run() {
|
|
|
|
|
typedef typename internal::unpacket_traits<Packet>::type Scalar;
|
|
|
|
|
Scalar s = internal::random<Scalar>();
|
|
|
|
|
Packet barrier = internal::pset1<Packet>(s);
|
|
|
|
|
EIGEN_OPTIMIZATION_BARRIER(barrier);
|
|
|
|
|
eigen_assert(s == internal::pfirst(barrier) && "EIGEN_OPTIMIZATION_BARRIER");
|
|
|
|
|
}
|
|
|
|
|
};
|
2020-06-25 04:27:26 +08:00
|
|
|
|
2024-04-13 03:35:04 +08:00
|
|
|
// Negation-related checks; HasNegate defaults to
// internal::packet_traits<Scalar>::HasNegate.
//
// Both bodies consist solely of a CHECK_CWISE*_IF macro that is not handed the
// buffers, so the parameter names (data1, data2, ref, PacketSize) are
// presumably picked up by name inside the macros -- their definitions are not
// in this part of the file. Do not rename them without checking.
template <typename Scalar, typename Packet, bool HasNegate = internal::packet_traits<Scalar>::HasNegate>
struct negate_test_impl {
  // Checks internal::pnegate against test::negate.
  static void run_negate(Scalar* data1, Scalar* data2, Scalar* ref, int PacketSize) {
    CHECK_CWISE1_IF(HasNegate, test::negate, internal::pnegate);
  }

  // Checks internal::pnmsub against REF_NMSUB.
  static void run_nmsub(Scalar* data1, Scalar* data2, Scalar* ref, int PacketSize) {
    CHECK_CWISE3_IF(HasNegate, REF_NMSUB, internal::pnmsub);
  }
};
|
|
|
|
|
|
|
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
struct negate_test_impl<Scalar, Packet, false> {
|
|
|
|
|
static void run_negate(Scalar*, Scalar*, Scalar*, int) {}
|
|
|
|
|
static void run_nmsub(Scalar*, Scalar*, Scalar*, int) {}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void negate_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
|
|
|
|
|
negate_test_impl<Scalar, Packet>::run_negate(data1, data2, ref, size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void nmsub_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
|
2024-08-28 11:02:47 +08:00
|
|
|
negate_test_impl<Scalar, Packet>::run_nmsub(data1, data2, ref, size);
|
2024-04-13 03:35:04 +08:00
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void packetmath() {
|
2015-10-13 15:53:46 +08:00
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2010-07-05 16:54:24 +08:00
|
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
2008-08-21 04:08:38 +08:00
|
|
|
|
2018-11-26 21:10:07 +08:00
|
|
|
if (g_first_pass)
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
std::cerr << "=== Testing packet of type '" << typeid(Packet).name() << "' and scalar type '"
|
|
|
|
|
<< typeid(Scalar).name() << "' and size '" << PacketSize << "' ===\n";
|
2018-11-26 21:10:07 +08:00
|
|
|
|
2014-01-30 03:43:05 +08:00
|
|
|
const int max_size = PacketSize > 4 ? PacketSize : 4;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = PacketSize * max_size;
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for the scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data3[size];
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
2018-07-06 23:13:36 +08:00
|
|
|
RealScalar refvalue = RealScalar(0);
|
2021-03-04 11:22:15 +08:00
|
|
|
|
|
|
|
|
eigen_optimization_barrier_test<Packet>::run();
|
|
|
|
|
eigen_optimization_barrier_test<Scalar>::run();
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2025-03-23 09:05:53 +08:00
|
|
|
data1[i] = internal::random<Scalar>();
|
|
|
|
|
data2[i] = internal::random<Scalar>();
|
2020-02-10 22:58:37 +08:00
|
|
|
refvalue = (std::max)(refvalue, numext::abs(data1[i]));
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
|
2010-10-25 22:15:22 +08:00
|
|
|
internal::pstore(data2, internal::pload<Packet>(data1));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(data1, data2, PacketSize) && "aligned load/store");
|
2008-08-21 04:08:38 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
|
internal::pstore(data2, internal::ploadu<Packet>(data1 + offset));
|
|
|
|
|
VERIFY(test::areApprox(data1 + offset, data2, PacketSize) && "internal::ploadu");
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
|
internal::pstoreu(data2 + offset, internal::pload<Packet>(data1));
|
|
|
|
|
VERIFY(test::areApprox(data1, data2 + offset, PacketSize) && "internal::pstoreu");
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
|
2022-06-28 03:18:00 +08:00
|
|
|
for (int M = 0; M < PacketSize; ++M) {
|
|
|
|
|
for (int N = 0; N <= PacketSize; ++N) {
|
|
|
|
|
for (int j = 0; j < size; ++j) {
|
2025-03-23 09:05:53 +08:00
|
|
|
data1[j] = internal::random<Scalar>();
|
|
|
|
|
data2[j] = internal::random<Scalar>();
|
2022-06-28 03:18:00 +08:00
|
|
|
refvalue = (std::max)(refvalue, numext::abs(data1[j]));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (M == 0) {
|
|
|
|
|
internal::pstore_partial(data2, internal::pload_partial<Packet>(data1, N), N);
|
|
|
|
|
VERIFY(test::areApprox(data1, data2, N) && "aligned loadN/storeN");
|
|
|
|
|
|
|
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
|
internal::pstore_partial(data2, internal::ploadu_partial<Packet>(data1 + offset, N), N);
|
|
|
|
|
VERIFY(test::areApprox(data1 + offset, data2, N) && "internal::ploadu_partial");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
|
internal::pstoreu_partial(data2 + offset, internal::pload_partial<Packet>(data1, N), N);
|
|
|
|
|
VERIFY(test::areApprox(data1, data2 + offset, N) && "internal::pstoreu_partial");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (N + M > PacketSize) continue; // Don't read or write past end of Packet
|
|
|
|
|
|
|
|
|
|
internal::pstore_partial(data2, internal::pload_partial<Packet>(data1, N, M), N, M);
|
|
|
|
|
VERIFY(test::areApprox(data1, data2, N) && "aligned offset loadN/storeN");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (internal::unpacket_traits<Packet>::masked_load_available) {
|
2020-01-11 18:31:21 +08:00
|
|
|
test::packet_helper<internal::unpacket_traits<Packet>::masked_load_available, Packet> h;
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for the scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
unsigned long long max_umask = (0x1ull << PacketSize);
|
2019-05-03 04:14:18 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
|
for (unsigned long long umask = 0; umask < max_umask; ++umask) {
|
|
|
|
|
h.store(data2, h.load(data1 + offset, umask));
|
|
|
|
|
for (int k = 0; k < PacketSize; ++k) data3[k] = ((umask & (0x1ull << k)) >> k) ? data1[k + offset] : Scalar(0);
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(data3, data2, PacketSize) && "internal::ploadu masked");
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for the scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
}
|
|
|
|
|
}
|
2019-05-03 05:52:58 +08:00
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (internal::unpacket_traits<Packet>::masked_store_available) {
|
2020-01-11 18:31:21 +08:00
|
|
|
test::packet_helper<internal::unpacket_traits<Packet>::masked_store_available, Packet> h;
|
2019-05-03 05:52:58 +08:00
|
|
|
unsigned long long max_umask = (0x1ull << PacketSize);
|
2019-05-03 04:14:18 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
|
for (unsigned long long umask = 0; umask < max_umask; ++umask) {
|
2019-05-03 04:14:18 +08:00
|
|
|
internal::pstore(data2, internal::pset1<Packet>(Scalar(0)));
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
h.store(data2, h.loadu(data1 + offset), umask);
|
|
|
|
|
for (int k = 0; k < PacketSize; ++k) data3[k] = ((umask & (0x1ull << k)) >> k) ? data1[k + offset] : Scalar(0);
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(data3, data2, PacketSize) && "internal::pstoreu masked");
|
2019-05-03 04:14:18 +08:00
|
|
|
}
|
|
|
|
|
}
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for the scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
}
|
|
|
|
|
|
2015-10-13 15:53:46 +08:00
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
|
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
|
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd);
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub);
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
|
|
|
|
|
|
2024-04-13 03:35:04 +08:00
|
|
|
negate_test<Scalar, Packet>(data1, data2, ref, PacketSize);
|
2022-01-22 07:49:18 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasReciprocal, REF_RECIPROCAL, internal::preciprocal);
|
2013-06-11 05:40:56 +08:00
|
|
|
CHECK_CWISE1(numext::conj, internal::pconj);
|
2025-04-02 06:12:00 +08:00
|
|
|
|
2022-08-10 03:54:57 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSign, numext::sign, internal::psign);
|
2008-08-21 04:08:38 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < 3; ++offset) {
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[offset];
|
2011-02-24 02:24:26 +08:00
|
|
|
internal::pstore(data2, internal::pset1<Packet>(data1[offset]));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pset1");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
2016-03-28 22:58:02 +08:00
|
|
|
|
2014-04-25 17:21:18 +08:00
|
|
|
{
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 4; ++i) ref[i] = data1[i / PacketSize];
|
2014-04-25 17:21:18 +08:00
|
|
|
Packet A0, A1, A2, A3;
|
2014-04-25 17:46:22 +08:00
|
|
|
internal::pbroadcast4<Packet>(data1, A0, A1, A2, A3);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(data2 + 0 * PacketSize, A0);
|
|
|
|
|
internal::pstore(data2 + 1 * PacketSize, A1);
|
|
|
|
|
internal::pstore(data2 + 2 * PacketSize, A2);
|
|
|
|
|
internal::pstore(data2 + 3 * PacketSize, A3);
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, 4 * PacketSize) && "internal::pbroadcast4");
|
2014-04-25 17:21:18 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2014-04-25 17:21:18 +08:00
|
|
|
{
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 2; ++i) ref[i] = data1[i / PacketSize];
|
2014-05-05 21:03:29 +08:00
|
|
|
Packet A0, A1;
|
2014-04-25 17:46:22 +08:00
|
|
|
internal::pbroadcast2<Packet>(data1, A0, A1);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(data2 + 0 * PacketSize, A0);
|
|
|
|
|
internal::pstore(data2 + 1 * PacketSize, A1);
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, 2 * PacketSize) && "internal::pbroadcast2");
|
2014-04-25 17:21:18 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2010-10-25 22:15:22 +08:00
|
|
|
VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload<Packet>(data1))) && "internal::pfirst");
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketSize > 1) {
|
2018-09-28 22:57:32 +08:00
|
|
|
// apply different offsets to check that ploaddup is robust to unaligned inputs
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < 4; ++offset) {
|
|
|
|
|
for (int i = 0; i < PacketSize / 2; ++i) ref[2 * i + 0] = ref[2 * i + 1] = data1[offset + i];
|
|
|
|
|
internal::pstore(data2, internal::ploaddup<Packet>(data1 + offset));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "ploaddup");
|
2011-02-24 05:22:10 +08:00
|
|
|
}
|
2011-02-23 23:20:55 +08:00
|
|
|
}
|
2016-03-28 06:47:49 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketSize > 2) {
|
2018-09-28 22:57:32 +08:00
|
|
|
// apply different offsets to check that ploadquad is robust to unaligned inputs
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < 4; ++offset) {
|
|
|
|
|
for (int i = 0; i < PacketSize / 4; ++i)
|
|
|
|
|
ref[4 * i + 0] = ref[4 * i + 1] = ref[4 * i + 2] = ref[4 * i + 3] = data1[offset + i];
|
|
|
|
|
internal::pstore(data2, internal::ploadquad<Packet>(data1 + offset));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "ploadquad");
|
2014-04-17 22:27:22 +08:00
|
|
|
}
|
|
|
|
|
}
|
2008-08-21 04:08:38 +08:00
|
|
|
|
2018-07-06 23:13:36 +08:00
|
|
|
ref[0] = Scalar(0);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] += data1[i];
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
|
2016-03-28 22:58:02 +08:00
|
|
|
|
2021-05-21 22:12:25 +08:00
|
|
|
if (!internal::is_same<Packet, typename internal::unpacket_traits<Packet>::half>::value) {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
int HalfPacketSize = PacketSize > 4 ? PacketSize / 2 : PacketSize;
|
|
|
|
|
for (int i = 0; i < HalfPacketSize; ++i) ref[i] = Scalar(0);
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i % HalfPacketSize] += data1[i];
|
2025-11-07 05:52:19 +08:00
|
|
|
internal::pstore(data2, internal::predux_half(internal::pload<Packet>(data1)));
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, HalfPacketSize) && "internal::predux_half");
|
2014-04-17 22:27:22 +08:00
|
|
|
}
|
2009-03-10 02:40:09 +08:00
|
|
|
|
2024-04-26 23:28:03 +08:00
|
|
|
// Avoid overflows.
|
2024-04-30 04:32:09 +08:00
|
|
|
if (NumTraits<Scalar>::IsInteger && NumTraits<Scalar>::IsSigned &&
|
|
|
|
|
Eigen::internal::unpacket_traits<Packet>::size > 1) {
|
2025-03-23 09:05:53 +08:00
|
|
|
Scalar limit = static_cast<Scalar>(
|
|
|
|
|
static_cast<RealScalar>(std::pow(static_cast<double>(numext::real(NumTraits<Scalar>::highest())),
|
|
|
|
|
1.0 / static_cast<double>(Eigen::internal::unpacket_traits<Packet>::size))));
|
2024-04-26 23:28:03 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2025-03-23 09:05:53 +08:00
|
|
|
data1[i] = internal::random<Scalar>(Scalar(0) - limit, limit);
|
2024-04-26 23:28:03 +08:00
|
|
|
}
|
2025-06-21 06:40:46 +08:00
|
|
|
} else if (!NumTraits<Scalar>::IsInteger && !NumTraits<Scalar>::IsComplex && !std::is_same<Scalar, bool>::value) {
|
2025-03-24 13:31:25 +08:00
|
|
|
// Prevent very small product results by adjusting range. Otherwise,
|
|
|
|
|
// we may end up with multiplying e.g. 32 Eigen::halfs with values < 1.
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2025-06-06 06:48:57 +08:00
|
|
|
data1[i] = REF_MUL(internal::random<Scalar>(Scalar(0.5), Scalar(1)),
|
|
|
|
|
(internal::random<bool>() ? Scalar(-1) : Scalar(1)));
|
2025-03-24 13:31:25 +08:00
|
|
|
}
|
2024-04-26 23:28:03 +08:00
|
|
|
}
|
2018-07-06 23:13:36 +08:00
|
|
|
ref[0] = Scalar(1);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = REF_MUL(ref[0], data1[i]);
|
2010-10-25 22:15:22 +08:00
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
|
2009-03-10 02:40:09 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[PacketSize - i - 1];
|
2010-10-25 22:15:22 +08:00
|
|
|
internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::preverse");
|
2014-03-27 10:03:07 +08:00
|
|
|
|
2014-04-25 16:56:18 +08:00
|
|
|
internal::PacketBlock<Packet> kernel;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
kernel.packet[i] = internal::pload<Packet>(data1 + i * PacketSize);
|
2014-03-27 10:03:07 +08:00
|
|
|
}
|
|
|
|
|
ptranspose(kernel);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2014-03-27 10:03:07 +08:00
|
|
|
internal::pstore(data2, kernel.packet[i]);
|
|
|
|
|
for (int j = 0; j < PacketSize; ++j) {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(test::isApproxAbs(data2[j], data1[i + j * PacketSize], refvalue) && "ptranspose");
|
2021-05-19 16:26:45 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-25 12:34:35 +08:00
|
|
|
// GeneralBlockPanelKernel also checks PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize>;
|
|
|
|
|
if (PacketSize > 4 && PacketSize % 4 == 0) {
|
|
|
|
|
internal::PacketBlock<Packet, PacketSize % 4 == 0 ? 4 : PacketSize> kernel2;
|
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
|
kernel2.packet[i] = internal::pload<Packet>(data1 + i * PacketSize);
|
2021-05-19 16:26:45 +08:00
|
|
|
}
|
2021-05-25 12:34:35 +08:00
|
|
|
ptranspose(kernel2);
|
|
|
|
|
int data_counter = 0;
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
for (int j = 0; j < 4; ++j) {
|
|
|
|
|
data2[data_counter++] = data1[j * PacketSize + i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
|
internal::pstore(data3, kernel2.packet[i]);
|
|
|
|
|
for (int j = 0; j < PacketSize; ++j) {
|
|
|
|
|
VERIFY(test::isApproxAbs(data3[j], data2[i * PacketSize + j], refvalue) && "ptranspose");
|
|
|
|
|
}
|
2014-03-27 10:03:07 +08:00
|
|
|
}
|
|
|
|
|
}
|
2014-06-07 11:18:44 +08:00
|
|
|
|
2019-06-21 02:47:49 +08:00
|
|
|
{
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
// "if" mask
|
2024-02-10 03:46:45 +08:00
|
|
|
// Note: it's UB to load 0xFF directly into a `bool`.
|
|
|
|
|
uint8_t v =
|
|
|
|
|
internal::random<bool>() ? (std::is_same<Scalar, bool>::value ? static_cast<uint8_t>(true) : 0xff) : 0;
|
|
|
|
|
// Avoid strict aliasing violation by using memset.
|
2024-02-13 07:27:48 +08:00
|
|
|
memset(static_cast<void*>(data1 + i), v, sizeof(Scalar));
|
2019-06-21 02:47:49 +08:00
|
|
|
// "then" packet
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
data1[i + PacketSize] = internal::random<Scalar>();
|
2019-06-21 02:47:49 +08:00
|
|
|
// "else" packet
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
data1[i + 2 * PacketSize] = internal::random<Scalar>();
|
2019-06-21 02:47:49 +08:00
|
|
|
}
|
|
|
|
|
CHECK_CWISE3_IF(true, internal::pselect, internal::pselect);
|
|
|
|
|
}
|
2019-01-08 08:53:36 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-04-21 04:16:28 +08:00
|
|
|
data1[i] = internal::random<Scalar>();
|
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
CHECK_CWISE1(internal::pzero, internal::pzero);
|
|
|
|
|
CHECK_CWISE2_IF(true, internal::por, internal::por);
|
|
|
|
|
CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
|
|
|
|
|
CHECK_CWISE2_IF(true, internal::pand, internal::pand);
|
2020-04-21 04:16:28 +08:00
|
|
|
|
2020-05-12 04:23:31 +08:00
|
|
|
packetmath_boolean_mask_ops<Scalar, Packet>();
|
2020-06-26 05:31:16 +08:00
|
|
|
packetmath_pcast_ops_runner<Scalar, Packet>::run();
|
2023-04-18 07:33:59 +08:00
|
|
|
packetmath_minus_zero_add_test<Scalar, Packet>::run();
|
2020-12-09 10:13:35 +08:00
|
|
|
|
2022-01-26 12:25:41 +08:00
|
|
|
CHECK_CWISE3_IF(true, REF_MADD, internal::pmadd);
|
2022-08-03 01:42:45 +08:00
|
|
|
if (!std::is_same<Scalar, bool>::value && NumTraits<Scalar>::IsSigned) {
|
2024-04-13 03:35:04 +08:00
|
|
|
nmsub_test<Scalar, Packet>(data1, data2, ref, PacketSize);
|
2022-08-03 01:42:45 +08:00
|
|
|
}
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2022-08-03 01:42:45 +08:00
|
|
|
// For pmsub, pnmadd, the values can cancel each other to become near zero,
|
|
|
|
|
// which can lead to very flaky tests. Here we ensure the signs are such that
|
|
|
|
|
// they do not cancel.
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2025-03-13 01:06:32 +08:00
|
|
|
data1[i] = abs_helper(internal::random<Scalar>());
|
|
|
|
|
data1[i + PacketSize] = abs_helper(internal::random<Scalar>());
|
|
|
|
|
data1[i + 2 * PacketSize] = Scalar(0) - abs_helper(internal::random<Scalar>());
|
2022-08-03 01:42:45 +08:00
|
|
|
}
|
2022-01-27 10:20:03 +08:00
|
|
|
if (!std::is_same<Scalar, bool>::value && NumTraits<Scalar>::IsSigned) {
|
2022-01-26 12:25:41 +08:00
|
|
|
CHECK_CWISE3_IF(true, REF_MSUB, internal::pmsub);
|
2024-04-13 03:35:04 +08:00
|
|
|
CHECK_CWISE3_IF(true, REF_NMADD, internal::pnmadd);
|
2022-01-26 12:25:41 +08:00
|
|
|
}
|
2025-03-13 01:06:32 +08:00
|
|
|
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt);
|
2025-04-18 07:31:20 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasCbrt, numext::cbrt, internal::pcbrt);
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
|
2020-12-05 05:45:09 +08:00
|
|
|
// Notice that this definition works for complex types as well.
|
|
|
|
|
// c++11 has std::log2 for real, but not for complex types.
|
|
|
|
|
template <typename Scalar>
|
|
|
|
|
Scalar log2(Scalar x) {
|
2020-12-12 06:34:31 +08:00
|
|
|
return Scalar(EIGEN_LOG2E) * std::log(x);
|
2020-12-05 05:45:09 +08:00
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void packetmath_real() {
|
2015-10-13 15:53:46 +08:00
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2009-03-25 20:26:13 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = PacketSize * 4;
|
2022-11-16 02:07:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4] = {};
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4] = {};
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4] = {};
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2022-11-16 02:07:23 +08:00
|
|
|
// Negate with -0.
|
|
|
|
|
if (PacketTraits::HasNegate) {
|
|
|
|
|
test::packet_helper<PacketTraits::HasNegate, Packet> h;
|
|
|
|
|
data1[0] = Scalar{-0};
|
|
|
|
|
h.store(data2, internal::pnegate(h.load(data1)));
|
|
|
|
|
typedef typename internal::make_unsigned<typename internal::make_integer<Scalar>::type>::type Bits;
|
|
|
|
|
Bits bits = numext::bit_cast<Bits>(data2[0]);
|
|
|
|
|
VERIFY_IS_EQUAL(bits, static_cast<Bits>(Bits(1) << (sizeof(Scalar) * CHAR_BIT - 1)));
|
|
|
|
|
}
|
2010-07-05 16:54:24 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
|
|
|
|
data2[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
2020-01-11 18:31:21 +08:00
|
|
|
}
|
|
|
|
|
|
2020-10-10 04:05:49 +08:00
|
|
|
if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = Scalar(0);
|
2020-01-11 18:31:21 +08:00
|
|
|
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
|
2020-12-05 05:45:09 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasLog, log2, internal::plog2);
|
2021-02-27 05:59:46 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt);
|
2020-01-11 18:31:21 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));
|
|
|
|
|
data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));
|
2009-03-25 20:26:13 +08:00
|
|
|
}
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin);
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
|
2015-11-05 05:15:57 +08:00
|
|
|
|
2021-03-04 11:22:15 +08:00
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
2024-04-30 07:45:49 +08:00
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::ceil, internal::pceil);
|
|
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::floor, internal::pfloor);
|
|
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::rint, internal::print);
|
|
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::trunc, internal::ptrunc);
|
2022-08-10 03:54:57 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSign, numext::sign, internal::psign);
|
2021-04-26 04:58:56 +08:00
|
|
|
|
|
|
|
|
packetmath_boolean_mask_ops_real<Scalar, Packet>();
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2021-02-26 06:29:49 +08:00
|
|
|
// Rounding edge cases.
|
2024-04-30 07:45:49 +08:00
|
|
|
if (PacketTraits::HasRound) {
|
2021-02-26 06:29:49 +08:00
|
|
|
typedef typename internal::make_integer<Scalar>::type IntType;
|
|
|
|
|
// Start with values that cannot fit inside an integer, work down to less than one.
|
2021-02-26 12:39:56 +08:00
|
|
|
Scalar val =
|
|
|
|
|
numext::mini(Scalar(2) * static_cast<Scalar>(NumTraits<IntType>::highest()), NumTraits<Scalar>::highest());
|
2021-02-26 06:29:49 +08:00
|
|
|
std::vector<Scalar> values;
|
|
|
|
|
while (val > Scalar(0.25)) {
|
|
|
|
|
// Cover both even and odd, positive and negative cases.
|
|
|
|
|
values.push_back(val);
|
|
|
|
|
values.push_back(val + Scalar(0.3));
|
|
|
|
|
values.push_back(val + Scalar(0.5));
|
|
|
|
|
values.push_back(val + Scalar(0.8));
|
|
|
|
|
values.push_back(val + Scalar(1));
|
|
|
|
|
values.push_back(val + Scalar(1.3));
|
|
|
|
|
values.push_back(val + Scalar(1.5));
|
|
|
|
|
values.push_back(val + Scalar(1.8));
|
|
|
|
|
values.push_back(-val);
|
|
|
|
|
values.push_back(-val - Scalar(0.3));
|
|
|
|
|
values.push_back(-val - Scalar(0.5));
|
|
|
|
|
values.push_back(-val - Scalar(0.8));
|
|
|
|
|
values.push_back(-val - Scalar(1));
|
|
|
|
|
values.push_back(-val - Scalar(1.3));
|
|
|
|
|
values.push_back(-val - Scalar(1.5));
|
|
|
|
|
values.push_back(-val - Scalar(1.8));
|
|
|
|
|
values.push_back(Scalar(-1.5) + val); // Bug 1785.
|
|
|
|
|
val = val / Scalar(2);
|
|
|
|
|
}
|
2021-02-26 06:29:49 +08:00
|
|
|
values.push_back(NumTraits<Scalar>::infinity());
|
|
|
|
|
values.push_back(-NumTraits<Scalar>::infinity());
|
|
|
|
|
values.push_back(NumTraits<Scalar>::quiet_NaN());
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2021-02-26 06:29:49 +08:00
|
|
|
for (size_t k = 0; k < values.size(); ++k) {
|
|
|
|
|
data1[0] = values[k];
|
2021-03-04 11:22:15 +08:00
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
2024-04-30 07:45:49 +08:00
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::ceil, internal::pceil);
|
|
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::floor, internal::pfloor);
|
|
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::rint, internal::print);
|
|
|
|
|
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::trunc, internal::ptrunc);
|
2021-02-26 06:29:49 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
}
|
2019-12-13 03:04:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[i] = Scalar(internal::random<double>(-1, 1));
|
|
|
|
|
data2[i] = Scalar(internal::random<double>(-1, 1));
|
2011-02-18 00:37:11 +08:00
|
|
|
}
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
|
2023-02-21 11:14:05 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasATan, std::atan, internal::patan);
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasATanh, std::atanh, internal::patanh);
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[i] = Scalar(internal::random<double>(-87, 88));
|
|
|
|
|
data2[i] = Scalar(internal::random<double>(-87, 88));
|
2021-12-09 01:57:23 +08:00
|
|
|
data1[0] = -NumTraits<Scalar>::infinity();
|
2009-03-25 20:26:13 +08:00
|
|
|
}
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp);
|
2024-10-23 06:09:34 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp2, internal::pexp2);
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2021-01-21 11:00:09 +08:00
|
|
|
CHECK_CWISE1_BYREF1_IF(PacketTraits::HasExp, REF_FREXP, internal::pfrexp);
|
2021-02-13 03:32:29 +08:00
|
|
|
if (PacketTraits::HasExp) {
|
|
|
|
|
// Check denormals:
|
2023-08-29 08:36:07 +08:00
|
|
|
#if !EIGEN_ARCH_ARM
|
2021-02-13 03:32:29 +08:00
|
|
|
for (int j = 0; j < 3; ++j) {
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = Scalar(std::ldexp(1, NumTraits<Scalar>::min_exponent() - j));
|
2021-02-13 03:32:29 +08:00
|
|
|
CHECK_CWISE1_BYREF1_IF(PacketTraits::HasExp, REF_FREXP, internal::pfrexp);
|
|
|
|
|
data1[0] = -data1[0];
|
|
|
|
|
CHECK_CWISE1_BYREF1_IF(PacketTraits::HasExp, REF_FREXP, internal::pfrexp);
|
|
|
|
|
}
|
2023-08-29 08:36:07 +08:00
|
|
|
#endif
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2021-02-13 03:32:29 +08:00
|
|
|
// zero
|
|
|
|
|
data1[0] = Scalar(0);
|
|
|
|
|
CHECK_CWISE1_BYREF1_IF(PacketTraits::HasExp, REF_FREXP, internal::pfrexp);
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2021-02-13 03:32:29 +08:00
|
|
|
// inf and NaN only compare output fraction, not exponent.
|
|
|
|
|
test::packet_helper<PacketTraits::HasExp, Packet> h;
|
|
|
|
|
Packet pout;
|
|
|
|
|
Scalar sout;
|
|
|
|
|
Scalar special[] = {NumTraits<Scalar>::infinity(), -NumTraits<Scalar>::infinity(), NumTraits<Scalar>::quiet_NaN()};
|
|
|
|
|
for (int i = 0; i < 3; ++i) {
|
|
|
|
|
data1[0] = special[i];
|
|
|
|
|
ref[0] = Scalar(REF_FREXP(data1[0], ref[PacketSize]));
|
|
|
|
|
h.store(data2, internal::pfrexp(h.load(data1), h.forward_reference(pout, sout)));
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, 1) && "internal::pfrexp");
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-04-18 07:33:59 +08:00
|
|
|
|
2021-01-21 11:00:09 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
data1[i] = Scalar(internal::random<double>(-1, 1));
|
|
|
|
|
data2[i] = Scalar(internal::random<double>(-1, 1));
|
|
|
|
|
}
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2020-10-12 19:24:08 +08:00
|
|
|
data1[i + PacketSize] = Scalar(internal::random<int>(-4, 4));
|
|
|
|
|
data2[i + PacketSize] = Scalar(internal::random<double>(-4, 4));
|
2021-01-21 11:00:09 +08:00
|
|
|
}
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
2020-10-12 19:24:08 +08:00
|
|
|
if (PacketTraits::HasExp) {
|
|
|
|
|
data1[0] = Scalar(-1);
|
|
|
|
|
// underflow to zero
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[PacketSize] = Scalar(NumTraits<Scalar>::min_exponent() - 55);
|
2020-10-12 19:24:08 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
|
|
|
|
// overflow to inf
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[PacketSize] = Scalar(NumTraits<Scalar>::max_exponent() + 10);
|
2020-10-12 19:24:08 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
|
|
|
|
// NaN stays NaN
|
|
|
|
|
data1[0] = NumTraits<Scalar>::quiet_NaN();
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
|
// inf stays inf
|
|
|
|
|
data1[0] = NumTraits<Scalar>::infinity();
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[PacketSize] = Scalar(NumTraits<Scalar>::min_exponent() - 10);
|
2020-10-12 19:24:08 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
|
|
|
|
// zero stays zero
|
|
|
|
|
data1[0] = Scalar(0);
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[PacketSize] = Scalar(NumTraits<Scalar>::max_exponent() + 10);
|
2020-10-12 19:24:08 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
|
|
|
|
// Small number big exponent.
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = Scalar(std::ldexp(Scalar(1.0), NumTraits<Scalar>::min_exponent() - 1));
|
|
|
|
|
data1[PacketSize] = Scalar(-NumTraits<Scalar>::min_exponent() + NumTraits<Scalar>::max_exponent());
|
2020-10-12 19:24:08 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
|
|
|
|
// Big number small exponent.
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = Scalar(std::ldexp(Scalar(1.0), NumTraits<Scalar>::max_exponent() - 1));
|
|
|
|
|
data1[PacketSize] = Scalar(+NumTraits<Scalar>::min_exponent() - NumTraits<Scalar>::max_exponent());
|
2020-10-12 19:24:08 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
|
|
|
|
}
|
2021-01-21 11:00:09 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
|
|
|
|
data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
2016-02-11 09:41:47 +08:00
|
|
|
}
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[0] = Scalar(1e-20);
|
2016-02-11 09:41:47 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasExp && PacketSize >= 2) {
|
2021-03-17 11:12:46 +08:00
|
|
|
const Scalar small = NumTraits<Scalar>::epsilon();
|
|
|
|
|
data1[0] = NumTraits<Scalar>::quiet_NaN();
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[1] = small;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test::packet_helper<PacketTraits::HasExp, Packet> h;
|
2015-06-24 10:12:46 +08:00
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
2015-08-16 20:00:02 +08:00
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
2025-11-13 06:19:50 +08:00
|
|
|
VERIFY_IS_APPROX(std::exp(small), data2[1]);
|
2015-06-24 10:12:46 +08:00
|
|
|
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[0] = -small;
|
|
|
|
|
data1[1] = Scalar(0);
|
2015-06-24 10:12:46 +08:00
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
2025-11-13 06:19:50 +08:00
|
|
|
VERIFY_IS_APPROX(std::exp(-small), data2[0]);
|
2015-12-11 09:17:42 +08:00
|
|
|
VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
|
2015-06-24 10:12:46 +08:00
|
|
|
|
|
|
|
|
data1[0] = (std::numeric_limits<Scalar>::min)();
|
|
|
|
|
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
|
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
2020-11-24 08:11:01 +08:00
|
|
|
VERIFY_IS_APPROX(std::exp((std::numeric_limits<Scalar>::min)()), data2[0]);
|
|
|
|
|
VERIFY_IS_APPROX(std::exp(-(std::numeric_limits<Scalar>::min)()), data2[1]);
|
2015-06-24 10:12:46 +08:00
|
|
|
|
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
2020-11-24 08:11:01 +08:00
|
|
|
VERIFY_IS_APPROX(std::exp(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
|
|
|
|
VERIFY_IS_APPROX(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
|
2014-10-20 17:38:51 +08:00
|
|
|
}
|
2010-07-05 16:54:24 +08:00
|
|
|
|
2016-05-11 07:21:43 +08:00
|
|
|
if (PacketTraits::HasTanh) {
|
2016-09-22 17:18:52 +08:00
|
|
|
// NOTE: this test might fail with GCC prior to 6.3, see MathFunctionsImpl.h for details.
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = NumTraits<Scalar>::quiet_NaN();
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test::packet_helper<internal::packet_traits<Scalar>::HasTanh, Packet> h;
|
2016-05-11 07:21:43 +08:00
|
|
|
h.store(data2, internal::ptanh(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-05 07:24:47 +08:00
|
|
|
if (PacketTraits::HasExp) {
|
2020-05-31 06:53:37 +08:00
|
|
|
internal::scalar_logistic_op<Scalar> logistic;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[i] = Scalar(internal::random<double>(-20, 20));
|
2020-05-31 06:53:37 +08:00
|
|
|
}
|
2020-06-05 07:24:47 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test::packet_helper<PacketTraits::HasExp, Packet> h;
|
2020-06-05 07:24:47 +08:00
|
|
|
h.store(data2, logistic.packetOp(h.load(data1)));
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
|
VERIFY_IS_APPROX(data2[i], logistic(data1[i]));
|
2020-05-31 06:53:37 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-11-30 03:18:57 +08:00
|
|
|
#if EIGEN_HAS_C99_MATH
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = NumTraits<Scalar>::infinity();
|
2019-08-29 03:20:21 +08:00
|
|
|
data1[1] = Scalar(-1);
|
2019-08-13 04:53:28 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p);
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = NumTraits<Scalar>::infinity();
|
|
|
|
|
data1[1] = -NumTraits<Scalar>::infinity();
|
2019-08-29 03:20:21 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasExpm1, std::expm1, internal::pexpm1);
|
2015-12-08 08:38:48 +08:00
|
|
|
#endif
|
2015-12-08 07:24:49 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketSize >= 2) {
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = NumTraits<Scalar>::quiet_NaN();
|
|
|
|
|
data1[1] = NumTraits<Scalar>::epsilon();
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasLog) {
|
|
|
|
|
test::packet_helper<PacketTraits::HasLog, Packet> h;
|
2018-04-26 16:47:39 +08:00
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
2020-11-25 08:28:07 +08:00
|
|
|
// TODO(cantonios): Re-enable for bfloat16.
|
|
|
|
|
if (!internal::is_same<Scalar, bfloat16>::value) {
|
|
|
|
|
VERIFY_IS_APPROX(std::log(data1[1]), data2[1]);
|
|
|
|
|
}
|
2018-04-26 16:47:39 +08:00
|
|
|
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = -NumTraits<Scalar>::epsilon();
|
2020-10-10 04:05:49 +08:00
|
|
|
data1[1] = Scalar(0);
|
2018-04-26 16:47:39 +08:00
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
|
VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
|
|
|
|
|
|
|
|
|
|
data1[0] = (std::numeric_limits<Scalar>::min)();
|
|
|
|
|
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
2020-11-25 08:28:07 +08:00
|
|
|
// TODO(cantonios): Re-enable for bfloat16.
|
|
|
|
|
if (!internal::is_same<Scalar, bfloat16>::value) {
|
|
|
|
|
VERIFY_IS_APPROX(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
|
|
|
|
|
}
|
2018-04-26 16:47:39 +08:00
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
|
2020-06-23 23:58:34 +08:00
|
|
|
// Note: 32-bit arm always flushes denorms to zero.
|
|
|
|
|
#if !EIGEN_ARCH_ARM
|
2020-06-30 02:35:46 +08:00
|
|
|
if (std::numeric_limits<Scalar>::has_denorm == std::denorm_present) {
|
2020-06-23 23:58:34 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
2022-01-07 09:10:17 +08:00
|
|
|
// TODO(rmlarsen): Re-enable for bfloat16.
|
|
|
|
|
if (!internal::is_same<Scalar, bfloat16>::value) {
|
|
|
|
|
VERIFY_IS_APPROX(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
|
|
|
|
}
|
2020-06-23 23:58:34 +08:00
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
}
|
|
|
|
|
#endif
|
2018-04-26 16:47:39 +08:00
|
|
|
|
|
|
|
|
data1[0] = Scalar(-1.0f);
|
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
2018-12-23 22:40:52 +08:00
|
|
|
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = NumTraits<Scalar>::infinity();
|
2018-12-23 22:40:52 +08:00
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isinf)(data2[0]));
|
2018-04-26 16:47:39 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasLog1p) {
|
|
|
|
|
test::packet_helper<PacketTraits::HasLog1p, Packet> h;
|
2019-08-29 03:20:21 +08:00
|
|
|
data1[0] = Scalar(-2);
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[1] = -NumTraits<Scalar>::infinity();
|
2019-08-29 03:20:21 +08:00
|
|
|
h.store(data2, internal::plog1p(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
}
|
2022-02-05 08:20:13 +08:00
|
|
|
|
2020-11-24 08:11:01 +08:00
|
|
|
// TODO(rmlarsen): Re-enable for half and bfloat16.
|
|
|
|
|
if (PacketTraits::HasCos && !internal::is_same<Scalar, half>::value &&
|
|
|
|
|
!internal::is_same<Scalar, bfloat16>::value) {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test::packet_helper<PacketTraits::HasCos, Packet> h;
|
2021-03-17 11:12:46 +08:00
|
|
|
for (Scalar k = Scalar(1); k < Scalar(10000) / NumTraits<Scalar>::epsilon(); k *= Scalar(2)) {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int k1 = 0; k1 <= 1; ++k1) {
|
2020-11-17 02:39:09 +08:00
|
|
|
data1[0] = Scalar((2 * double(k) + k1) * double(EIGEN_PI) / 2 * internal::random<double>(0.8, 1.2));
|
|
|
|
|
data1[1] = Scalar((2 * double(k) + 2 + k1) * double(EIGEN_PI) / 2 * internal::random<double>(0.8, 1.2));
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
|
h.store(data2 + PacketSize, internal::psin(h.load(data1)));
|
|
|
|
|
VERIFY(data2[0] <= Scalar(1.) && data2[0] >= Scalar(-1.));
|
|
|
|
|
VERIFY(data2[1] <= Scalar(1.) && data2[1] >= Scalar(-1.));
|
|
|
|
|
VERIFY(data2[PacketSize + 0] <= Scalar(1.) && data2[PacketSize + 0] >= Scalar(-1.));
|
|
|
|
|
VERIFY(data2[PacketSize + 1] <= Scalar(1.) && data2[PacketSize + 1] >= Scalar(-1.));
|
|
|
|
|
|
2020-06-23 23:58:34 +08:00
|
|
|
VERIFY_IS_APPROX(data2[0], std::cos(data1[0]));
|
|
|
|
|
VERIFY_IS_APPROX(data2[1], std::cos(data1[1]));
|
|
|
|
|
VERIFY_IS_APPROX(data2[PacketSize + 0], std::sin(data1[0]));
|
|
|
|
|
VERIFY_IS_APPROX(data2[PacketSize + 1], std::sin(data1[1]));
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY_IS_APPROX(numext::abs2(data2[0]) + numext::abs2(data2[PacketSize + 0]), Scalar(1));
|
|
|
|
|
VERIFY_IS_APPROX(numext::abs2(data2[1]) + numext::abs2(data2[PacketSize + 1]), Scalar(1));
|
2019-01-09 22:25:17 +08:00
|
|
|
}
|
2018-12-23 23:13:24 +08:00
|
|
|
}
|
2018-12-24 00:26:21 +08:00
|
|
|
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = NumTraits<Scalar>::infinity();
|
|
|
|
|
data1[1] = -NumTraits<Scalar>::infinity();
|
2018-12-24 00:26:21 +08:00
|
|
|
h.store(data2, internal::psin(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
|
|
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[0] = NumTraits<Scalar>::quiet_NaN();
|
2018-12-24 00:26:21 +08:00
|
|
|
h.store(data2, internal::psin(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
2019-01-09 22:25:17 +08:00
|
|
|
|
|
|
|
|
data1[0] = -Scalar(0.);
|
|
|
|
|
h.store(data2, internal::psin(h.load(data1)));
|
2024-01-31 06:38:43 +08:00
|
|
|
VERIFY(test::biteq(data2[0], data1[0]));
|
2019-01-09 22:25:17 +08:00
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
|
VERIFY_IS_EQUAL(data2[0], Scalar(1));
|
2018-12-23 23:13:24 +08:00
|
|
|
}
|
2014-10-20 19:13:43 +08:00
|
|
|
}
|
2022-01-27 04:38:05 +08:00
|
|
|
if (PacketTraits::HasReciprocal && PacketSize >= 2) {
|
|
|
|
|
test::packet_helper<PacketTraits::HasReciprocal, Packet> h;
|
|
|
|
|
const Scalar inf = NumTraits<Scalar>::infinity();
|
|
|
|
|
const Scalar zero = Scalar(0);
|
|
|
|
|
data1[0] = zero;
|
|
|
|
|
data1[1] = -zero;
|
|
|
|
|
h.store(data2, internal::preciprocal(h.load(data1)));
|
|
|
|
|
VERIFY_IS_EQUAL(data2[0], inf);
|
|
|
|
|
VERIFY_IS_EQUAL(data2[1], -inf);
|
|
|
|
|
|
|
|
|
|
data1[0] = inf;
|
|
|
|
|
data1[1] = -inf;
|
|
|
|
|
h.store(data2, internal::preciprocal(h.load(data1)));
|
|
|
|
|
VERIFY_IS_EQUAL(data2[0], zero);
|
|
|
|
|
VERIFY_IS_EQUAL(data2[1], -zero);
|
|
|
|
|
}
|
2013-03-21 01:28:40 +08:00
|
|
|
}
|
|
|
|
|
|
2020-08-22 06:52:34 +08:00
|
|
|
// Compares a packet op against a scalar reference that is evaluated in another type.
// For each of the first PacketSize entries, data1[i] is static_cast to REFTYPE, passed
// through REFOP and converted back with SCALAR(...); the result of POP applied to the
// loaded packet must approximately match. The whole check is skipped when COND is false.
// Expects `Packet`, `PacketSize`, `data1`, `data2` and `ref` at the expansion site.
#define CAST_CHECK_CWISE1_IF(COND, REFOP, POP, SCALAR, REFTYPE)  \
  if (COND) {                                                    \
    test::packet_helper<COND, Packet> h;                         \
    for (int i = 0; i < PacketSize; ++i) {                       \
      ref[i] = SCALAR(REFOP(static_cast<REFTYPE>(data1[i])));    \
    }                                                            \
    h.store(data2, POP(h.load(data1)));                          \
    VERIFY(test::areApprox(ref, data2, PacketSize) && #POP);     \
  }
|
2020-08-22 06:52:34 +08:00
|
|
|
|
2020-10-08 03:05:18 +08:00
|
|
|
template <typename Scalar>
|
|
|
|
|
Scalar propagate_nan_max(const Scalar& a, const Scalar& b) {
|
2020-10-10 04:05:49 +08:00
|
|
|
if ((numext::isnan)(a)) return a;
|
|
|
|
|
if ((numext::isnan)(b)) return b;
|
|
|
|
|
return (numext::maxi)(a, b);
|
2020-10-08 03:05:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename Scalar>
|
|
|
|
|
Scalar propagate_nan_min(const Scalar& a, const Scalar& b) {
|
2020-10-10 04:05:49 +08:00
|
|
|
if ((numext::isnan)(a)) return a;
|
|
|
|
|
if ((numext::isnan)(b)) return b;
|
|
|
|
|
return (numext::mini)(a, b);
|
2020-10-08 03:05:18 +08:00
|
|
|
}
|
|
|
|
|
|
2020-11-25 08:28:07 +08:00
|
|
|
template <typename Scalar>
|
|
|
|
|
Scalar propagate_number_max(const Scalar& a, const Scalar& b) {
|
|
|
|
|
if ((numext::isnan)(a)) return b;
|
|
|
|
|
if ((numext::isnan)(b)) return a;
|
|
|
|
|
return (numext::maxi)(a, b);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename Scalar>
|
|
|
|
|
Scalar propagate_number_min(const Scalar& a, const Scalar& b) {
|
|
|
|
|
if ((numext::isnan)(a)) return b;
|
|
|
|
|
if ((numext::isnan)(b)) return a;
|
|
|
|
|
return (numext::mini)(a, b);
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-13 06:19:50 +08:00
|
|
|
template <bool Cond, typename Scalar, typename Packet, bool SkipDenorms = false, typename FunctorT>
|
|
|
|
|
std::enable_if_t<!Cond, void> run_ieee_cases(const FunctorT&) {}
|
|
|
|
|
|
|
|
|
|
template <bool Cond, typename Scalar, typename Packet, bool SkipDenorms = false, typename FunctorT>
|
|
|
|
|
std::enable_if_t<Cond, void> run_ieee_cases(const FunctorT& fun) {
|
|
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
|
const Scalar norm_min = (std::numeric_limits<Scalar>::min)();
|
|
|
|
|
const Scalar norm_max = (std::numeric_limits<Scalar>::max)();
|
|
|
|
|
const Scalar inf = (std::numeric_limits<Scalar>::infinity)();
|
|
|
|
|
const Scalar nan = (std::numeric_limits<Scalar>::quiet_NaN)();
|
|
|
|
|
std::vector<Scalar> values{norm_min, Scalar(0), Scalar(1), norm_max, inf, nan};
|
|
|
|
|
|
|
|
|
|
constexpr int size = PacketSize * 2;
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
|
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
|
data1[i] = data2[i] = ref[i] = Scalar(0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Cond && !SkipDenorms && std::numeric_limits<Scalar>::has_denorm == std::denorm_present) {
|
|
|
|
|
values.push_back(std::numeric_limits<Scalar>::denorm_min());
|
|
|
|
|
values.push_back(norm_min / Scalar(2));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (Scalar abs_value : values) {
|
|
|
|
|
data1[0] = abs_value;
|
|
|
|
|
data1[1] = -data1[0];
|
2026-01-22 10:32:38 +08:00
|
|
|
g_test_stack.push_back("IEEE cases: " + fun.name);
|
2025-11-13 06:19:50 +08:00
|
|
|
CHECK_CWISE1_IF(Cond, fun.expected, fun.actual);
|
2026-01-22 10:32:38 +08:00
|
|
|
g_test_stack.pop_back();
|
2025-11-13 06:19:50 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Defines a struct NAME bundling a function under test with its reference:
//   - actual(val)   forwards to ACTUAL(val)
//   - expected(val) forwards to EXPECTED(val)
//   - name          holds the stringified NAME
// Both forwarding members are templates, so the same tester can be called with any
// argument type that ACTUAL / EXPECTED accept.
#define CREATE_TESTER(NAME, ACTUAL, EXPECTED)                  \
  struct NAME {                                                \
    const std::string name = #NAME;                            \
    template <typename T>                                      \
    T actual(const T& val) const { return ACTUAL(val); }       \
    template <typename T>                                      \
    T expected(const T& val) const { return EXPECTED(val); }   \
  }
|
|
|
|
|
|
|
|
|
|
// Tester types pairing each internal::p* packet function with its numext:: scalar reference.

// Roots.
CREATE_TESTER(sqrt_fun, internal::psqrt, numext::sqrt);
CREATE_TESTER(rsqrt_fun, internal::prsqrt, numext::rsqrt);
CREATE_TESTER(cbrt_fun, internal::pcbrt, numext::cbrt);

// Exponentials and logarithms.
CREATE_TESTER(exp_fun, internal::pexp, numext::exp);
CREATE_TESTER(exp2_fun, internal::pexp2, numext::exp2);
CREATE_TESTER(log_fun, internal::plog, numext::log);
CREATE_TESTER(log2_fun, internal::plog2, numext::log2);
CREATE_TESTER(expm1_fun, internal::pexpm1, numext::expm1);
CREATE_TESTER(log1p_fun, internal::plog1p, numext::log1p);

// Trigonometric functions and their inverses.
CREATE_TESTER(sin_fun, internal::psin, numext::sin);
CREATE_TESTER(cos_fun, internal::pcos, numext::cos);
CREATE_TESTER(tan_fun, internal::ptan, numext::tan);
CREATE_TESTER(asin_fun, internal::pasin, numext::asin);
CREATE_TESTER(acos_fun, internal::pacos, numext::acos);
CREATE_TESTER(atan_fun, internal::patan, numext::atan);

// Hyperbolic functions.
CREATE_TESTER(tanh_fun, internal::ptanh, numext::tanh);
CREATE_TESTER(atanh_fun, internal::patanh, numext::atanh);
|
|
|
|
|
|
|
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
std::enable_if_t<NumTraits<Scalar>::IsComplex, void> packetmath_ieee_special_values() {}
|
|
|
|
|
|
|
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
std::enable_if_t<!NumTraits<Scalar>::IsComplex, void> packetmath_ieee_special_values() {
|
|
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
|
|
|
|
run_ieee_cases<PacketTraits::HasSqrt, Scalar, Packet>(sqrt_fun());
|
|
|
|
|
// TODO(rmlarsen): See if we can fix rsqrt for denorms without wreaking performance.
|
|
|
|
|
run_ieee_cases<PacketTraits::HasRsqrt, Scalar, Packet, true>(rsqrt_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasCbrt, Scalar, Packet>(cbrt_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasExp, Scalar, Packet>(exp_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasExp, Scalar, Packet>(exp2_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasLog, Scalar, Packet>(log_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasLog, Scalar, Packet>(log2_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasExpm1, Scalar, Packet>(expm1_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasLog1p, Scalar, Packet>(log1p_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasSin, Scalar, Packet>(sin_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasCos, Scalar, Packet>(cos_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasTan, Scalar, Packet>(tan_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasASin, Scalar, Packet>(asin_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasACos, Scalar, Packet>(acos_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasATan, Scalar, Packet>(atan_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasTanh, Scalar, Packet>(tanh_fun());
|
|
|
|
|
run_ieee_cases<PacketTraits::HasATanh, Scalar, Packet>(atanh_fun());
|
|
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void packetmath_notcomplex() {
|
2025-11-13 06:19:50 +08:00
|
|
|
packetmath_ieee_special_values<Scalar, Packet>();
|
|
|
|
|
|
2015-10-13 15:53:46 +08:00
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2013-03-21 01:28:40 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
Array<Scalar, Dynamic, 1>::Map(data1, PacketSize * 4).setRandom();
|
2010-07-05 22:18:09 +08:00
|
|
|
|
2015-10-13 15:53:46 +08:00
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin);
|
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax);
|
|
|
|
|
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin);
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax);
|
2020-11-25 08:28:07 +08:00
|
|
|
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_number_min, internal::pmin<PropagateNumbers>);
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_number_max, internal::pmax<PropagateNumbers>);
|
2020-02-10 22:58:37 +08:00
|
|
|
CHECK_CWISE1(numext::abs, internal::pabs);
|
2024-02-15 07:00:56 +08:00
|
|
|
// Vectorized versions may give a different result in the case of signed int overflow,
|
|
|
|
|
// which is undefined behavior (e.g. NEON).
|
|
|
|
|
// Also note that unsigned integers with size < sizeof(int) may be implicitly converted to a signed
|
|
|
|
|
// int, which can also trigger UB.
|
|
|
|
|
if (Eigen::NumTraits<Scalar>::IsInteger) {
|
|
|
|
|
for (int i = 0; i < 2 * PacketSize; ++i) {
|
|
|
|
|
data1[i] = data1[i] / Scalar(2);
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-03-20 01:45:20 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff);
|
2010-07-05 22:18:09 +08:00
|
|
|
|
|
|
|
|
ref[0] = data1[0];
|
2020-10-14 05:48:31 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmin(ref[0], data1[i]);
|
|
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload<Packet>(data1))) && "internal::predux_min");
|
|
|
|
|
ref[0] = data1[0];
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmax(ref[0], data1[i]);
|
2010-10-25 22:15:22 +08:00
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload<Packet>(data1))) && "internal::predux_max");
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[0] + Scalar(i);
|
2015-08-08 01:27:59 +08:00
|
|
|
internal::pstore(data2, internal::plset<Packet>(data1[0]));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::plset");
|
2019-01-09 22:25:17 +08:00
|
|
|
|
|
|
|
|
{
|
|
|
|
|
unsigned char* data1_bits = reinterpret_cast<unsigned char*>(data1);
|
|
|
|
|
// predux_all - not needed yet
|
|
|
|
|
// for (unsigned int i=0; i<PacketSize*sizeof(Scalar); ++i) data1_bits[i] = 0xff;
|
|
|
|
|
// VERIFY(internal::predux_all(internal::pload<Packet>(data1)) && "internal::predux_all(1111)");
|
|
|
|
|
// for(int k=0; k<PacketSize; ++k)
|
|
|
|
|
// {
|
|
|
|
|
// for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0x0;
|
|
|
|
|
// VERIFY( (!internal::predux_all(internal::pload<Packet>(data1))) && "internal::predux_all(0101)");
|
|
|
|
|
// for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0xff;
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// predux_any
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (unsigned int i = 0; i < PacketSize * sizeof(Scalar); ++i) data1_bits[i] = 0x0;
|
|
|
|
|
VERIFY((!internal::predux_any(internal::pload<Packet>(data1))) && "internal::predux_any(0000)");
|
|
|
|
|
for (int k = 0; k < PacketSize; ++k) {
|
|
|
|
|
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0xff;
|
|
|
|
|
VERIFY(internal::predux_any(internal::pload<Packet>(data1)) && "internal::predux_any(0101)");
|
|
|
|
|
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0x00;
|
2019-01-09 22:25:17 +08:00
|
|
|
}
|
|
|
|
|
}
|
2020-10-02 00:54:31 +08:00
|
|
|
|
2020-10-14 05:48:31 +08:00
|
|
|
// Test NaN propagation.
|
|
|
|
|
if (!NumTraits<Scalar>::IsInteger) {
|
|
|
|
|
// Test reductions with no NaNs.
|
|
|
|
|
ref[0] = data1[0];
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmin<PropagateNumbers>(ref[0], data1[i]);
|
|
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_min<PropagateNumbers>(internal::pload<Packet>(data1))) &&
|
|
|
|
|
"internal::predux_min<PropagateNumbers>");
|
|
|
|
|
ref[0] = data1[0];
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmin<PropagateNaN>(ref[0], data1[i]);
|
|
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_min<PropagateNaN>(internal::pload<Packet>(data1))) &&
|
|
|
|
|
"internal::predux_min<PropagateNaN>");
|
|
|
|
|
ref[0] = data1[0];
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmax<PropagateNumbers>(ref[0], data1[i]);
|
|
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_max<PropagateNumbers>(internal::pload<Packet>(data1))) &&
|
|
|
|
|
"internal::predux_max<PropagateNumbers>");
|
|
|
|
|
ref[0] = data1[0];
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmax<PropagateNaN>(ref[0], data1[i]);
|
|
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_max<PropagateNaN>(internal::pload<Packet>(data1))) &&
|
|
|
|
|
"internal::predux_max<PropagateNumbers>");
|
|
|
|
|
// A single NaN.
|
|
|
|
|
const size_t index = std::numeric_limits<size_t>::quiet_NaN() % PacketSize;
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[index] = NumTraits<Scalar>::quiet_NaN();
|
2020-10-14 05:48:31 +08:00
|
|
|
VERIFY(PacketSize == 1 || !(numext::isnan)(internal::predux_min<PropagateNumbers>(internal::pload<Packet>(data1))));
|
|
|
|
|
VERIFY((numext::isnan)(internal::predux_min<PropagateNaN>(internal::pload<Packet>(data1))));
|
|
|
|
|
VERIFY(PacketSize == 1 || !(numext::isnan)(internal::predux_max<PropagateNumbers>(internal::pload<Packet>(data1))));
|
|
|
|
|
VERIFY((numext::isnan)(internal::predux_max<PropagateNaN>(internal::pload<Packet>(data1))));
|
|
|
|
|
// All NaNs.
|
2021-03-17 11:12:46 +08:00
|
|
|
for (int i = 0; i < 4 * PacketSize; ++i) data1[i] = NumTraits<Scalar>::quiet_NaN();
|
2020-10-14 05:48:31 +08:00
|
|
|
VERIFY((numext::isnan)(internal::predux_min<PropagateNumbers>(internal::pload<Packet>(data1))));
|
|
|
|
|
VERIFY((numext::isnan)(internal::predux_min<PropagateNaN>(internal::pload<Packet>(data1))));
|
|
|
|
|
VERIFY((numext::isnan)(internal::predux_max<PropagateNumbers>(internal::pload<Packet>(data1))));
|
|
|
|
|
VERIFY((numext::isnan)(internal::predux_max<PropagateNaN>(internal::pload<Packet>(data1))));
|
|
|
|
|
|
|
|
|
|
// Test NaN propagation for coefficient-wise min and max.
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2021-03-17 11:12:46 +08:00
|
|
|
data1[i] = internal::random<bool>() ? NumTraits<Scalar>::quiet_NaN() : Scalar(0);
|
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? NumTraits<Scalar>::quiet_NaN() : Scalar(0);
|
2020-10-14 05:48:31 +08:00
|
|
|
}
|
|
|
|
|
// Note: NaN propagation is implementation defined for pmin/pmax, so we do not test it here.
|
2020-11-25 08:28:07 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_number_min, (internal::pmin<PropagateNumbers>));
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_number_max, internal::pmax<PropagateNumbers>);
|
2020-10-14 05:48:31 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_nan_min, (internal::pmin<PropagateNaN>));
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_nan_max, internal::pmax<PropagateNaN>);
|
|
|
|
|
}
|
2021-04-24 03:51:43 +08:00
|
|
|
|
2023-04-18 07:33:59 +08:00
|
|
|
packetmath_boolean_mask_ops_notcomplex_test<Scalar, Packet>::run();
|
2009-03-25 20:26:13 +08:00
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet, bool ConjLhs, bool ConjRhs>
|
|
|
|
|
void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) {
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2011-02-24 02:24:26 +08:00
|
|
|
internal::conj_if<ConjLhs> cj0;
|
|
|
|
|
internal::conj_if<ConjRhs> cj1;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::conj_helper<Scalar, Scalar, ConjLhs, ConjRhs> cj;
|
|
|
|
|
internal::conj_helper<Packet, Packet, ConjLhs, ConjRhs> pcj;
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2011-02-24 02:24:26 +08:00
|
|
|
ref[i] = cj0(data1[i]) * cj1(data2[i]);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(internal::isApprox(ref[i], cj.pmul(data1[i], data2[i])) && "conj_helper pmul");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(pval, pcj.pmul(internal::pload<Packet>(data1), internal::pload<Packet>(data2)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmul");
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2011-02-24 02:24:26 +08:00
|
|
|
Scalar tmp = ref[i];
|
|
|
|
|
ref[i] += cj0(data1[i]) * cj1(data2[i]);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(internal::isApprox(ref[i], cj.pmadd(data1[i], data2[i], tmp)) && "conj_helper pmadd");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(
|
|
|
|
|
pval, pcj.pmadd(internal::pload<Packet>(data1), internal::pload<Packet>(data2), internal::pload<Packet>(pval)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmadd");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
|
|
|
|
|
2024-02-17 11:08:23 +08:00
|
|
|
template <typename Scalar, typename Packet, bool HasExp = internal::packet_traits<Scalar>::HasExp>
|
|
|
|
|
struct exp_complex_test_impl {
|
|
|
|
|
typedef typename Scalar::value_type RealScalar;
|
2024-03-05 01:44:38 +08:00
|
|
|
|
|
|
|
|
// Evaluates the packet exponential for a single value: broadcasts x into a packet,
// applies internal::pexp and returns the first element of the result.
static Scalar pexp1(const Scalar& x) {
  const Packet broadcast = internal::pset1<Packet>(x);
  return internal::pfirst(internal::pexp(broadcast));
}
|
|
|
|
|
|
|
|
|
|
// Returns cos(x) + i*sin(x) as a Scalar.
static Scalar cis(const RealScalar& x) {
  const RealScalar real_part = numext::cos(x);
  const RealScalar imag_part = numext::sin(x);
  return Scalar(real_part, imag_part);
}
|
|
|
|
|
|
|
|
|
|
// Verify equality with signed zero.
|
2024-03-07 08:19:57 +08:00
|
|
|
static bool is_exactly_equal(RealScalar a, RealScalar b) {
|
2024-03-05 01:44:38 +08:00
|
|
|
// NaNs are always unsigned, and always compare not equal directly.
|
|
|
|
|
if ((numext::isnan)(a)) {
|
|
|
|
|
return (numext::isnan)(b);
|
|
|
|
|
}
|
2024-03-07 08:19:57 +08:00
|
|
|
|
2024-03-05 01:44:38 +08:00
|
|
|
RealScalar zero(0);
|
2024-03-07 08:19:57 +08:00
|
|
|
#ifdef EIGEN_ARCH_ARM
|
|
|
|
|
// ARM automatically flushes denormals to zero.
|
|
|
|
|
// Preserve sign by multiplying by +0.
|
|
|
|
|
if (numext::abs(a) < (std::numeric_limits<RealScalar>::min)()) {
|
|
|
|
|
a = a * zero;
|
|
|
|
|
}
|
|
|
|
|
if (numext::abs(b) < (std::numeric_limits<RealScalar>::min)()) {
|
|
|
|
|
b = b * zero;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// Signed zero.
|
2024-03-05 01:44:38 +08:00
|
|
|
if (a == zero) {
|
|
|
|
|
// Signs are either 0 or NaN, so verify that their comparisons to zero are equal.
|
|
|
|
|
return (a == b) && ((numext::signbit(a) == zero) == (numext::signbit(b) == zero));
|
|
|
|
|
}
|
|
|
|
|
// Allow _some_ tolerance.
|
|
|
|
|
return verifyIsApprox(a, b);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify equality with signed zero.
|
|
|
|
|
static bool is_exactly_equal(const Scalar& a, const Scalar& b) {
|
|
|
|
|
bool result = is_exactly_equal(numext::real_ref(a), numext::real_ref(b)) &&
|
|
|
|
|
is_exactly_equal(numext::imag_ref(a), numext::imag_ref(b));
|
|
|
|
|
if (!result) {
|
|
|
|
|
std::cout << a << " != " << b << std::endl;
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool is_sign_exp_unspecified(const Scalar& z) {
|
|
|
|
|
const RealScalar inf = std::numeric_limits<RealScalar>::infinity();
|
|
|
|
|
// If z is (-∞,±∞), the result is (±0,±0) (signs are unspecified)
|
|
|
|
|
if (numext::real_ref(z) == -inf && (numext::isinf)(numext::imag_ref(z))) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
// If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified)
|
|
|
|
|
if (numext::real_ref(z) == +inf && (numext::isinf)(numext::imag_ref(z))) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
// If z is (-∞,NaN), the result is (±0,±0) (signs are unspecified)
|
|
|
|
|
if (numext::real_ref(z) == -inf && (numext::isnan)(numext::imag_ref(z))) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
// If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified)
|
|
|
|
|
if (numext::real_ref(z) == +inf && (numext::isnan)(numext::imag_ref(z))) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-17 11:55:32 +08:00
|
|
|
// Exercises internal::pexp on complex inputs.
//   1. `size` random values, checked against std::exp through CHECK_CWISE1_N.
//   2. Every (x, y) pair drawn from a table of edge values, evaluated one
//      scalar at a time through pexp1 and compared with numext::exp.
// data1, data2 and ref are caller-provided scratch buffers; data1 is indexed
// up to `size` here. The names data1/data2/ref/PacketSize are presumably
// picked up by CHECK_CWISE1_N (defined elsewhere) -- keep them unchanged.
static void run(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
  const int PacketSize = internal::unpacket_traits<Packet>::size;

  for (int i = 0; i < size; ++i) {
    data1[i] = Scalar(internal::random<RealScalar>(), internal::random<RealScalar>());
  }
  CHECK_CWISE1_N(std::exp, internal::pexp, size);

  // Test all corner cases (and more).
  const RealScalar highest = NumTraits<RealScalar>::highest();
  const RealScalar inf = std::numeric_limits<RealScalar>::infinity();
  const RealScalar nan = std::numeric_limits<RealScalar>::quiet_NaN();
  const RealScalar edges[] = {
      // Non-negative values.
      RealScalar(0), RealScalar(1), RealScalar(2),
      RealScalar(EIGEN_PI / 2), RealScalar(EIGEN_PI), RealScalar(3 * EIGEN_PI / 2), RealScalar(2 * EIGEN_PI),
      numext::log(highest) - 1, highest, inf, nan,
      // The same values, negated.
      -RealScalar(0), -RealScalar(1), -RealScalar(2),
      -RealScalar(EIGEN_PI / 2), -RealScalar(EIGEN_PI), -RealScalar(3 * EIGEN_PI / 2), -RealScalar(2 * EIGEN_PI),
      -numext::log(highest) + 1, -highest, -inf, -nan};

  // Component-wise absolute value, used to compare results whose signs are unspecified.
  const auto strip_signs = [](const Scalar& v) {
    return Scalar(numext::abs(numext::real_ref(v)), numext::abs(numext::imag_ref(v)));
  };

  for (RealScalar x : edges) {
    for (RealScalar y : edges) {
      Scalar z = Scalar(x, y);
      Scalar w = pexp1(z);
      if (!is_sign_exp_unspecified(z)) {
        // Fully specified result: must match the reference exactly.
        VERIFY(is_exactly_equal(w, numext::exp(z)));
        continue;
      }
      // Signs are unspecified for this input: compare magnitudes only.
      Scalar abs_w = strip_signs(w);
      Scalar expected = numext::exp(z);
      Scalar abs_expected = strip_signs(expected);
      VERIFY(is_exactly_equal(abs_w, abs_expected));
    }
  }
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
struct exp_complex_test_impl<Scalar, Packet, false> {
|
|
|
|
|
typedef typename Scalar::value_type RealScalar;
|
2024-11-26 14:27:26 +08:00
|
|
|
static void run(Scalar*, Scalar*, Scalar*, int) {};
|
2024-02-17 11:08:23 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename Scalar, typename Packet>
|
2024-02-17 11:55:32 +08:00
|
|
|
void exp_complex_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
|
2024-02-17 11:08:23 +08:00
|
|
|
exp_complex_test_impl<Scalar, Packet>::run(data1, data2, ref, size);
|
|
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void packetmath_complex() {
|
2020-12-09 10:13:35 +08:00
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
|
|
|
|
typedef typename Scalar::value_type RealScalar;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2010-07-07 02:54:14 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = PacketSize * 4;
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar pval[PacketSize * 4];
|
2023-12-07 20:41:09 +08:00
|
|
|
EIGEN_ALIGN_MAX RealScalar realdata[PacketSize * 4];
|
|
|
|
|
EIGEN_ALIGN_MAX RealScalar realref[PacketSize * 4];
|
2010-07-07 02:54:14 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2010-10-25 22:15:22 +08:00
|
|
|
data1[i] = internal::random<Scalar>() * Scalar(1e2);
|
|
|
|
|
data2[i] = internal::random<Scalar>() * Scalar(1e2);
|
2010-07-07 02:54:14 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test_conj_helper<Scalar, Packet, false, false>(data1, data2, ref, pval);
|
|
|
|
|
test_conj_helper<Scalar, Packet, false, true>(data1, data2, ref, pval);
|
|
|
|
|
test_conj_helper<Scalar, Packet, true, false>(data1, data2, ref, pval);
|
|
|
|
|
test_conj_helper<Scalar, Packet, true, true>(data1, data2, ref, pval);
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2020-12-09 10:13:35 +08:00
|
|
|
// Test pcplxflip.
|
2011-02-23 21:20:33 +08:00
|
|
|
{
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = Scalar(std::imag(data1[i]), std::real(data1[i]));
|
|
|
|
|
internal::pstore(pval, internal::pcplxflip(internal::pload<Packet>(data1)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "pcplxflip");
|
2011-02-23 21:20:33 +08:00
|
|
|
}
|
2020-12-09 10:13:35 +08:00
|
|
|
|
|
|
|
|
if (PacketTraits::HasSqrt) {
|
|
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
|
data1[i] = Scalar(internal::random<RealScalar>(), internal::random<RealScalar>());
|
|
|
|
|
}
|
2021-01-08 01:39:05 +08:00
|
|
|
CHECK_CWISE1_N(numext::sqrt, internal::psqrt, size);
|
2022-08-10 03:54:57 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSign, numext::sign, internal::psign);
|
2020-12-09 10:13:35 +08:00
|
|
|
|
|
|
|
|
// Test misc. corner cases.
|
|
|
|
|
const RealScalar zero = RealScalar(0);
|
|
|
|
|
const RealScalar one = RealScalar(1);
|
|
|
|
|
const RealScalar inf = std::numeric_limits<RealScalar>::infinity();
|
|
|
|
|
const RealScalar nan = std::numeric_limits<RealScalar>::quiet_NaN();
|
|
|
|
|
data1[0] = Scalar(zero, zero);
|
|
|
|
|
data1[1] = Scalar(-zero, zero);
|
|
|
|
|
data1[2] = Scalar(one, zero);
|
|
|
|
|
data1[3] = Scalar(zero, one);
|
2021-01-08 01:39:05 +08:00
|
|
|
CHECK_CWISE1_N(numext::sqrt, internal::psqrt, 4);
|
2020-12-09 10:13:35 +08:00
|
|
|
data1[0] = Scalar(-one, zero);
|
|
|
|
|
data1[1] = Scalar(zero, -one);
|
|
|
|
|
data1[2] = Scalar(one, one);
|
|
|
|
|
data1[3] = Scalar(-one, -one);
|
2021-01-08 01:39:05 +08:00
|
|
|
CHECK_CWISE1_N(numext::sqrt, internal::psqrt, 4);
|
2020-12-09 10:13:35 +08:00
|
|
|
data1[0] = Scalar(inf, zero);
|
|
|
|
|
data1[1] = Scalar(zero, inf);
|
|
|
|
|
data1[2] = Scalar(-inf, zero);
|
|
|
|
|
data1[3] = Scalar(zero, -inf);
|
2021-01-08 01:39:05 +08:00
|
|
|
CHECK_CWISE1_N(numext::sqrt, internal::psqrt, 4);
|
2020-12-09 10:13:35 +08:00
|
|
|
data1[0] = Scalar(inf, inf);
|
|
|
|
|
data1[1] = Scalar(-inf, inf);
|
|
|
|
|
data1[2] = Scalar(inf, -inf);
|
|
|
|
|
data1[3] = Scalar(-inf, -inf);
|
2021-01-08 01:39:05 +08:00
|
|
|
CHECK_CWISE1_N(numext::sqrt, internal::psqrt, 4);
|
2020-12-09 10:13:35 +08:00
|
|
|
data1[0] = Scalar(nan, zero);
|
|
|
|
|
data1[1] = Scalar(zero, nan);
|
|
|
|
|
data1[2] = Scalar(nan, one);
|
|
|
|
|
data1[3] = Scalar(one, nan);
|
2021-01-08 01:39:05 +08:00
|
|
|
CHECK_CWISE1_N(numext::sqrt, internal::psqrt, 4);
|
2020-12-09 10:13:35 +08:00
|
|
|
data1[0] = Scalar(nan, nan);
|
|
|
|
|
data1[1] = Scalar(inf, nan);
|
|
|
|
|
data1[2] = Scalar(nan, inf);
|
|
|
|
|
data1[3] = Scalar(-inf, nan);
|
2021-01-08 01:39:05 +08:00
|
|
|
CHECK_CWISE1_N(numext::sqrt, internal::psqrt, 4);
|
2020-12-09 10:13:35 +08:00
|
|
|
}
|
2023-12-07 20:41:09 +08:00
|
|
|
if (PacketTraits::HasLog) {
|
|
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
|
data1[i] = Scalar(internal::random<RealScalar>(), internal::random<RealScalar>());
|
|
|
|
|
}
|
|
|
|
|
CHECK_CWISE1_N(std::log, internal::plog, size);
|
|
|
|
|
|
|
|
|
|
// Test misc. corner cases.
|
|
|
|
|
const RealScalar zero = RealScalar(0);
|
|
|
|
|
const RealScalar one = RealScalar(1);
|
|
|
|
|
const RealScalar inf = std::numeric_limits<RealScalar>::infinity();
|
|
|
|
|
const RealScalar nan = std::numeric_limits<RealScalar>::quiet_NaN();
|
|
|
|
|
for (RealScalar x : {zero, one, inf}) {
|
|
|
|
|
for (RealScalar y : {zero, one, inf}) {
|
|
|
|
|
data1[0] = Scalar(x, y);
|
|
|
|
|
data1[1] = Scalar(-x, y);
|
|
|
|
|
data1[2] = Scalar(x, -y);
|
|
|
|
|
data1[3] = Scalar(-x, -y);
|
|
|
|
|
CHECK_CWISE1_IM1ULP_N(std::log, internal::plog, 4);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Set reference results to nan.
|
|
|
|
|
// Some architectures don't handle IEEE edge cases correctly
|
|
|
|
|
ref[0] = Scalar(nan, nan);
|
|
|
|
|
ref[1] = Scalar(nan, nan);
|
|
|
|
|
ref[2] = Scalar(nan, nan);
|
|
|
|
|
ref[3] = Scalar(nan, nan);
|
|
|
|
|
for (RealScalar x : {zero, one}) {
|
|
|
|
|
data1[0] = Scalar(x, nan);
|
|
|
|
|
data1[1] = Scalar(-x, nan);
|
|
|
|
|
data1[2] = Scalar(nan, x);
|
|
|
|
|
data1[3] = Scalar(nan, -x);
|
|
|
|
|
for (int j = 0; j < size; j += PacketSize)
|
|
|
|
|
internal::pstore(data2 + j, internal::plog(internal::pload<Packet>(data1 + j)));
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, 4));
|
|
|
|
|
}
|
|
|
|
|
data1[0] = Scalar(inf, nan);
|
|
|
|
|
data1[1] = Scalar(-inf, nan);
|
|
|
|
|
data1[2] = Scalar(nan, inf);
|
|
|
|
|
data1[3] = Scalar(nan, -inf);
|
2024-03-07 07:51:47 +08:00
|
|
|
CHECK_CWISE1_IM1ULP_N(numext::log, internal::plog, 4);
|
2023-12-07 20:41:09 +08:00
|
|
|
}
|
2024-02-17 11:08:23 +08:00
|
|
|
exp_complex_test<Scalar, Packet>(data1, data2, ref, size);
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
|
void packetmath_scatter_gather() {
|
2014-03-28 07:03:03 +08:00
|
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize];
|
2020-08-22 06:52:34 +08:00
|
|
|
RealScalar refvalue = RealScalar(0);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2025-02-26 05:37:09 +08:00
|
|
|
data1[i] = internal::random<Scalar>();
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
int stride = internal::random<int>(1, 20);
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2020-11-20 07:44:19 +08:00
|
|
|
// Buffer of zeros.
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar buffer[PacketSize * 20] = {};
|
|
|
|
|
|
2014-03-28 07:03:03 +08:00
|
|
|
Packet packet = internal::pload<Packet>(data1);
|
2014-07-09 22:01:24 +08:00
|
|
|
internal::pscatter<Scalar, Packet>(buffer, packet, stride);
|
2014-03-28 07:03:03 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 20; ++i) {
|
|
|
|
|
if ((i % stride) == 0 && i < stride * PacketSize) {
|
|
|
|
|
VERIFY(test::isApproxAbs(buffer[i], data1[i / stride], refvalue) && "pscatter");
|
2014-03-28 07:03:03 +08:00
|
|
|
} else {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(test::isApproxAbs(buffer[i], Scalar(0), refvalue) && "pscatter");
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 7; ++i) {
|
2025-02-26 05:37:09 +08:00
|
|
|
buffer[i] = internal::random<Scalar>();
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
|
|
|
|
packet = internal::pgather<Scalar, Packet>(buffer, 7);
|
|
|
|
|
internal::pstore(data1, packet);
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(test::isApproxAbs(data1[i], buffer[i * 7], refvalue) && "pgather");
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
2022-06-28 03:18:00 +08:00
|
|
|
|
|
|
|
|
for (Index N = 0; N <= PacketSize; ++N) {
|
|
|
|
|
for (Index i = 0; i < N; ++i) {
|
2025-03-23 09:05:53 +08:00
|
|
|
data1[i] = internal::random<Scalar>();
|
2022-06-28 03:18:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (Index i = 0; i < N * 20; ++i) {
|
|
|
|
|
buffer[i] = Scalar(0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
packet = internal::pload_partial<Packet>(data1, N);
|
|
|
|
|
internal::pscatter_partial<Scalar, Packet>(buffer, packet, stride, N);
|
|
|
|
|
|
|
|
|
|
for (Index i = 0; i < N * 20; ++i) {
|
|
|
|
|
if ((i % stride) == 0 && i < stride * N) {
|
|
|
|
|
VERIFY(test::isApproxAbs(buffer[i], data1[i / stride], refvalue) && "pscatter_partial");
|
|
|
|
|
} else {
|
|
|
|
|
VERIFY(test::isApproxAbs(buffer[i], Scalar(0), refvalue) && "pscatter_partial");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (Index i = 0; i < N * 7; ++i) {
|
2025-03-23 09:05:53 +08:00
|
|
|
buffer[i] = internal::random<Scalar>();
|
2022-06-28 03:18:00 +08:00
|
|
|
}
|
|
|
|
|
packet = internal::pgather_partial<Scalar, Packet>(buffer, 7, N);
|
|
|
|
|
internal::pstore_partial(data1, packet, N);
|
|
|
|
|
for (Index i = 0; i < N; ++i) {
|
|
|
|
|
VERIFY(test::isApproxAbs(data1[i], buffer[i * 7], refvalue) && "pgather_partial");
|
|
|
|
|
}
|
|
|
|
|
}
|
2010-07-07 02:54:14 +08:00
|
|
|
}
|
|
|
|
|
|
2020-01-11 18:31:21 +08:00
|
|
|
namespace Eigen {
|
|
|
|
|
namespace test {
|
2018-11-26 21:10:07 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
// Test driver for the <false, false> case of runall, i.e. real floating-point
// scalars (float or double).
// NOTE(review): the primary template is not visible here; the two flags are
// presumably (is-complex, is-integer) -- confirm.
template <typename Scalar, typename PacketType>
struct runall<Scalar, PacketType, false, false> {  // i.e. float or double
  // Runs the generic, scatter/gather, non-complex and real-only test suites,
  // in that order.
  static void run() {
    packetmath<Scalar, PacketType>();
    packetmath_scatter_gather<Scalar, PacketType>();
    packetmath_notcomplex<Scalar, PacketType>();
    packetmath_real<Scalar, PacketType>();
  }
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
// Test driver for the <false, true> case of runall, i.e. integer scalars.
// NOTE(review): the primary template is not visible here; the two flags are
// presumably (is-complex, is-integer) -- confirm.
template <typename Scalar, typename PacketType>
struct runall<Scalar, PacketType, false, true> {  // i.e. int
  // Runs the generic, scatter/gather and non-complex test suites, in that
  // order; packetmath_real is not run for this case.
  static void run() {
    packetmath<Scalar, PacketType>();
    packetmath_scatter_gather<Scalar, PacketType>();
    packetmath_notcomplex<Scalar, PacketType>();
  }
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
// Test driver for the <true, false> case of runall, i.e. complex scalars.
// NOTE(review): the primary template is not visible here; the two flags are
// presumably (is-complex, is-integer) -- confirm.
template <typename Scalar, typename PacketType>
struct runall<Scalar, PacketType, true, false> {  // i.e. complex
  // Runs the generic and scatter/gather test suites, then the
  // complex-specific suite in place of the non-complex/real ones.
  static void run() {
    packetmath<Scalar, PacketType>();
    packetmath_scatter_gather<Scalar, PacketType>();
    packetmath_complex<Scalar, PacketType>();
  }
};
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
} // namespace test
|
|
|
|
|
} // namespace Eigen
|
2018-11-26 21:10:07 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
// Entry point of the packetmath test, declared through EIGEN_DECLARE_TEST.
// Each of the g_repeat loop iterations calls test::runner<T>::run() for float,
// double, the signed and unsigned 8/16/32/64-bit integer types,
// std::complex<float>, std::complex<double>, half and bfloat16, each wrapped in
// its own CALL_SUBTEST_N macro (N = 1..13 and 15).
// bool does not go through test::runner: both CALL_SUBTEST_14 lines call
// packetmath and packetmath_scatter_gather directly, passing
// internal::packet_traits<bool>::type as the packet type.
// g_first_pass is set to true before the loop and assigned false at the end of
// every iteration, so within this function it is true only while the first
// iteration runs.
EIGEN_DECLARE_TEST(packetmath) {
|
2018-11-26 21:10:07 +08:00
|
|
|
g_first_pass = true;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < g_repeat; i++) {
|
|
|
|
|
CALL_SUBTEST_1(test::runner<float>::run());
|
|
|
|
|
CALL_SUBTEST_2(test::runner<double>::run());
|
|
|
|
|
CALL_SUBTEST_3(test::runner<int8_t>::run());
|
|
|
|
|
CALL_SUBTEST_4(test::runner<uint8_t>::run());
|
|
|
|
|
CALL_SUBTEST_5(test::runner<int16_t>::run());
|
|
|
|
|
CALL_SUBTEST_6(test::runner<uint16_t>::run());
|
|
|
|
|
CALL_SUBTEST_7(test::runner<int32_t>::run());
|
|
|
|
|
CALL_SUBTEST_8(test::runner<uint32_t>::run());
|
|
|
|
|
CALL_SUBTEST_9(test::runner<int64_t>::run());
|
|
|
|
|
CALL_SUBTEST_10(test::runner<uint64_t>::run());
|
|
|
|
|
CALL_SUBTEST_11(test::runner<std::complex<float>>::run());
|
|
|
|
|
CALL_SUBTEST_12(test::runner<std::complex<double>>::run());
|
2020-11-19 07:02:21 +08:00
|
|
|
CALL_SUBTEST_13(test::runner<half>::run());
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));
|
2025-02-26 05:37:09 +08:00
|
|
|
CALL_SUBTEST_14((packetmath_scatter_gather<bool, internal::packet_traits<bool>::type>()));
|
2020-08-22 06:52:34 +08:00
|
|
|
CALL_SUBTEST_15(test::runner<bfloat16>::run());
|
2018-11-26 21:10:07 +08:00
|
|
|
g_first_pass = false;
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
}
|