Sfoglia il codice sorgente

package/highway: bump to version 1.0.3

- Dropped patch upstreamed in:
  https://github.com/google/highway/commit/1cab22047a6ef750ef2bc38ad47d6e765d6d376a

- Add an upstream patch, not in 1.0.3 release:
  https://github.com/google/highway/commit/411300d0eec10d5635cbdd064299630c507348e1

- Add a new patch, to fix armv7 builds with vfp < v4.
  Proposed upstream in:
  https://github.com/google/highway/pull/1143

- Add a comment about -DHWY_CMAKE_ARM7=OFF since the name is a bit
  misleading. It should better be ARMV7 or ARMV7_VFPV4.

For change log since 1.0.2, see:
- https://github.com/google/highway/releases/tag/1.0.3

Signed-off-by: Julien Olivain <ju.o@free.fr>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Julien Olivain 2 anni fa
parent
commit
6a789cd0d1

+ 0 - 62
package/highway/0001-Check-for-the-presence-of-sys-auxv.h.patch

@@ -1,62 +0,0 @@
-From 491e3b1c2b8c44a2cfd35db117b02ef0fdf6a8e5 Mon Sep 17 00:00:00 2001
-From: Julien Olivain <ju.o@free.fr>
-Date: Wed, 23 Nov 2022 23:27:11 +0100
-Subject: [PATCH] Check for the presence of <sys/auxv.h>
-
-Not all gcc versions are providing <sys/auxv.h>. Checking for
-HWY_ARCH_ARM && HWY_COMPILER_GCC_ACTUAL && HWY_OS_LINUX is not
-sufficient and fail to build in some situations (it was observed for
-some gcc armv7m toolchains).
-
-This patch adds a check for <sys/auxv.h> and include it only if present.
-
-Signed-off-by: Julien Olivain <ju.o@free.fr>
----
- CMakeLists.txt       | 3 +++
- hwy/detect_targets.h | 2 +-
- hwy/targets.cc       | 2 +-
- 3 files changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index b6b14ab..df6b5ab 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -84,6 +84,9 @@ check_cxx_source_compiles(
-   HWY_RISCV
- )
- 
-+include(CheckIncludeFile)
-+check_include_file(sys/auxv.h  HAVE_SYS_AUXV_H)
-+
- if (HWY_ENABLE_CONTRIB)
- # Glob all the traits so we don't need to modify this file when adding
- # additional special cases.
-diff --git a/hwy/detect_targets.h b/hwy/detect_targets.h
-index 7f7e179..f0c6f94 100644
---- a/hwy/detect_targets.h
-+++ b/hwy/detect_targets.h
-@@ -392,7 +392,7 @@
- #define HWY_HAVE_RUNTIME_DISPATCH 1
- // On Arm, currently only GCC does, and we require Linux to detect CPU
- // capabilities.
--#elif HWY_ARCH_ARM && HWY_COMPILER_GCC_ACTUAL && HWY_OS_LINUX
-+#elif HWY_ARCH_ARM && HWY_COMPILER_GCC_ACTUAL && HWY_OS_LINUX && HAVE_SYS_AUXV_H
- #define HWY_HAVE_RUNTIME_DISPATCH 1
- #else
- #define HWY_HAVE_RUNTIME_DISPATCH 0
-diff --git a/hwy/targets.cc b/hwy/targets.cc
-index 2fde4db..abd6a94 100644
---- a/hwy/targets.cc
-+++ b/hwy/targets.cc
-@@ -42,7 +42,7 @@
- #include <cpuid.h>
- #endif  // HWY_COMPILER_MSVC
- 
--#elif HWY_ARCH_ARM && HWY_OS_LINUX
-+#elif HWY_ARCH_ARM && HWY_OS_LINUX && HAVE_SYS_AUXV_H
- #include <asm/hwcap.h>
- #include <sys/auxv.h>
- #endif  // HWY_ARCH_*
--- 
-2.38.1
-

+ 46 - 0
package/highway/0001-Fix-compilation-for-armv7-with-gcc-8.patch

@@ -0,0 +1,46 @@
+From 94cda9cc8cd12345a6dbe70e40f3119d5bf7ee78 Mon Sep 17 00:00:00 2001
+From: Julien Olivain <ju.o@free.fr>
+Date: Fri, 10 Feb 2023 21:25:36 +0100
+Subject: [PATCH] Fix compilation for armv7 with gcc < 8
+
+Highway uses the construct __attribute__((target(+neon-vfpv4)) for
+Armv7. The target "+neon-vfpv4" was introduced in gcc 8, in commit [1].
+When using a gcc < 8 (for example, like [2]), compilation fails with
+message:
+
+    In file included from /build/highway-1.0.3/hwy/foreach_target.h:81:0,
+                     from /build/highway-1.0.3/hwy/per_target.cc:20:
+    /build/highway-1.0.3/hwy/per_target.cc: At global scope:
+    /build/highway-1.0.3/hwy/per_target.cc:23:22: error: attribute(target("+neon-vfpv4")) is unknown
+
+This commit protects the definition of HWY_TARGET_STR only when gcc
+version 8 or greater is used for armv7.
+
+[1] https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e87afe54b86c478ae63569e51e7abb67d3fe3fce
+[2] https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/arm-linux-gnueabihf/gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabihf.tar.xz
+
+Signed-off-by: Julien Olivain <ju.o@free.fr>
+---
+ hwy/ops/set_macros-inl.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/hwy/ops/set_macros-inl.h b/hwy/ops/set_macros-inl.h
+index 051dbb3..00b4b1b 100644
+--- a/hwy/ops/set_macros-inl.h
++++ b/hwy/ops/set_macros-inl.h
+@@ -230,7 +230,12 @@
+ // Can use pragmas instead of -march compiler flag
+ #if HWY_HAVE_RUNTIME_DISPATCH
+ #if HWY_ARCH_ARM_V7
++#if HWY_COMPILER_GCC_ACTUAL >= 800
++// The __attribute__((target(+neon-vfpv4)) was introduced in gcc >= 8.
++// In case we have a gcc < 8, we can still compile by keeping
++// HWY_TARGET_STR undefined.
+ #define HWY_TARGET_STR "+neon-vfpv4"
++#endif
+ #else
+ #define HWY_TARGET_STR "+crypto"
+ #endif  // HWY_ARCH_ARM_V7
+-- 
+2.39.2
+

+ 118 - 0
package/highway/0002-Fix-compilation-for-armv7-targets-with-vfp-v4-and-gc.patch

@@ -0,0 +1,118 @@
+From 93d4579f90dd6ad26fd0dcda6420b3bb2fdcbc02 Mon Sep 17 00:00:00 2001
+From: Julien Olivain <ju.o@free.fr>
+Date: Mon, 20 Feb 2023 23:22:28 +0100
+Subject: [PATCH] Fix compilation for armv7 targets with vfp < v4 and gcc >= 8
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When using a armv7 gcc >= 8 toolchain (like [1]) with Highway
+configured with -DHWY_CMAKE_ARM7=OFF and HWY_ENABLE_CONTRIB=ON,
+compilation fails with error:
+
+    In file included from /build/highway-1.0.3/hwy/ops/arm_neon-inl.h:33,
+                     from /build/highway-1.0.3/hwy/highway.h:358,
+                     from /build/highway-1.0.3/hwy/contrib/sort/shared-inl.h:104,
+                     from /build/highway-1.0.3/hwy/contrib/sort/traits128-inl.h:27,
+                     from /build/highway-1.0.3/hwy/contrib/sort/vqsort_128d.cc:23,
+                     from /build/highway-1.0.3/hwy/foreach_target.h:81,
+                     from /build/highway-1.0.3/hwy/contrib/sort/vqsort_128d.cc:20:
+    /toolchain/lib/gcc/arm-buildroot-linux-gnueabihf/12.2.0/include/arm_neon.h: In function 'void hwy::N_NEON::StoreU(Vec128<long long unsigned int, 2>, Full128<long long unsigned int>, uint64_t*)':
+    /toolchain/lib/gcc/arm-buildroot-linux-gnueabihf/12.2.0/include/arm_neon.h:11052:1: error: inlining failed in call to 'always_inline' 'void vst1q_u64(uint64_t*, uint64x2_t)': target specific option mismatch
+    11052 | vst1q_u64 (uint64_t * __a, uint64x2_t __b)
+          | ^~~~~~~~~
+    /build/highway-1.0.3/hwy/ops/arm_neon-inl.h:2786:12: note: called from here
+     2786 |   vst1q_u64(unaligned, v.raw);
+          |   ~~~~~~~~~^~~~~~~~~~~~~~~~~~
+
+The same errors happen when configured with HWY_ENABLE_EXAMPLES=ON,
+or from client libraries like libjxl (at other places).
+
+The issue is that Highway Arm NEON ops have a dependency on the
+Advanced SIMD (Neon) v2 and the VFPv4 floating-point instructions.
+The SIMD (Neon) v1 and VFPv3 instructions are not supported.
+
+There was several attempts to fix variants of this issues.
+See #834 and #1032.
+
+HWY_NEON target is selected only if __ARM_NEON is defined. See:
+https://github.com/google/highway/blob/1.0.3/hwy/detect_targets.h#L251
+
+This test is not sufficient since __ARM_NEON will be predefined in
+any cases when Neon is enabled (neon-vfpv3, neon-vfpv4).
+
+The issue is that HWY_CMAKE_ARM7=ON implies VFPv4 / NEON SIMD v2.
+When setting HWY_CMAKE_ARM7=OFF, "neon-vfpv4" will not be forced,
+but the code is still using intrinsics assuming VFPv4. Gcc will fail
+with error because code cannot be generated for the selected
+architecture.
+
+This issue can be avoided by adding "-DHWY_DISABLED_TARGETS=HWY_NEON" in
+CXXFLAGS. The problem with this solution is that every client program will
+also need to do the same. This goes against the very purpose of
+"hwy/detect_targets.h".
+
+Technically, Armv7-a processors with VFPv4 can be detected using some
+ACLE (Arm C Language Extensions [2]) predefined macros:
+
+Basically, we want Highway to define HWY_NEON only when the target
+supports SIMDv2/VFPv4 or higher. An older target with vfpv3 only
+(e.g. Cortex-A8, A9, ...) would NOT define HWY_NEON, and therefore
+would fallback on HWY_SCALAR implementation.
+
+However, not all compiler completely support ACLE. There is also
+several versions too. So we cannot easily rely on macros like
+"__ARM_VFPV4__" (which clang predefine, but not gcc).
+
+The alternative solution proposed in this patch, is to declare the
+HWY_NEON target architecture as broken, when we detect the target is
+Armv7-A, but mandatory features for vfpv4 (namely half-float, FMA)
+are missing. Half-floats are tested using the macro __ARM_NEON_FP,
+and the FMA with the macro __ARM_FEATURE_FMA. See ACLE [2]. The
+intent of declaring the target as broken, rather than selecting
+HWY_NEON only if vfpv4 features are detected is to remain a bit
+conservative, since the detection is slithly inaccurate.
+
+For a given compiler/cflags, predefined macros for Arm/ACLE can be
+reviewed with commands like:
+
+    arm-linux-gnueabihf-gcc -mcpu=cortex-a9 -mfpu=neon-vfpv3 -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort
+    arm-linux-gnueabihf-gcc -mcpu=cortex-a7 -mfpu=neon-vfpv4 -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort
+    clang -target armv7a -mcpu=cortex-a9 -mfpu=neon-vfpv3 -mfloat-abi=hard -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort
+    clang -target armv7a -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort
+
+The different values of __ARM_NEON_FP can be seen, depending which
+"-mfpu" is passed. Same for __ARM_FEATURE_FMA.
+
+[1] https://toolchains.bootlin.com/downloads/releases/toolchains/armv7-eabihf/tarballs/armv7-eabihf--glibc--bleeding-edge-2022.08-1.tar.bz2
+[2] https://github.com/ARM-software/acle/
+
+Signed-off-by: Julien Olivain <ju.o@free.fr>
+---
+ hwy/detect_targets.h | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/hwy/detect_targets.h b/hwy/detect_targets.h
+index 2beca95..40ae7fe 100644
+--- a/hwy/detect_targets.h
++++ b/hwy/detect_targets.h
+@@ -154,6 +154,16 @@
+      (defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN))
+ #define HWY_BROKEN_TARGETS (HWY_NEON)
+ 
++// armv7-a without a detected vfpv4 is not supported
++// (for example Cortex-A8, Cortex-A9)
++// vfpv4 always have neon half-float _and_ FMA.
++#elif HWY_ARCH_ARM_V7 && \
++    (__ARM_ARCH_PROFILE == 'A') && \
++    !defined(__ARM_VFPV4__) && \
++        !((__ARM_NEON_FP & 0x2 /* half-float */) && \
++          (__ARM_FEATURE_FMA == 1))
++#define HWY_BROKEN_TARGETS (HWY_NEON)
++
+ // SVE[2] require recent clang or gcc versions.
+ #elif (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
+     (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
+-- 
+2.39.2
+

+ 1 - 1
package/highway/highway.hash

@@ -1,3 +1,3 @@
 # Locally computed:
-sha256  e8ef71236ac0d97f12d553ec1ffc5b6375d57b5f0b860c7447dd69b6ed1072db  highway-1.0.2.tar.gz
+sha256  566fc77315878473d9a6bd815f7de78c73734acdcb745c3dde8579560ac5440e  highway-1.0.3.tar.gz
 sha256  43070e2d4e532684de521b885f385d0841030efa2b1a20bafb76133a5e1379c1  LICENSE

+ 4 - 1
package/highway/highway.mk

@@ -4,7 +4,7 @@
 #
 ################################################################################
 
-HIGHWAY_VERSION = 1.0.2
+HIGHWAY_VERSION = 1.0.3
 HIGHWAY_SITE = $(call github,google,highway,$(HIGHWAY_VERSION))
 HIGHWAY_LICENSE = Apache-2.0
 HIGHWAY_LICENSE_FILES = LICENSE
@@ -35,6 +35,9 @@ endif
 ifeq ($(BR2_ARM_FPU_VFPV4),y)
 HIGHWAY_CONF_OPTS += -DHWY_CMAKE_ARM7=ON
 else
+# Highway Armv7 Neon support requires in fact vfpv4 / neon v2. When we
+# are in a vfpv3 case (e.g. Cortex-A8, Cortex-A9) this flag need to be
+# set to off.
 HIGHWAY_CONF_OPTS += -DHWY_CMAKE_ARM7=OFF
 endif