Commit eb5ce0ca4206ed4f74009c1b9a3a72407693448b

Authored by Janne Grunau
1 parent 568df90e
Exists in master and in 3 other branches: v2, v3, wip-18092

configure: add ARM/AArch64 NEON support

Checks for the arm_neon.h header.
configure.ac
... ... @@ -24,6 +24,27 @@ AC_PROG_CC
24 24  
25 25 AX_EXT()
26 26  
  27 +AC_ARG_ENABLE([neon],
  28 + AS_HELP_STRING([--disable-neon], [Build without NEON optimizations]))
  29 +
  30 +AS_IF([test "x$enable_neon" != "xno"],
  31 + [noneon_CPPFLAGS=$CPPFLAGS
  32 + CPPFLAGS="$CPPFLAGS $SIMD_FLAGS"
  33 + AC_CHECK_HEADER([arm_neon.h],
  34 + [have_neon=yes],
  35 + [have_neon=no
  36 + CPPFLAGS=$noneon_CPPFLAGS])],
  37 + [have_neon=no
  38 + AS_IF([test "x$ax_cv_have_neon_ext" = "xyes"],
  39 + [SIMD_FLAGS=""])
  40 + ])
  41 +
  42 +AS_IF([test "x$have_neon" = "xno"],
  43 + [AS_IF([test "x$enable_neon" = "xyes"],
  44 + [AC_MSG_ERROR([neon requested but arm_neon.h not found])])
  45 + ])
  46 +AM_CONDITIONAL([HAVE_NEON], [test "x$have_neon" = "xyes"])
  47 +
27 48 AC_ARG_ENABLE([sse],
28 49 AS_HELP_STRING([--disable-sse], [Build without SSE optimizations]),
29 50 [if test "x$enableval" = "xno" ; then
... ...
include/gf_complete.h
... ... @@ -33,6 +33,10 @@
33 33 #include <wmmintrin.h>
34 34 #endif
35 35  
  36 +#if defined(ARM_NEON)
  37 + #include <arm_neon.h>
  38 +#endif
  39 +
36 40  
37 41 /* These are the different ways to perform multiplication.
38 42 Not all are implemented for all values of w.
... ...
m4/ax_ext.m4
... ... @@ -41,6 +41,55 @@ AC_DEFUN([AX_EXT],
41 41 AC_REQUIRE([AC_CANONICAL_HOST])
42 42  
43 43 case $host_cpu in
  44 + aarch64*)
  45 + AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
  46 + SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
  47 +
  48 + AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
  49 + [
  50 + # TODO: actually detect NEON support (must also work when cross-compiling); assumed present for now
  51 + ax_cv_have_neon_ext=yes
  52 + ])
  53 + AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
  54 + [
  55 + # TODO: actually detect the cryptographic extension (must also work when cross-compiling); assumed present for now
  56 + ax_cv_have_arm_crypt_ext=yes
  57 + ])
  58 +
  59 + if test "$ax_cv_have_arm_crypt_ext" = yes; then
  60 + AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
  61 + fi
  62 +
  63 + if test "$ax_cv_have_neon_ext" = yes; then
  64 + AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
  65 + fi
  66 +
  67 + if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
  68 + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
  69 + SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
  70 + elif test "$ax_cv_have_arm_crypt_ext" = yes; then
  71 + AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
  72 + SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
  73 + elif test "$ax_cv_have_neon_ext" = yes; then
  74 + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
  75 + SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
  76 + fi
  77 + ;;
  78 +
  79 + arm*)
  80 + AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
  81 + [
  82 + # TODO: actually detect NEON support (must also work when cross-compiling); assumed present for now
  83 + ax_cv_have_neon_ext=yes
  84 + ])
  85 +
  86 + if test "$ax_cv_have_neon_ext" = yes; then
  87 + AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
  88 + AX_CHECK_COMPILE_FLAG(-mfpu=neon,
  89 + SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
  90 + fi
  91 + ;;
  92 +
44 93 powerpc*)
45 94 AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
46 95 [
... ...