Commit ad11042132c7db78e8ae57a364c37df74572e8b6

Authored by Bassam Tabbara
1 parent 4339569f
Exists in master and in 1 other branch v3

Simplify SIMD make scripts

ax_ext.m4 no longer performs any CPU checks. Instead it just checks
whether the compiler supports the SIMD flags.

Runtime detection will choose the right methods based on the CPU
instructions available.

Intel AVX support is still done through the build since it would
require a major refactoring of the code base to support it at runtime.
For now I added a configuration flag --enable-avx that can be used
to compile with AVX support.

Also use cpu intrinsics instead of __asm__
configure.ac
... ... @@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind],
66 66 [enable_valgrind=no])
67 67 AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno)
68 68  
  69 +AC_ARG_ENABLE([avx], AS_HELP_STRING([--enable-avx], [Build with AVX optimizations]))
  70 +AX_CHECK_COMPILE_FLAG(-mavx, [ax_cv_support_avx=yes], [])
  71 +
  72 +AS_IF([test "x$enable_avx" = "xyes"],
  73 + [AS_IF([test "x$ax_cv_support_avx" = "xno"],
  74 + [AC_MSG_ERROR([AVX requested but compiler does not support -mavx])],
  75 + [SIMD_FLAGS="$SIMD_FLAGS -mavx"])
  76 + ])
  77 +
69 78 AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile])
70 79 AC_OUTPUT
... ...
m4/ax_ext.m4
1 1 #
2   -# Updated by KMG to support -DINTEL_SSE for GF-Complete
  2 +# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html
  3 +# but simplified to do compile time SIMD checks only
3 4 #
4   -# ===========================================================================
5   -# http://www.gnu.org/software/autoconf-archive/ax_ext.html
6   -# ===========================================================================
7   -#
8   -# SYNOPSIS
9   -#
10   -# AX_EXT
11   -#
12   -# DESCRIPTION
13   -#
14   -# Find supported SIMD extensions by requesting cpuid. When an SIMD
15   -# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
16   -# compiler supports it. For example, if "sse2" is available, then "-msse2"
17   -# is added to SIMD_FLAGS.
18   -#
19   -# This macro calls:
20   -#
21   -# AC_SUBST(SIMD_FLAGS)
22   -#
23   -# And defines:
24   -#
25   -# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
26   -#
27   -# LICENSE
28   -#
29   -# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
30   -# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
31   -#
32   -# Copying and distribution of this file, with or without modification, are
33   -# permitted in any medium without royalty provided the copyright notice
34   -# and this notice are preserved. This file is offered as-is, without any
35   -# warranty.
36   -
37   -#serial 12
38 5  
39 6 AC_DEFUN([AX_EXT],
40 7 [
... ... @@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT],
45 12 AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
46 13 SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
47 14  
48   - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
49   - [
50   - # TODO: detect / cross-compile
51   - ax_cv_have_neon_ext=yes
52   - ])
53   - AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
54   - [
55   - # TODO: detect / cross-compile
56   - ax_cv_have_arm_crypt_ext=yes
57   - ])
58   -
59   - if test "$ax_cv_have_arm_crypt_ext" = yes; then
60   - AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
61   - fi
62   -
  15 + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
63 16 if test "$ax_cv_have_neon_ext" = yes; then
64   - AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
  17 + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no])
65 18 fi
66   -
67   - if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
68   - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
69   - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
70   - elif test "$ax_cv_have_arm_crypt_ext" = yes; then
71   - AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
72   - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
73   - elif test "$ax_cv_have_neon_ext" = yes; then
74   - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
75   - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
76   - fi
77   - ;;
  19 + ;;
78 20  
79 21 arm*)
80   - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
81   - [
82   - # TODO: detect / cross-compile
83   - ax_cv_have_neon_ext=yes
84   - ])
85   -
  22 + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
86 23 if test "$ax_cv_have_neon_ext" = yes; then
87   - AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
88   - AX_CHECK_COMPILE_FLAG(-mfpu=neon,
89   - SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
  24 + AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no])
90 25 fi
91   - ;;
  26 + ;;
92 27  
93 28 powerpc*)
94   - AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
95   - [
96   - if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
97   - if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
98   - ax_cv_have_altivec_ext=yes
99   - fi
100   - fi
101   - ])
102   -
103   - if test "$ax_cv_have_altivec_ext" = yes; then
104   - AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
105   - AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
106   - fi
107   - ;;
108   -
109   -
110   - i[[3456]]86*|x86_64*|amd64*)
111   -
112   - AC_REQUIRE([AX_GCC_X86_CPUID])
113   - AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
114   -
115   - AX_GCC_X86_CPUID(0x00000001)
116   - ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
117   - edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
118   -
119   - AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
120   - [
121   - ax_cv_have_mmx_ext=no
122   - if test "$((0x$edx>>23&0x01))" = 1; then
123   - ax_cv_have_mmx_ext=yes
124   - fi
125   - ])
126   -
127   - AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
128   - [
129   - ax_cv_have_sse_ext=no
130   - if test "$((0x$edx>>25&0x01))" = 1; then
131   - ax_cv_have_sse_ext=yes
132   - fi
133   - ])
134   -
135   - AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
136   - [
137   - ax_cv_have_sse2_ext=no
138   - if test "$((0x$edx>>26&0x01))" = 1; then
139   - ax_cv_have_sse2_ext=yes
140   - fi
141   - ])
142   -
143   - AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
144   - [
145   - ax_cv_have_sse3_ext=no
146   - if test "$((0x$ecx&0x01))" = 1; then
147   - ax_cv_have_sse3_ext=yes
148   - fi
149   - ])
150   -
151   - AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],
152   - [
153   - ax_cv_have_pclmuldq_ext=no
154   - if test "$((0x$ecx>>1&0x01))" = 1; then
155   - ax_cv_have_pclmuldq_ext=yes
156   - fi
157   - ])
158   -
159   - AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
160   - [
161   - ax_cv_have_ssse3_ext=no
162   - if test "$((0x$ecx>>9&0x01))" = 1; then
163   - ax_cv_have_ssse3_ext=yes
164   - fi
165   - ])
166   -
167   - AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
168   - [
169   - ax_cv_have_sse41_ext=no
170   - if test "$((0x$ecx>>19&0x01))" = 1; then
171   - ax_cv_have_sse41_ext=yes
172   - fi
173   - ])
174   -
175   - AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
176   - [
177   - ax_cv_have_sse42_ext=no
178   - if test "$((0x$ecx>>20&0x01))" = 1; then
179   - ax_cv_have_sse42_ext=yes
180   - fi
181   - ])
182   -
183   - AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
184   - [
185   - ax_cv_have_avx_cpu_ext=no
186   - if test "$((0x$ecx>>28&0x01))" = 1; then
187   - ax_cv_have_avx_cpu_ext=yes
188   - fi
189   - ])
190   -
191   - if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
192   - AX_GCC_X86_AVX_XGETBV(0x00000000)
193   -
194   - xgetbv_eax="0"
195   - if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
196   - xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
197   - fi
198   -
199   - AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
200   - [
201   - ax_cv_have_avx_ext=no
202   -
203   - if test "$((0x$ecx>>27&0x01))" = 1; then
204   - if test "$((0x$xgetbv_eax&0x6))" = 6; then
205   - ax_cv_have_avx_ext=yes
206   - fi
207   - fi
208   - ])
209   - if test x"$ax_cv_have_avx_ext" = x"no"; then
210   - AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
211   - fi
  29 + AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes])
  30 + if test "$ax_cv_have_altivec_ext" = yes; then
  31 + AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no])
212 32 fi
  33 + ;;
213 34  
214   - if test "$ax_cv_have_mmx_ext" = yes; then
215   - AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
216   - if test x"$ax_cv_support_mmx_ext" = x"yes"; then
217   - SIMD_FLAGS="$SIMD_FLAGS -mmmx"
218   - AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
219   - else
220   - AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
221   - fi
222   - fi
  35 + i[[3456]]86*|x86_64*|amd64*)
223 36  
  37 + AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes])
224 38 if test "$ax_cv_have_sse_ext" = yes; then
225   - AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
226   - if test x"$ax_cv_support_sse_ext" = x"yes"; then
227   - SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"
228   - AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
229   - else
230   - AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
231   - fi
  39 + AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no])
232 40 fi
233 41  
  42 + AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes])
234 43 if test "$ax_cv_have_sse2_ext" = yes; then
235   - AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
236   - if test x"$ax_cv_support_sse2_ext" = x"yes"; then
237   - SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"
238   - AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
239   - else
240   - AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
241   - fi
  44 + AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no])
242 45 fi
243 46  
  47 + AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes])
244 48 if test "$ax_cv_have_sse3_ext" = yes; then
245   - AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
246   - if test x"$ax_cv_support_sse3_ext" = x"yes"; then
247   - SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"
248   - AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
249   - else
250   - AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
251   - fi
252   - fi
253   -
254   - if test "$ax_cv_have_pclmuldq_ext" = yes; then
255   - AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])
256   - if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then
257   - SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"
258   - AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication])
259   - else
260   - AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])
261   - fi
  49 + AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no])
262 50 fi
263 51  
  52 + AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes])
264 53 if test "$ax_cv_have_ssse3_ext" = yes; then
265   - AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
266   - if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
267   - SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"
268   - AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
269   - else
270   - AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
271   - fi
  54 + AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no])
272 55 fi
273 56  
274   - if test "$ax_cv_have_sse41_ext" = yes; then
275   - AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
276   - if test x"$ax_cv_support_sse41_ext" = x"yes"; then
277   - SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"
278   - AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
279   - else
280   - AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
281   - fi
  57 + AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes])
  58 + if test "$ax_cv_have_pclmuldq_ext" = yes; then
  59 + AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no])
282 60 fi
283 61  
284   - if test "$ax_cv_have_sse42_ext" = yes; then
285   - AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
286   - if test x"$ax_cv_support_sse42_ext" = x"yes"; then
287   - SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"
288   - AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
289   - else
290   - AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
291   - fi
  62 + AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes])
  63 + if test "$ax_cv_have_sse41_ext" = yes; then
  64 + AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no])
292 65 fi
293 66  
294   - if test "$ax_cv_have_avx_ext" = yes; then
295   - AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
296   - if test x"$ax_cv_support_avx_ext" = x"yes"; then
297   - SIMD_FLAGS="$SIMD_FLAGS -mavx"
298   - AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
299   - else
300   - AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
301   - fi
  67 + AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes])
  68 + if test "$ax_cv_have_sse42_ext" = yes; then
  69 + AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no])
302 70 fi
303   -
304   - ;;
  71 + ;;
305 72 esac
306 73  
307 74 AC_SUBST(SIMD_FLAGS)
... ...
m4/ax_gcc_x86_avx_xgetbv.m4
... ... @@ -1,79 +0,0 @@
1   -# ===========================================================================
2   -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
3   -# ===========================================================================
4   -#
5   -# SYNOPSIS
6   -#
7   -# AX_GCC_X86_AVX_XGETBV
8   -#
9   -# DESCRIPTION
10   -#
11   -# On later x86 processors with AVX SIMD support, with gcc or a compiler
12   -# that has a compatible syntax for inline assembly instructions, run a
13   -# small program that executes the xgetbv instruction with input OP. This
14   -# can be used to detect if the OS supports AVX instruction usage.
15   -#
16   -# On output, the values of the eax and edx registers are stored as
17   -# hexadecimal strings as "eax:edx" in the cache variable
18   -# ax_cv_gcc_x86_avx_xgetbv.
19   -#
20   -# If the xgetbv instruction fails (because you are running a
21   -# cross-compiler, or because you are not using gcc, or because you are on
22   -# a processor that doesn't have this instruction),
23   -# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
24   -#
25   -# This macro mainly exists to be used in AX_EXT.
26   -#
27   -# LICENSE
28   -#
29   -# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
30   -#
31   -# This program is free software: you can redistribute it and/or modify it
32   -# under the terms of the GNU General Public License as published by the
33   -# Free Software Foundation, either version 3 of the License, or (at your
34   -# option) any later version.
35   -#
36   -# This program is distributed in the hope that it will be useful, but
37   -# WITHOUT ANY WARRANTY; without even the implied warranty of
38   -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
39   -# Public License for more details.
40   -#
41   -# You should have received a copy of the GNU General Public License along
42   -# with this program. If not, see <http://www.gnu.org/licenses/>.
43   -#
44   -# As a special exception, the respective Autoconf Macro's copyright owner
45   -# gives unlimited permission to copy, distribute and modify the configure
46   -# scripts that are the output of Autoconf when processing the Macro. You
47   -# need not follow the terms of the GNU General Public License when using
48   -# or distributing such scripts, even though portions of the text of the
49   -# Macro appear in them. The GNU General Public License (GPL) does govern
50   -# all other use of the material that constitutes the Autoconf Macro.
51   -#
52   -# This special exception to the GPL applies to versions of the Autoconf
53   -# Macro released by the Autoconf Archive. When you make and distribute a
54   -# modified version of the Autoconf Macro, you may extend this special
55   -# exception to the GPL to apply to your modified version as well.
56   -
57   -#serial 1
58   -
59   -AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
60   -[AC_REQUIRE([AC_PROG_CC])
61   -AC_LANG_PUSH([C])
62   -AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
63   - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
64   - int op = $1, eax, edx;
65   - FILE *f;
66   - /* Opcodes for xgetbv */
67   - __asm__(".byte 0x0f, 0x01, 0xd0"
68   - : "=a" (eax), "=d" (edx)
69   - : "c" (op));
70   - f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
71   - fprintf(f, "%x:%x\n", eax, edx);
72   - fclose(f);
73   - return 0;
74   -])],
75   - [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
76   - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
77   - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
78   -AC_LANG_POP([C])
79   -])
m4/ax_gcc_x86_cpuid.m4
... ... @@ -1,79 +0,0 @@
1   -# ===========================================================================
2   -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
3   -# ===========================================================================
4   -#
5   -# SYNOPSIS
6   -#
7   -# AX_GCC_X86_CPUID(OP)
8   -#
9   -# DESCRIPTION
10   -#
11   -# On Pentium and later x86 processors, with gcc or a compiler that has a
12   -# compatible syntax for inline assembly instructions, run a small program
13   -# that executes the cpuid instruction with input OP. This can be used to
14   -# detect the CPU type.
15   -#
16   -# On output, the values of the eax, ebx, ecx, and edx registers are stored
17   -# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
18   -# ax_cv_gcc_x86_cpuid_OP.
19   -#
20   -# If the cpuid instruction fails (because you are running a
21   -# cross-compiler, or because you are not using gcc, or because you are on
22   -# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
23   -# is set to the string "unknown".
24   -#
25   -# This macro mainly exists to be used in AX_GCC_ARCHFLAG.
26   -#
27   -# LICENSE
28   -#
29   -# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
30   -# Copyright (c) 2008 Matteo Frigo
31   -#
32   -# This program is free software: you can redistribute it and/or modify it
33   -# under the terms of the GNU General Public License as published by the
34   -# Free Software Foundation, either version 3 of the License, or (at your
35   -# option) any later version.
36   -#
37   -# This program is distributed in the hope that it will be useful, but
38   -# WITHOUT ANY WARRANTY; without even the implied warranty of
39   -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
40   -# Public License for more details.
41   -#
42   -# You should have received a copy of the GNU General Public License along
43   -# with this program. If not, see <http://www.gnu.org/licenses/>.
44   -#
45   -# As a special exception, the respective Autoconf Macro's copyright owner
46   -# gives unlimited permission to copy, distribute and modify the configure
47   -# scripts that are the output of Autoconf when processing the Macro. You
48   -# need not follow the terms of the GNU General Public License when using
49   -# or distributing such scripts, even though portions of the text of the
50   -# Macro appear in them. The GNU General Public License (GPL) does govern
51   -# all other use of the material that constitutes the Autoconf Macro.
52   -#
53   -# This special exception to the GPL applies to versions of the Autoconf
54   -# Macro released by the Autoconf Archive. When you make and distribute a
55   -# modified version of the Autoconf Macro, you may extend this special
56   -# exception to the GPL to apply to your modified version as well.
57   -
58   -#serial 7
59   -
60   -AC_DEFUN([AX_GCC_X86_CPUID],
61   -[AC_REQUIRE([AC_PROG_CC])
62   -AC_LANG_PUSH([C])
63   -AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
64   - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
65   - int op = $1, eax, ebx, ecx, edx;
66   - FILE *f;
67   - __asm__("cpuid"
68   - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
69   - : "a" (op));
70   - f = fopen("conftest_cpuid", "w"); if (!f) return 1;
71   - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
72   - fclose(f);
73   - return 0;
74   -])],
75   - [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
76   - [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
77   - [ax_cv_gcc_x86_cpuid_$1=unknown])])
78   -AC_LANG_POP([C])
79   -])
src/gf_cpu.c
... ... @@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0;
22 22  
23 23 #if defined(__x86_64__)
24 24  
  25 +#if defined(_MSC_VER)
  26 +
  27 +#define cpuid(info, x) __cpuidex(info, x, 0)
  28 +
  29 +#elif defined(__GNUC__)
  30 +
  31 +#include <cpuid.h>
  32 +void cpuid(int info[4], int InfoType){
  33 + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
  34 +}
  35 +
  36 +#else
  37 +
  38 +#error please add a way to detect CPU SIMD support at runtime
  39 +
  40 +#endif
  41 +
25 42 void gf_cpu_identify(void)
26 43 {
27 44 if (gf_cpu_identified) {
28 45 return;
29 46 }
30 47  
31   - int op = 1, eax, ebx, ecx, edx;
  48 + int reg[4];
32 49  
33   - __asm__("cpuid"
34   - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
35   - : "a" (op));
  50 + cpuid(reg, 1);
36 51  
37 52 #if defined(INTEL_SSE4_PCLMUL)
38   - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
  53 + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
39 54 gf_cpu_supports_intel_pclmul = 1;
40 55 #ifdef DEBUG_CPU_DETECTION
41 56 printf("#gf_cpu_supports_intel_pclmul\n");
... ... @@ -44,7 +59,7 @@ void gf_cpu_identify(void)
44 59 #endif
45 60  
46 61 #if defined(INTEL_SSE4)
47   - if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
  62 + if (((reg[2] & (1<<20)) != 0 || (reg[2] & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
48 63 gf_cpu_supports_intel_sse4 = 1;
49 64 #ifdef DEBUG_CPU_DETECTION
50 65 printf("#gf_cpu_supports_intel_sse4\n");
... ... @@ -53,7 +68,7 @@ void gf_cpu_identify(void)
53 68 #endif
54 69  
55 70 #if defined(INTEL_SSSE3)
56   - if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
  71 + if ((reg[2] & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
57 72 gf_cpu_supports_intel_ssse3 = 1;
58 73 #ifdef DEBUG_CPU_DETECTION
59 74 printf("#gf_cpu_supports_intel_ssse3\n");
... ... @@ -62,7 +77,7 @@ void gf_cpu_identify(void)
62 77 #endif
63 78  
64 79 #if defined(INTEL_SSE3)
65   - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
  80 + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
66 81 gf_cpu_supports_intel_sse3 = 1;
67 82 #ifdef DEBUG_CPU_DETECTION
68 83 printf("#gf_cpu_supports_intel_sse3\n");
... ... @@ -71,7 +86,7 @@ void gf_cpu_identify(void)
71 86 #endif
72 87  
73 88 #if defined(INTEL_SSE2)
74   - if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
  89 + if ((reg[3] & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
75 90 gf_cpu_supports_intel_sse2 = 1;
76 91 #ifdef DEBUG_CPU_DETECTION
77 92 printf("#gf_cpu_supports_intel_sse2\n");
... ...
test/Makefile.am
1 1 # GF-Complete 'test' AM file
2 2  
3 3 AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
4   -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
  4 +AM_CFLAGS = -O3 -fPIC
5 5  
6 6 bin_PROGRAMS = gf_unit
7 7  
... ...
tools/Makefile.am
1 1 # GF-Complete 'tools' AM file
2 2  
3 3 AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
4   -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
  4 +AM_CFLAGS = -O3 -fPIC
5 5  
6 6 bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time
7 7  
... ...
tools/test_simd.sh
... ... @@ -27,6 +27,16 @@ test_functions() {
27 27 return ${failed}
28 28 }
29 29  
  30 +# build with DEBUG_CPU_FUNCTIONS and print out CPU detection
  31 +test_detection() {
  32 + failed=0
  33 +
  34 + { ./configure && make clean && make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> ${results}; return 1; }
  35 + { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); }
  36 +
  37 + return ${failed}
  38 +}
  39 +
30 40 compile_arm() {
31 41 failed=0
32 42  
... ... @@ -167,7 +177,7 @@ runtime_intel_flags() {
167 177 { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); }
168 178 done
169 179  
170   - echo "====SSE2 support..." >> ${1}
  180 + echo "====SSE2 support..." >> ${1}
171 181 export ax_cv_have_sse_ext=no
172 182 export ax_cv_have_sse2_ext=yes
173 183 export ax_cv_have_sse3_ext=no
... ...
tools/test_simd_qemu.sh
... ... @@ -224,6 +224,8 @@ run_test_simd_basic() {
224 224 { run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
225 225 echo "=====running functions test"
226 226 { run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
  227 + echo "=====running detection test"
  228 + { run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
227 229 echo "=====running runtime test"
228 230 { run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
229 231 stop_qemu
... ...