Commit ad11042132c7db78e8ae57a364c37df74572e8b6

Authored by Bassam Tabbara
1 parent 4339569f
Exists in master and in 1 other branch v3

Simplify SIMD make scripts

ax_ext.m4 no longer performs any CPU checks. Instead it just checks
if the compiler supports the SIMD flags.

Runtime detection will choose the right methods based on the CPU
instructions available.

Intel AVX support is still done through the build since it would
require a major refactoring of the code base to support it at runtime.
For now I added a configuration flag --enable-avx that can be used
to compile with AVX support.

Also use cpu intrinsics instead of __asm__
@@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind], @@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind],
66 [enable_valgrind=no]) 66 [enable_valgrind=no])
67 AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno) 67 AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno)
68 68
  69 +AC_ARG_ENABLE([avx], AS_HELP_STRING([--enable-avx], [Build with AVX optimizations]))
  70 +AX_CHECK_COMPILE_FLAG(-mavx, [ax_cv_support_avx=yes], [])
  71 +
  72 +AS_IF([test "x$enable_avx" = "xyes"],
  73 + [AS_IF([test "x$ax_cv_support_avx" = "xno"],
  74 + [AC_MSG_ERROR([AVX requested but compiler does not support -mavx])],
  75 + [SIMD_FLAGS="$SIMD_FLAGS -mavx"])
  76 + ])
  77 +
69 AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile]) 78 AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile])
70 AC_OUTPUT 79 AC_OUTPUT
1 # 1 #
2 -# Updated by KMG to support -DINTEL_SSE for GF-Complete 2 +# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html
  3 +# but simplified to do compile time SIMD checks only
3 # 4 #
4 -# ===========================================================================  
5 -# http://www.gnu.org/software/autoconf-archive/ax_ext.html  
6 -# ===========================================================================  
7 -#  
8 -# SYNOPSIS  
9 -#  
10 -# AX_EXT  
11 -#  
12 -# DESCRIPTION  
13 -#  
14 -# Find supported SIMD extensions by requesting cpuid. When an SIMD  
15 -# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if  
16 -# compiler supports it. For example, if "sse2" is available, then "-msse2"  
17 -# is added to SIMD_FLAGS.  
18 -#  
19 -# This macro calls:  
20 -#  
21 -# AC_SUBST(SIMD_FLAGS)  
22 -#  
23 -# And defines:  
24 -#  
25 -# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX  
26 -#  
27 -# LICENSE  
28 -#  
29 -# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>  
30 -# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>  
31 -#  
32 -# Copying and distribution of this file, with or without modification, are  
33 -# permitted in any medium without royalty provided the copyright notice  
34 -# and this notice are preserved. This file is offered as-is, without any  
35 -# warranty.  
36 -  
37 -#serial 12  
38 5
39 AC_DEFUN([AX_EXT], 6 AC_DEFUN([AX_EXT],
40 [ 7 [
@@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT], @@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT],
45 AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64]) 12 AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
46 SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64" 13 SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
47 14
48 - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],  
49 - [  
50 - # TODO: detect / cross-compile  
51 - ax_cv_have_neon_ext=yes  
52 - ])  
53 - AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],  
54 - [  
55 - # TODO: detect / cross-compile  
56 - ax_cv_have_arm_crypt_ext=yes  
57 - ])  
58 -  
59 - if test "$ax_cv_have_arm_crypt_ext" = yes; then  
60 - AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])  
61 - fi  
62 - 15 + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
63 if test "$ax_cv_have_neon_ext" = yes; then 16 if test "$ax_cv_have_neon_ext" = yes; then
64 - AC_DEFINE(HAVE_NEON,,[Support NEON instructions]) 17 + AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no])
65 fi 18 fi
66 -  
67 - if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then  
68 - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,  
69 - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])  
70 - elif test "$ax_cv_have_arm_crypt_ext" = yes; then  
71 - AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,  
72 - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])  
73 - elif test "$ax_cv_have_neon_ext" = yes; then  
74 - AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,  
75 - SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])  
76 - fi  
77 - ;; 19 + ;;
78 20
79 arm*) 21 arm*)
80 - AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],  
81 - [  
82 - # TODO: detect / cross-compile  
83 - ax_cv_have_neon_ext=yes  
84 - ])  
85 - 22 + AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
86 if test "$ax_cv_have_neon_ext" = yes; then 23 if test "$ax_cv_have_neon_ext" = yes; then
87 - AC_DEFINE(HAVE_NEON,,[Support NEON instructions])  
88 - AX_CHECK_COMPILE_FLAG(-mfpu=neon,  
89 - SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", []) 24 + AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no])
90 fi 25 fi
91 - ;; 26 + ;;
92 27
93 powerpc*) 28 powerpc*)
94 - AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],  
95 - [  
96 - if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then  
97 - if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then  
98 - ax_cv_have_altivec_ext=yes  
99 - fi  
100 - fi  
101 - ])  
102 -  
103 - if test "$ax_cv_have_altivec_ext" = yes; then  
104 - AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])  
105 - AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])  
106 - fi  
107 - ;;  
108 -  
109 -  
110 - i[[3456]]86*|x86_64*|amd64*)  
111 -  
112 - AC_REQUIRE([AX_GCC_X86_CPUID])  
113 - AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])  
114 -  
115 - AX_GCC_X86_CPUID(0x00000001)  
116 - ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`  
117 - edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`  
118 -  
119 - AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],  
120 - [  
121 - ax_cv_have_mmx_ext=no  
122 - if test "$((0x$edx>>23&0x01))" = 1; then  
123 - ax_cv_have_mmx_ext=yes  
124 - fi  
125 - ])  
126 -  
127 - AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],  
128 - [  
129 - ax_cv_have_sse_ext=no  
130 - if test "$((0x$edx>>25&0x01))" = 1; then  
131 - ax_cv_have_sse_ext=yes  
132 - fi  
133 - ])  
134 -  
135 - AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],  
136 - [  
137 - ax_cv_have_sse2_ext=no  
138 - if test "$((0x$edx>>26&0x01))" = 1; then  
139 - ax_cv_have_sse2_ext=yes  
140 - fi  
141 - ])  
142 -  
143 - AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],  
144 - [  
145 - ax_cv_have_sse3_ext=no  
146 - if test "$((0x$ecx&0x01))" = 1; then  
147 - ax_cv_have_sse3_ext=yes  
148 - fi  
149 - ])  
150 -  
151 - AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],  
152 - [  
153 - ax_cv_have_pclmuldq_ext=no  
154 - if test "$((0x$ecx>>1&0x01))" = 1; then  
155 - ax_cv_have_pclmuldq_ext=yes  
156 - fi  
157 - ])  
158 -  
159 - AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],  
160 - [  
161 - ax_cv_have_ssse3_ext=no  
162 - if test "$((0x$ecx>>9&0x01))" = 1; then  
163 - ax_cv_have_ssse3_ext=yes  
164 - fi  
165 - ])  
166 -  
167 - AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],  
168 - [  
169 - ax_cv_have_sse41_ext=no  
170 - if test "$((0x$ecx>>19&0x01))" = 1; then  
171 - ax_cv_have_sse41_ext=yes  
172 - fi  
173 - ])  
174 -  
175 - AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],  
176 - [  
177 - ax_cv_have_sse42_ext=no  
178 - if test "$((0x$ecx>>20&0x01))" = 1; then  
179 - ax_cv_have_sse42_ext=yes  
180 - fi  
181 - ])  
182 -  
183 - AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],  
184 - [  
185 - ax_cv_have_avx_cpu_ext=no  
186 - if test "$((0x$ecx>>28&0x01))" = 1; then  
187 - ax_cv_have_avx_cpu_ext=yes  
188 - fi  
189 - ])  
190 -  
191 - if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then  
192 - AX_GCC_X86_AVX_XGETBV(0x00000000)  
193 -  
194 - xgetbv_eax="0"  
195 - if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then  
196 - xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`  
197 - fi  
198 -  
199 - AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],  
200 - [  
201 - ax_cv_have_avx_ext=no  
202 -  
203 - if test "$((0x$ecx>>27&0x01))" = 1; then  
204 - if test "$((0x$xgetbv_eax&0x6))" = 6; then  
205 - ax_cv_have_avx_ext=yes  
206 - fi  
207 - fi  
208 - ])  
209 - if test x"$ax_cv_have_avx_ext" = x"no"; then  
210 - AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])  
211 - fi 29 + AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes])
  30 + if test "$ax_cv_have_altivec_ext" = yes; then
  31 + AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no])
212 fi 32 fi
  33 + ;;
213 34
214 - if test "$ax_cv_have_mmx_ext" = yes; then  
215 - AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])  
216 - if test x"$ax_cv_support_mmx_ext" = x"yes"; then  
217 - SIMD_FLAGS="$SIMD_FLAGS -mmmx"  
218 - AC_DEFINE(HAVE_MMX,,[Support mmx instructions])  
219 - else  
220 - AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])  
221 - fi  
222 - fi 35 + i[[3456]]86*|x86_64*|amd64*)
223 36
  37 + AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes])
224 if test "$ax_cv_have_sse_ext" = yes; then 38 if test "$ax_cv_have_sse_ext" = yes; then
225 - AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])  
226 - if test x"$ax_cv_support_sse_ext" = x"yes"; then  
227 - SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"  
228 - AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])  
229 - else  
230 - AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])  
231 - fi 39 + AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no])
232 fi 40 fi
233 41
  42 + AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes])
234 if test "$ax_cv_have_sse2_ext" = yes; then 43 if test "$ax_cv_have_sse2_ext" = yes; then
235 - AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])  
236 - if test x"$ax_cv_support_sse2_ext" = x"yes"; then  
237 - SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"  
238 - AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])  
239 - else  
240 - AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])  
241 - fi 44 + AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no])
242 fi 45 fi
243 46
  47 + AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes])
244 if test "$ax_cv_have_sse3_ext" = yes; then 48 if test "$ax_cv_have_sse3_ext" = yes; then
245 - AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])  
246 - if test x"$ax_cv_support_sse3_ext" = x"yes"; then  
247 - SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"  
248 - AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])  
249 - else  
250 - AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])  
251 - fi  
252 - fi  
253 -  
254 - if test "$ax_cv_have_pclmuldq_ext" = yes; then  
255 - AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])  
256 - if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then  
257 - SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"  
258 - AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication])  
259 - else  
260 - AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])  
261 - fi 49 + AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no])
262 fi 50 fi
263 51
  52 + AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes])
264 if test "$ax_cv_have_ssse3_ext" = yes; then 53 if test "$ax_cv_have_ssse3_ext" = yes; then
265 - AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])  
266 - if test x"$ax_cv_support_ssse3_ext" = x"yes"; then  
267 - SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"  
268 - AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])  
269 - else  
270 - AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])  
271 - fi 54 + AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no])
272 fi 55 fi
273 56
274 - if test "$ax_cv_have_sse41_ext" = yes; then  
275 - AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])  
276 - if test x"$ax_cv_support_sse41_ext" = x"yes"; then  
277 - SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"  
278 - AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])  
279 - else  
280 - AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])  
281 - fi 57 + AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes])
  58 + if test "$ax_cv_have_pclmuldq_ext" = yes; then
  59 + AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no])
282 fi 60 fi
283 61
284 - if test "$ax_cv_have_sse42_ext" = yes; then  
285 - AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])  
286 - if test x"$ax_cv_support_sse42_ext" = x"yes"; then  
287 - SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"  
288 - AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])  
289 - else  
290 - AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])  
291 - fi 62 + AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes])
  63 + if test "$ax_cv_have_sse41_ext" = yes; then
  64 + AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no])
292 fi 65 fi
293 66
294 - if test "$ax_cv_have_avx_ext" = yes; then  
295 - AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])  
296 - if test x"$ax_cv_support_avx_ext" = x"yes"; then  
297 - SIMD_FLAGS="$SIMD_FLAGS -mavx"  
298 - AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])  
299 - else  
300 - AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])  
301 - fi 67 + AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes])
  68 + if test "$ax_cv_have_sse42_ext" = yes; then
  69 + AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no])
302 fi 70 fi
303 -  
304 - ;; 71 + ;;
305 esac 72 esac
306 73
307 AC_SUBST(SIMD_FLAGS) 74 AC_SUBST(SIMD_FLAGS)
m4/ax_gcc_x86_avx_xgetbv.m4
@@ -1,79 +0,0 @@ @@ -1,79 +0,0 @@
1 -# ===========================================================================  
2 -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html  
3 -# ===========================================================================  
4 -#  
5 -# SYNOPSIS  
6 -#  
7 -# AX_GCC_X86_AVX_XGETBV  
8 -#  
9 -# DESCRIPTION  
10 -#  
11 -# On later x86 processors with AVX SIMD support, with gcc or a compiler  
12 -# that has a compatible syntax for inline assembly instructions, run a  
13 -# small program that executes the xgetbv instruction with input OP. This  
14 -# can be used to detect if the OS supports AVX instruction usage.  
15 -#  
16 -# On output, the values of the eax and edx registers are stored as  
17 -# hexadecimal strings as "eax:edx" in the cache variable  
18 -# ax_cv_gcc_x86_avx_xgetbv.  
19 -#  
20 -# If the xgetbv instruction fails (because you are running a  
21 -# cross-compiler, or because you are not using gcc, or because you are on  
22 -# a processor that doesn't have this instruction),  
23 -# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".  
24 -#  
25 -# This macro mainly exists to be used in AX_EXT.  
26 -#  
27 -# LICENSE  
28 -#  
29 -# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>  
30 -#  
31 -# This program is free software: you can redistribute it and/or modify it  
32 -# under the terms of the GNU General Public License as published by the  
33 -# Free Software Foundation, either version 3 of the License, or (at your  
34 -# option) any later version.  
35 -#  
36 -# This program is distributed in the hope that it will be useful, but  
37 -# WITHOUT ANY WARRANTY; without even the implied warranty of  
38 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General  
39 -# Public License for more details.  
40 -#  
41 -# You should have received a copy of the GNU General Public License along  
42 -# with this program. If not, see <http://www.gnu.org/licenses/>.  
43 -#  
44 -# As a special exception, the respective Autoconf Macro's copyright owner  
45 -# gives unlimited permission to copy, distribute and modify the configure  
46 -# scripts that are the output of Autoconf when processing the Macro. You  
47 -# need not follow the terms of the GNU General Public License when using  
48 -# or distributing such scripts, even though portions of the text of the  
49 -# Macro appear in them. The GNU General Public License (GPL) does govern  
50 -# all other use of the material that constitutes the Autoconf Macro.  
51 -#  
52 -# This special exception to the GPL applies to versions of the Autoconf  
53 -# Macro released by the Autoconf Archive. When you make and distribute a  
54 -# modified version of the Autoconf Macro, you may extend this special  
55 -# exception to the GPL to apply to your modified version as well.  
56 -  
57 -#serial 1  
58 -  
59 -AC_DEFUN([AX_GCC_X86_AVX_XGETBV],  
60 -[AC_REQUIRE([AC_PROG_CC])  
61 -AC_LANG_PUSH([C])  
62 -AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,  
63 - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [  
64 - int op = $1, eax, edx;  
65 - FILE *f;  
66 - /* Opcodes for xgetbv */  
67 - __asm__(".byte 0x0f, 0x01, 0xd0"  
68 - : "=a" (eax), "=d" (edx)  
69 - : "c" (op));  
70 - f = fopen("conftest_xgetbv", "w"); if (!f) return 1;  
71 - fprintf(f, "%x:%x\n", eax, edx);  
72 - fclose(f);  
73 - return 0;  
74 -])],  
75 - [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],  
76 - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],  
77 - [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])  
78 -AC_LANG_POP([C])  
79 -])  
m4/ax_gcc_x86_cpuid.m4
@@ -1,79 +0,0 @@ @@ -1,79 +0,0 @@
1 -# ===========================================================================  
2 -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html  
3 -# ===========================================================================  
4 -#  
5 -# SYNOPSIS  
6 -#  
7 -# AX_GCC_X86_CPUID(OP)  
8 -#  
9 -# DESCRIPTION  
10 -#  
11 -# On Pentium and later x86 processors, with gcc or a compiler that has a  
12 -# compatible syntax for inline assembly instructions, run a small program  
13 -# that executes the cpuid instruction with input OP. This can be used to  
14 -# detect the CPU type.  
15 -#  
16 -# On output, the values of the eax, ebx, ecx, and edx registers are stored  
17 -# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable  
18 -# ax_cv_gcc_x86_cpuid_OP.  
19 -#  
20 -# If the cpuid instruction fails (because you are running a  
21 -# cross-compiler, or because you are not using gcc, or because you are on  
22 -# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP  
23 -# is set to the string "unknown".  
24 -#  
25 -# This macro mainly exists to be used in AX_GCC_ARCHFLAG.  
26 -#  
27 -# LICENSE  
28 -#  
29 -# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>  
30 -# Copyright (c) 2008 Matteo Frigo  
31 -#  
32 -# This program is free software: you can redistribute it and/or modify it  
33 -# under the terms of the GNU General Public License as published by the  
34 -# Free Software Foundation, either version 3 of the License, or (at your  
35 -# option) any later version.  
36 -#  
37 -# This program is distributed in the hope that it will be useful, but  
38 -# WITHOUT ANY WARRANTY; without even the implied warranty of  
39 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General  
40 -# Public License for more details.  
41 -#  
42 -# You should have received a copy of the GNU General Public License along  
43 -# with this program. If not, see <http://www.gnu.org/licenses/>.  
44 -#  
45 -# As a special exception, the respective Autoconf Macro's copyright owner  
46 -# gives unlimited permission to copy, distribute and modify the configure  
47 -# scripts that are the output of Autoconf when processing the Macro. You  
48 -# need not follow the terms of the GNU General Public License when using  
49 -# or distributing such scripts, even though portions of the text of the  
50 -# Macro appear in them. The GNU General Public License (GPL) does govern  
51 -# all other use of the material that constitutes the Autoconf Macro.  
52 -#  
53 -# This special exception to the GPL applies to versions of the Autoconf  
54 -# Macro released by the Autoconf Archive. When you make and distribute a  
55 -# modified version of the Autoconf Macro, you may extend this special  
56 -# exception to the GPL to apply to your modified version as well.  
57 -  
58 -#serial 7  
59 -  
60 -AC_DEFUN([AX_GCC_X86_CPUID],  
61 -[AC_REQUIRE([AC_PROG_CC])  
62 -AC_LANG_PUSH([C])  
63 -AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,  
64 - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [  
65 - int op = $1, eax, ebx, ecx, edx;  
66 - FILE *f;  
67 - __asm__("cpuid"  
68 - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)  
69 - : "a" (op));  
70 - f = fopen("conftest_cpuid", "w"); if (!f) return 1;  
71 - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);  
72 - fclose(f);  
73 - return 0;  
74 -])],  
75 - [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],  
76 - [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],  
77 - [ax_cv_gcc_x86_cpuid_$1=unknown])])  
78 -AC_LANG_POP([C])  
79 -])  
@@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0; @@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0;
22 22
23 #if defined(__x86_64__) 23 #if defined(__x86_64__)
24 24
  25 +#if defined(_MSC_VER)
  26 +
  27 +#define cpuid(info, x) __cpuidex(info, x, 0)
  28 +
  29 +#elif defined(__GNUC__)
  30 +
  31 +#include <cpuid.h>
  32 +void cpuid(int info[4], int InfoType){
  33 + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
  34 +}
  35 +
  36 +#else
  37 +
  38 +#error please add a way to detect CPU SIMD support at runtime
  39 +
  40 +#endif
  41 +
25 void gf_cpu_identify(void) 42 void gf_cpu_identify(void)
26 { 43 {
27 if (gf_cpu_identified) { 44 if (gf_cpu_identified) {
28 return; 45 return;
29 } 46 }
30 47
31 - int op = 1, eax, ebx, ecx, edx; 48 + int reg[4];
32 49
33 - __asm__("cpuid"  
34 - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)  
35 - : "a" (op)); 50 + cpuid(reg, 1);
36 51
37 #if defined(INTEL_SSE4_PCLMUL) 52 #if defined(INTEL_SSE4_PCLMUL)
38 - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) { 53 + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
39 gf_cpu_supports_intel_pclmul = 1; 54 gf_cpu_supports_intel_pclmul = 1;
40 #ifdef DEBUG_CPU_DETECTION 55 #ifdef DEBUG_CPU_DETECTION
41 printf("#gf_cpu_supports_intel_pclmul\n"); 56 printf("#gf_cpu_supports_intel_pclmul\n");
@@ -44,7 +59,7 @@ void gf_cpu_identify(void) @@ -44,7 +59,7 @@ void gf_cpu_identify(void)
44 #endif 59 #endif
45 60
46 #if defined(INTEL_SSE4) 61 #if defined(INTEL_SSE4)
47 - if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) { 62 + if (((reg[2] & (1<<20)) != 0 || (reg[2] & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
48 gf_cpu_supports_intel_sse4 = 1; 63 gf_cpu_supports_intel_sse4 = 1;
49 #ifdef DEBUG_CPU_DETECTION 64 #ifdef DEBUG_CPU_DETECTION
50 printf("#gf_cpu_supports_intel_sse4\n"); 65 printf("#gf_cpu_supports_intel_sse4\n");
@@ -53,7 +68,7 @@ void gf_cpu_identify(void) @@ -53,7 +68,7 @@ void gf_cpu_identify(void)
53 #endif 68 #endif
54 69
55 #if defined(INTEL_SSSE3) 70 #if defined(INTEL_SSSE3)
56 - if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) { 71 + if ((reg[2] & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
57 gf_cpu_supports_intel_ssse3 = 1; 72 gf_cpu_supports_intel_ssse3 = 1;
58 #ifdef DEBUG_CPU_DETECTION 73 #ifdef DEBUG_CPU_DETECTION
59 printf("#gf_cpu_supports_intel_ssse3\n"); 74 printf("#gf_cpu_supports_intel_ssse3\n");
@@ -62,7 +77,7 @@ void gf_cpu_identify(void) @@ -62,7 +77,7 @@ void gf_cpu_identify(void)
62 #endif 77 #endif
63 78
64 #if defined(INTEL_SSE3) 79 #if defined(INTEL_SSE3)
65 - if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) { 80 + if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
66 gf_cpu_supports_intel_sse3 = 1; 81 gf_cpu_supports_intel_sse3 = 1;
67 #ifdef DEBUG_CPU_DETECTION 82 #ifdef DEBUG_CPU_DETECTION
68 printf("#gf_cpu_supports_intel_sse3\n"); 83 printf("#gf_cpu_supports_intel_sse3\n");
@@ -71,7 +86,7 @@ void gf_cpu_identify(void) @@ -71,7 +86,7 @@ void gf_cpu_identify(void)
71 #endif 86 #endif
72 87
73 #if defined(INTEL_SSE2) 88 #if defined(INTEL_SSE2)
74 - if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) { 89 + if ((reg[3] & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
75 gf_cpu_supports_intel_sse2 = 1; 90 gf_cpu_supports_intel_sse2 = 1;
76 #ifdef DEBUG_CPU_DETECTION 91 #ifdef DEBUG_CPU_DETECTION
77 printf("#gf_cpu_supports_intel_sse2\n"); 92 printf("#gf_cpu_supports_intel_sse2\n");
test/Makefile.am
1 # GF-Complete 'test' AM file 1 # GF-Complete 'test' AM file
2 2
3 AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include 3 AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
4 -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC 4 +AM_CFLAGS = -O3 -fPIC
5 5
6 bin_PROGRAMS = gf_unit 6 bin_PROGRAMS = gf_unit
7 7
tools/Makefile.am
1 # GF-Complete 'tools' AM file 1 # GF-Complete 'tools' AM file
2 2
3 AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include 3 AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
4 -AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC 4 +AM_CFLAGS = -O3 -fPIC
5 5
6 bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time 6 bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time
7 7
tools/test_simd.sh
@@ -27,6 +27,16 @@ test_functions() { @@ -27,6 +27,16 @@ test_functions() {
27 return ${failed} 27 return ${failed}
28 } 28 }
29 29
  30 +# build with DEBUG_CPU_DETECTION and print out CPU detection
  31 +test_detection() {
  32 + failed=0
  33 +
  34 + { ./configure && make clean && make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> ${results}; return 1; }
  35 + { ${script_dir}/gf_methods 32 -ACD -L | grep '#' >> ${results}; } || { echo "gf_methods $i FAILED" >> ${results}; ((++failed)); }
  36 +
  37 + return ${failed}
  38 +}
  39 +
30 compile_arm() { 40 compile_arm() {
31 failed=0 41 failed=0
32 42
@@ -167,7 +177,7 @@ runtime_intel_flags() { @@ -167,7 +177,7 @@ runtime_intel_flags() {
167 { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); } 177 { ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); }
168 done 178 done
169 179
170 - echo "====SSE2 support..." >> ${1} 180 + echo "====SSE2 support..." >> ${1}
171 export ax_cv_have_sse_ext=no 181 export ax_cv_have_sse_ext=no
172 export ax_cv_have_sse2_ext=yes 182 export ax_cv_have_sse2_ext=yes
173 export ax_cv_have_sse3_ext=no 183 export ax_cv_have_sse3_ext=no
tools/test_simd_qemu.sh
@@ -224,6 +224,8 @@ run_test_simd_basic() { @@ -224,6 +224,8 @@ run_test_simd_basic() {
224 { run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } 224 { run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
225 echo "=====running functions test" 225 echo "=====running functions test"
226 { run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } 226 { run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
  227 + echo "=====running detection test"
  228 + { run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
227 echo "=====running runtime test" 229 echo "=====running runtime test"
228 { run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); } 230 { run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
229 stop_qemu 231 stop_qemu