Commit 29899ad4439aca07ccd2efe2a8b7086efdec72b0

Authored by Loic Dachary
1 parent 0020ff80
Exists in master and in 2 other branches: v2, v3

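Move #if/#ifdef guards outward (around whole SIMD-only functions and their local declarations) to avoid unused-variable and unused-function warnings when the SIMD feature macros are not defined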

Signed-off-by: Loic Dachary <loic@dachary.org>
src/gf_w128.c
... ... @@ -81,6 +81,7 @@ int xor)
81 81 }
82 82 }
83 83  
  84 +#if defined(INTEL_SSE4_PCLMUL)
84 85 static
85 86 void
86 87 gf_w128_clm_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes,
... ... @@ -91,7 +92,6 @@ int xor)
91 92 gf_val_128_t d128;
92 93 uint64_t c128[2];
93 94 gf_region_data rd;
94   -#if defined(INTEL_SSE4_PCLMUL)
95 95 __m128i a,b;
96 96 __m128i result0,result1;
97 97 __m128i prim_poly;
... ... @@ -184,8 +184,8 @@ int xor)
184 184 d128[i+1] = (uint64_t)_mm_extract_epi64(result1,0);
185 185 }
186 186 }
187   -#endif
188 187 }
  188 +#endif
189 189  
190 190 /*
191 191 * Some w128 notes:
... ... @@ -599,11 +599,11 @@ gf_w128_split_4_128_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_
599 599 }
600 600 }
601 601  
  602 +#ifdef INTEL_SSSE3
602 603 static
603 604 void
604 605 gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
605 606 {
606   -#ifdef INTEL_SSSE3
607 607 gf_internal_t *h;
608 608 int i, m, j, k, tindex;
609 609 uint64_t pp, v[2], s, *s64, *d64, *top;
... ... @@ -695,14 +695,14 @@ gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
695 695 /* Doing this instead of gf_do_final_region_alignment() because that doesn't hold 128-bit vals */
696 696  
697 697 gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((char*)src+bytes)-(char*)rd.s_top, xor);
698   -#endif
699 698 }
  699 +#endif
700 700  
  701 +#ifdef INTEL_SSSE3
701 702 static
702 703 void
703 704 gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
704 705 {
705   -#ifdef INTEL_SSSE3
706 706 gf_internal_t *h;
707 707 int i, m, j, k, tindex;
708 708 uint64_t pp, v[2], s, *s64, *d64, *top;
... ... @@ -805,8 +805,8 @@ gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest,
805 805 /* Doing this instead of gf_do_final_region_alignment() because that doesn't hold 128-bit vals */
806 806  
807 807 gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((char*)src+bytes)-(char*)rd.s_top, xor);
808   -#endif
809 808 }
  809 +#endif
810 810  
811 811 static
812 812 void
... ... @@ -1495,10 +1495,10 @@ void gf_w128_group_r_init(gf_t *gf)
1495 1495 return;
1496 1496 }
1497 1497  
  1498 +#if defined(INTEL_SSE4)
1498 1499 static
1499 1500 void gf_w128_group_r_sse_init(gf_t *gf)
1500 1501 {
1501   -#if defined(INTEL_SSE4)
1502 1502 int i, j;
1503 1503 int g_r;
1504 1504 uint64_t pp;
... ... @@ -1520,8 +1520,8 @@ void gf_w128_group_r_sse_init(gf_t *gf)
1520 1520 }
1521 1521 }
1522 1522 return;
1523   -#endif
1524 1523 }
  1524 +#endif
1525 1525  
1526 1526 static
1527 1527 int gf_w128_split_init(gf_t *gf)
... ...
src/gf_w16.c
... ... @@ -125,6 +125,7 @@ gf_w16_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t
125 125 gf_do_final_region_alignment(&rd);
126 126 }
127 127  
  128 +#if defined(INTEL_SSE4_PCLMUL)
128 129 static
129 130 void
130 131 gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
... ... @@ -132,8 +133,6 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
132 133 gf_region_data rd;
133 134 uint16_t *s16;
134 135 uint16_t *d16;
135   -
136   -#if defined(INTEL_SSE4_PCLMUL)
137 136 __m128i a, b;
138 137 __m128i result;
139 138 __m128i prim_poly;
... ... @@ -186,9 +185,10 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
186 185 }
187 186 }
188 187 gf_do_final_region_alignment(&rd);
189   -#endif
190 188 }
  189 +#endif
191 190  
  191 +#if defined(INTEL_SSE4_PCLMUL)
192 192 static
193 193 void
194 194 gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
... ... @@ -197,8 +197,6 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
197 197 uint16_t *s16;
198 198 uint16_t *d16;
199 199  
200   -#if defined(INTEL_SSE4_PCLMUL)
201   -
202 200 __m128i a, b;
203 201 __m128i result;
204 202 __m128i prim_poly;
... ... @@ -255,9 +253,10 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
255 253 }
256 254 }
257 255 gf_do_final_region_alignment(&rd);
258   -#endif
259 256 }
  257 +#endif
260 258  
  259 +#if defined(INTEL_SSE4_PCLMUL)
261 260 static
262 261 void
263 262 gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
... ... @@ -266,8 +265,6 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
266 265 uint16_t *s16;
267 266 uint16_t *d16;
268 267  
269   -#if defined(INTEL_SSE4_PCLMUL)
270   -
271 268 __m128i a, b;
272 269 __m128i result;
273 270 __m128i prim_poly;
... ... @@ -328,8 +325,8 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
328 325 }
329 326 }
330 327 gf_do_final_region_alignment(&rd);
331   -#endif
332 328 }
  329 +#endif
333 330  
334 331 static
335 332 inline
... ... @@ -605,13 +602,13 @@ int gf_w16_shift_init(gf_t *gf)
605 602 static
606 603 int gf_w16_cfm_init(gf_t *gf)
607 604 {
  605 +#if defined(INTEL_SSE4_PCLMUL)
608 606 gf_internal_t *h;
609 607  
610 608 h = (gf_internal_t *) gf->scratch;
611 609  
612 610 /*Ben: Determining how many reductions to do */
613 611  
614   -#if defined(INTEL_SSE4_PCLMUL)
615 612 if ((0xfe00 & h->prim_poly) == 0) {
616 613 gf->multiply.w32 = gf_w16_clm_multiply_2;
617 614 gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2;
... ... @@ -1548,11 +1545,11 @@ gf_w16_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
1548 1545 prod = _mm_xor_si128(prod, t1); \
1549 1546 v = _mm_srli_epi64(v, 1); }
1550 1547  
  1548 +#ifdef INTEL_SSE2
1551 1549 static
1552 1550 void
1553 1551 gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
1554 1552 {
1555   -#ifdef INTEL_SSE2
1556 1553 int i;
1557 1554 uint8_t *s8, *d8;
1558 1555 uint32_t vrev;
... ... @@ -1609,14 +1606,14 @@ gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
1609 1606 s8 += 16;
1610 1607 }
1611 1608 gf_do_final_region_alignment(&rd);
1612   -#endif
1613 1609 }
  1610 +#endif
1614 1611  
  1612 +#ifdef INTEL_SSE2
1615 1613 static
1616 1614 void
1617 1615 gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
1618 1616 {
1619   -#ifdef INTEL_SSE2
1620 1617 int i;
1621 1618 uint8_t *d8, *s8, tb;
1622 1619 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1635,14 +1632,14 @@ gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *
1635 1632 d8 += 16;
1636 1633 s8 += 16;
1637 1634 }
1638   -#endif
1639 1635 }
  1636 +#endif
1640 1637  
  1638 +#ifdef INTEL_SSE2
1641 1639 static
1642 1640 void
1643 1641 gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
1644 1642 {
1645   -#ifdef INTEL_SSE2
1646 1643 int i;
1647 1644 uint8_t *d8, *s8, tb;
1648 1645 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1663,15 +1660,15 @@ gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *bt
1663 1660 d8 += 16;
1664 1661 s8 += 16;
1665 1662 }
1666   -#endif
1667 1663 }
  1664 +#endif
1668 1665  
1669 1666  
  1667 +#ifdef INTEL_SSE2
1670 1668 static
1671 1669 void
1672 1670 gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
1673 1671 {
1674   -#ifdef INTEL_SSE2
1675 1672 int itb;
1676 1673 uint8_t *d8, *s8;
1677 1674 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1719,8 +1716,8 @@ gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
1719 1716 }
1720 1717  
1721 1718 gf_do_final_region_alignment(&rd);
1722   -#endif
1723 1719 }
  1720 +#endif
1724 1721  
1725 1722 static
1726 1723 void
... ...
src/gf_w32.c
... ... @@ -120,13 +120,13 @@ xor)
120 120 }
121 121 }
122 122  
  123 +#if defined(INTEL_SSE4_PCLMUL)
  124 +
123 125 static
124 126 void
125 127 gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
126 128 {
127 129  
128   -#if defined(INTEL_SSE4_PCLMUL)
129   -
130 130 int i;
131 131 uint32_t *s32;
132 132 uint32_t *d32;
... ... @@ -167,16 +167,16 @@ gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32
167 167 d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
168 168 }
169 169 }
170   -#endif
171 170 }
  171 +#endif
  172 +
  173 +#if defined(INTEL_SSE4_PCLMUL)
172 174  
173 175 static
174 176 void
175 177 gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
176 178 {
177 179  
178   -#if defined(INTEL_SSE4_PCLMUL)
179   -
180 180 int i;
181 181 uint32_t *s32;
182 182 uint32_t *d32;
... ... @@ -222,14 +222,14 @@ gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32
222 222 d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
223 223 }
224 224 }
225   -#endif
226 225 }
  226 +#endif
227 227  
  228 +#if defined(INTEL_SSE4_PCLMUL)
228 229 static
229 230 void
230 231 gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
231 232 {
232   -#if defined(INTEL_SSE4_PCLMUL)
233 233 int i;
234 234 uint32_t *s32;
235 235 uint32_t *d32;
... ... @@ -279,8 +279,8 @@ gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32
279 279 d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
280 280 }
281 281 }
282   -#endif
283 282 }
  283 +#endif
284 284  
285 285 static
286 286 inline
... ... @@ -983,11 +983,11 @@ gf_w32_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
983 983 prod = _mm_xor_si128(prod, t1); \
984 984 v = _mm_srli_epi64(v, 1); }
985 985  
  986 +#ifdef INTEL_SSE2
986 987 static
987 988 void
988 989 gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
989 990 {
990   -#ifdef INTEL_SSE2
991 991 int i;
992 992 uint8_t *s8, *d8;
993 993 uint32_t vrev;
... ... @@ -1036,8 +1036,8 @@ gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
1036 1036 s8 += 16;
1037 1037 }
1038 1038 gf_do_final_region_alignment(&rd);
1039   -#endif
1040 1039 }
  1040 +#endif
1041 1041  
1042 1042 static
1043 1043 void
... ... @@ -1177,11 +1177,11 @@ gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
1177 1177 gf_do_final_region_alignment(&rd);
1178 1178 }
1179 1179  
  1180 +#ifdef INTEL_SSE2
1180 1181 static
1181 1182 void
1182 1183 gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
1183 1184 {
1184   -#ifdef INTEL_SSE2
1185 1185 int i;
1186 1186 uint8_t *d8, *s8, tb;
1187 1187 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1200,14 +1200,14 @@ gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *
1200 1200 d8 += 16;
1201 1201 s8 += 16;
1202 1202 }
1203   -#endif
1204 1203 }
  1204 +#endif
1205 1205  
  1206 +#ifdef INTEL_SSE2
1206 1207 static
1207 1208 void
1208 1209 gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
1209 1210 {
1210   -#ifdef INTEL_SSE2
1211 1211 int i;
1212 1212 uint8_t *d8, *s8, tb;
1213 1213 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1228,15 +1228,15 @@ gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *bt
1228 1228 d8 += 16;
1229 1229 s8 += 16;
1230 1230 }
1231   -#endif
1232 1231 }
  1232 +#endif
1233 1233  
1234 1234  
  1235 +#ifdef INTEL_SSE2
1235 1236 static
1236 1237 void
1237 1238 gf_w32_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
1238 1239 {
1239   -#ifdef INTEL_SSE2
1240 1240 uint32_t itb;
1241 1241 uint8_t *d8, *s8;
1242 1242 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1284,8 +1284,8 @@ gf_w32_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
1284 1284 }
1285 1285  
1286 1286 gf_do_final_region_alignment(&rd);
1287   -#endif
1288 1287 }
  1288 +#endif
1289 1289  
1290 1290 static
1291 1291 int gf_w32_bytwo_init(gf_t *gf)
... ... @@ -1552,11 +1552,11 @@ gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t
1552 1552 gf_do_final_region_alignment(&rd);
1553 1553 }
1554 1554  
  1555 +#ifdef INTEL_SSSE3
1555 1556 static
1556 1557 void
1557 1558 gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
1558 1559 {
1559   -#ifdef INTEL_SSSE3
1560 1560 gf_internal_t *h;
1561 1561 int i, m, j, tindex;
1562 1562 uint32_t pp, v, v2, s, *s32, *d32, *top;
... ... @@ -1631,8 +1631,8 @@ gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
1631 1631  
1632 1632 gf_do_final_region_alignment(&rd);
1633 1633  
1634   -#endif
1635 1634 }
  1635 +#endif
1636 1636  
1637 1637 static
1638 1638 void
... ...
src/gf_w4.c
... ... @@ -414,11 +414,11 @@ gf_w4_single_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
414 414  
415 415 #define MM_PRINT(s, r) { uint8_t blah[16]; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (i = 0; i < 16; i++) printf(" %02x", blah[i]); printf("\n"); }
416 416  
  417 +#ifdef INTEL_SSSE3
417 418 static
418 419 void
419 420 gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
420 421 {
421   -#ifdef INTEL_SSSE3
422 422 gf_region_data rd;
423 423 uint8_t *base, *sptr, *dptr, *top;
424 424 __m128i tl, loset, h4, r, va, th;
... ... @@ -460,8 +460,8 @@ gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
460 460 }
461 461 gf_do_final_region_alignment(&rd);
462 462  
463   -#endif
464 463 }
  464 +#endif
465 465  
466 466 static
467 467 int gf_w4_single_table_init(gf_t *gf)
... ... @@ -916,11 +916,11 @@ gf_w4_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
916 916 prod = _mm_xor_si128(prod, t1); \
917 917 v = _mm_srli_epi64(v, 1); }
918 918  
  919 +#ifdef INTEL_SSE2
919 920 static
920 921 void
921 922 gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
922 923 {
923   -#ifdef INTEL_SSE2
924 924 int i;
925 925 uint8_t *s8, *d8;
926 926 uint8_t vrev;
... ... @@ -965,8 +965,8 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
965 965 s8 += 16;
966 966 }
967 967 gf_do_final_region_alignment(&rd);
968   -#endif
969 968 }
  969 +#endif
970 970  
971 971 /*
972 972 static
... ... @@ -1034,11 +1034,11 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
1034 1034 }
1035 1035 */
1036 1036  
  1037 +#ifdef INTEL_SSE2
1037 1038 static
1038 1039 void
1039 1040 gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1040 1041 {
1041   -#ifdef INTEL_SSE2
1042 1042 int i;
1043 1043 uint8_t *d8, *s8, tb;
1044 1044 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1057,14 +1057,14 @@ gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1057 1057 d8 += 16;
1058 1058 s8 += 16;
1059 1059 }
1060   -#endif
1061 1060 }
  1061 +#endif
1062 1062  
  1063 +#ifdef INTEL_SSE2
1063 1064 static
1064 1065 void
1065 1066 gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1066 1067 {
1067   -#ifdef INTEL_SSE2
1068 1068 int i;
1069 1069 uint8_t *d8, *s8, tb;
1070 1070 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1085,14 +1085,14 @@ gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1085 1085 d8 += 16;
1086 1086 s8 += 16;
1087 1087 }
1088   -#endif
1089 1088 }
  1089 +#endif
1090 1090  
  1091 +#ifdef INTEL_SSE2
1091 1092 static
1092 1093 void
1093 1094 gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1094 1095 {
1095   -#ifdef INTEL_SSE2
1096 1096 int i;
1097 1097 uint8_t *d8, *s8, tb;
1098 1098 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1112,14 +1112,14 @@ gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1112 1112 d8 += 16;
1113 1113 s8 += 16;
1114 1114 }
1115   -#endif
1116 1115 }
  1116 +#endif
1117 1117  
  1118 +#ifdef INTEL_SSE2
1118 1119 static
1119 1120 void
1120 1121 gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1121 1122 {
1122   -#ifdef INTEL_SSE2
1123 1123 int i;
1124 1124 uint8_t *d8, *s8, tb;
1125 1125 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1141,15 +1141,15 @@ gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1141 1141 d8 += 16;
1142 1142 s8 += 16;
1143 1143 }
1144   -#endif
1145 1144 }
  1145 +#endif
1146 1146  
1147 1147  
  1148 +#ifdef INTEL_SSE2
1148 1149 static
1149 1150 void
1150 1151 gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1151 1152 {
1152   -#ifdef INTEL_SSE2
1153 1153 int i;
1154 1154 uint8_t *d8, *s8, tb;
1155 1155 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1170,14 +1170,14 @@ gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1170 1170 d8 += 16;
1171 1171 s8 += 16;
1172 1172 }
1173   -#endif
1174 1173 }
  1174 +#endif
1175 1175  
  1176 +#ifdef INTEL_SSE2
1176 1177 static
1177 1178 void
1178 1179 gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1179 1180 {
1180   -#ifdef INTEL_SSE2
1181 1181 int i;
1182 1182 uint8_t *d8, *s8, tb;
1183 1183 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1198,14 +1198,14 @@ gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1198 1198 d8 += 16;
1199 1199 s8 += 16;
1200 1200 }
1201   -#endif
1202 1201 }
  1202 +#endif
1203 1203  
  1204 +#ifdef INTEL_SSE2
1204 1205 static
1205 1206 void
1206 1207 gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1207 1208 {
1208   -#ifdef INTEL_SSE2
1209 1209 int i;
1210 1210 uint8_t *d8, *s8, tb;
1211 1211 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1227,14 +1227,14 @@ gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1227 1227 d8 += 16;
1228 1228 s8 += 16;
1229 1229 }
1230   -#endif
1231 1230 }
  1231 +#endif
1232 1232  
  1233 +#ifdef INTEL_SSE2
1233 1234 static
1234 1235 void
1235 1236 gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1236 1237 {
1237   -#ifdef INTEL_SSE2
1238 1238 int i;
1239 1239 uint8_t *d8, *s8, tb;
1240 1240 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1256,14 +1256,14 @@ gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1256 1256 d8 += 16;
1257 1257 s8 += 16;
1258 1258 }
1259   -#endif
1260 1259 }
  1260 +#endif
1261 1261  
  1262 +#ifdef INTEL_SSE2
1262 1263 static
1263 1264 void
1264 1265 gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1265 1266 {
1266   -#ifdef INTEL_SSE2
1267 1267 int i;
1268 1268 uint8_t *d8, *s8, tb;
1269 1269 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1286,14 +1286,14 @@ gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1286 1286 d8 += 16;
1287 1287 s8 += 16;
1288 1288 }
1289   -#endif
1290 1289 }
  1290 +#endif
1291 1291  
  1292 +#ifdef INTEL_SSE2
1292 1293 static
1293 1294 void
1294 1295 gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1295 1296 {
1296   -#ifdef INTEL_SSE2
1297 1297 int i;
1298 1298 uint8_t *d8, *s8, tb;
1299 1299 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1316,14 +1316,14 @@ gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1316 1316 d8 += 16;
1317 1317 s8 += 16;
1318 1318 }
1319   -#endif
1320 1319 }
  1320 +#endif
1321 1321  
  1322 +#ifdef INTEL_SSE2
1322 1323 static
1323 1324 void
1324 1325 gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1325 1326 {
1326   -#ifdef INTEL_SSE2
1327 1327 int i;
1328 1328 uint8_t *d8, *s8, tb;
1329 1329 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1345,14 +1345,14 @@ gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
1345 1345 d8 += 16;
1346 1346 s8 += 16;
1347 1347 }
1348   -#endif
1349 1348 }
  1349 +#endif
1350 1350  
  1351 +#ifdef INTEL_SSE2
1351 1352 static
1352 1353 void
1353 1354 gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1354 1355 {
1355   -#ifdef INTEL_SSE2
1356 1356 int i;
1357 1357 uint8_t *d8, *s8, tb;
1358 1358 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1374,14 +1374,14 @@ gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
1374 1374 d8 += 16;
1375 1375 s8 += 16;
1376 1376 }
1377   -#endif
1378 1377 }
  1378 +#endif
1379 1379  
  1380 +#ifdef INTEL_SSE2
1380 1381 static
1381 1382 void
1382 1383 gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
1383 1384 {
1384   -#ifdef INTEL_SSE2
1385 1385 uint8_t *d8, *s8, tb;
1386 1386 __m128i pp, m1, m2, t1, t2, va, vb;
1387 1387 struct gf_bytwo_data *btd;
... ... @@ -1489,8 +1489,8 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
1489 1489 }
1490 1490 }
1491 1491 gf_do_final_region_alignment(&rd);
1492   -#endif
1493 1492 }
  1493 +#endif
1494 1494  
1495 1495 static
1496 1496 void
... ...
src/gf_w64.c
... ... @@ -87,6 +87,7 @@ xor)
87 87 }
88 88 }
89 89  
  90 +#if defined(INTEL_SSE4_PCLMUL)
90 91 static
91 92 void
92 93 gf_w64_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
... ... @@ -96,7 +97,6 @@ xor)
96 97 gf_val_64_t *s64, *d64, *top;
97 98 gf_region_data rd;
98 99  
99   -#if defined(INTEL_SSE4_PCLMUL)
100 100 __m128i a, b;
101 101 __m128i result, r1;
102 102 __m128i prim_poly;
... ... @@ -175,9 +175,10 @@ xor)
175 175 }
176 176 }
177 177 gf_do_final_region_alignment(&rd);
178   -#endif
179 178 }
  179 +#endif
180 180  
  181 +#if defined(INTEL_SSE4_PCLMUL)
181 182 static
182 183 void
183 184 gf_w64_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
... ... @@ -187,7 +188,6 @@ xor)
187 188 gf_val_64_t *s64, *d64, *top;
188 189 gf_region_data rd;
189 190  
190   -#if defined(INTEL_SSE4_PCLMUL)
191 191 __m128i a, b;
192 192 __m128i result, r1;
193 193 __m128i prim_poly;
... ... @@ -263,8 +263,8 @@ xor)
263 263 }
264 264 }
265 265 gf_do_final_region_alignment(&rd);
266   -#endif
267 266 }
  267 +#endif
268 268  
269 269 static
270 270 inline
... ... @@ -1369,11 +1369,11 @@ void gf_w64_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
1369 1369 #endif
1370 1370 }
1371 1371  
  1372 +#ifdef INTEL_SSE2
1372 1373 static
1373 1374 void
1374 1375 gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
1375 1376 {
1376   -#ifdef INTEL_SSE2
1377 1377 int i;
1378 1378 uint64_t one64, amask;
1379 1379 uint8_t *d8, *s8, tb;
... ... @@ -1400,14 +1400,14 @@ gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
1400 1400 d8 += 16;
1401 1401 s8 += 16;
1402 1402 }
1403   -#endif
1404 1403 }
  1404 +#endif
1405 1405  
  1406 +#ifdef INTEL_SSE2
1406 1407 static
1407 1408 void
1408 1409 gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
1409 1410 {
1410   -#ifdef INTEL_SSE2
1411 1411 int i;
1412 1412 uint64_t one64, amask;
1413 1413 uint8_t *d8, *s8, tb;
... ... @@ -1432,14 +1432,14 @@ gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
1432 1432 d8 += 16;
1433 1433 s8 += 16;
1434 1434 }
1435   -#endif
1436 1435 }
  1436 +#endif
1437 1437  
  1438 +#ifdef INTEL_SSE2
1438 1439 static
1439 1440 void
1440 1441 gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
1441 1442 {
1442   -#ifdef INTEL_SSE2
1443 1443 uint64_t itb, amask, one64;
1444 1444 uint8_t *d8, *s8;
1445 1445 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1490,8 +1490,8 @@ gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t
1490 1490 }
1491 1491  
1492 1492 gf_do_final_region_alignment(&rd);
1493   -#endif
1494 1493 }
  1494 +#endif
1495 1495  
1496 1496  
1497 1497 static
... ... @@ -1712,11 +1712,11 @@ int gf_w64_composite_init(gf_t *gf)
1712 1712 return 1;
1713 1713 }
1714 1714  
  1715 +#ifdef INTEL_SSSE3
1715 1716 static
1716 1717 void
1717 1718 gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
1718 1719 {
1719   -#ifdef INTEL_SSSE3
1720 1720 gf_internal_t *h;
1721 1721 int i, m, j, k, tindex;
1722 1722 uint64_t pp, v, s, *s64, *d64, *top;
... ... @@ -1793,14 +1793,14 @@ gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
1793 1793 }
1794 1794 }
1795 1795 gf_do_final_region_alignment(&rd);
1796   -#endif
1797 1796 }
  1797 +#endif
1798 1798  
  1799 +#ifdef INTEL_SSE4
1799 1800 static
1800 1801 void
1801 1802 gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
1802 1803 {
1803   -#ifdef INTEL_SSE4
1804 1804 gf_internal_t *h;
1805 1805 int i, m, j, k, tindex;
1806 1806 uint64_t pp, v, s, *s64, *d64, *top;
... ... @@ -1997,8 +1997,8 @@ gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
1997 1997 }
1998 1998  
1999 1999 gf_do_final_region_alignment(&rd);
2000   -#endif
2001 2000 }
  2001 +#endif
2002 2002  
2003 2003 #define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1);
2004 2004  
... ...
src/gf_w8.c
... ... @@ -364,6 +364,7 @@ gf_w8_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t v
364 364 gf_do_final_region_alignment(&rd);
365 365 }
366 366  
  367 +#if defined(INTEL_SSE4_PCLMUL)
367 368 static
368 369 void
369 370 gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
... ... @@ -373,8 +374,6 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
373 374 uint8_t *s8;
374 375 uint8_t *d8;
375 376  
376   -#if defined(INTEL_SSE4_PCLMUL)
377   -
378 377 __m128i a, b;
379 378 __m128i result;
380 379 __m128i prim_poly;
... ... @@ -420,9 +419,10 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
420 419 }
421 420 }
422 421 gf_do_final_region_alignment(&rd);
423   -#endif
424 422 }
  423 +#endif
425 424  
  425 +#if defined(INTEL_SSE4_PCLMUL)
426 426 static
427 427 void
428 428 gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
... ... @@ -432,8 +432,6 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
432 432 uint8_t *s8;
433 433 uint8_t *d8;
434 434  
435   -#if defined(INTEL_SSE4_PCLMUL)
436   -
437 435 __m128i a, b;
438 436 __m128i result;
439 437 __m128i prim_poly;
... ... @@ -483,9 +481,10 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
483 481 }
484 482 }
485 483 gf_do_final_region_alignment(&rd);
486   -#endif
487 484 }
  485 +#endif
488 486  
  487 +#if defined(INTEL_SSE4_PCLMUL)
489 488 static
490 489 void
491 490 gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
... ... @@ -495,8 +494,6 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
495 494 uint8_t *s8;
496 495 uint8_t *d8;
497 496  
498   -#if defined(INTEL_SSE4_PCLMUL)
499   -
500 497 __m128i a, b;
501 498 __m128i result;
502 499 __m128i prim_poly;
... ... @@ -550,8 +547,8 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
550 547 }
551 548 }
552 549 gf_do_final_region_alignment(&rd);
553   -#endif
554 550 }
  551 +#endif
555 552  
556 553 /* ------------------------------------------------------------
557 554 IMPLEMENTATION: SHIFT:
... ... @@ -588,11 +585,11 @@ gf_w8_shift_multiply (gf_t *gf, uint32_t a8, uint32_t b8)
588 585 static
589 586 int gf_w8_cfm_init(gf_t *gf)
590 587 {
  588 +#if defined(INTEL_SSE4_PCLMUL)
591 589 gf_internal_t *h;
592 590  
593 591 h = (gf_internal_t *) gf->scratch;
594 592  
595   -#if defined(INTEL_SSE4_PCLMUL)
596 593 if ((0xe0 & h->prim_poly) == 0){
597 594 gf->multiply.w32 = gf_w8_clm_multiply_2;
598 595 gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2;
... ... @@ -941,6 +938,7 @@ gf_w8_default_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
941 938 return (ftd->multtable[a][b]);
942 939 }
943 940  
  941 +#ifdef INTEL_SSSE3
944 942 static
945 943 gf_val_32_t
946 944 gf_w8_default_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
... ... @@ -950,6 +948,7 @@ gf_w8_default_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
950 948 ftd = (struct gf_w8_default_data *) ((gf_internal_t *) gf->scratch)->private;
951 949 return (ftd->divtable[a][b]);
952 950 }
  951 +#endif
953 952  
954 953 static
955 954 gf_val_32_t
... ... @@ -1054,11 +1053,11 @@ gf_w8_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, in
1054 1053 }
1055 1054 }
1056 1055  
  1056 +#ifdef INTEL_SSSE3
1057 1057 static
1058 1058 void
1059 1059 gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
1060 1060 {
1061   -#ifdef INTEL_SSSE3
1062 1061 uint8_t *s8, *d8, *bh, *bl, *sptr, *dptr, *top;
1063 1062 __m128i tbl, loset, t1, r, va, mth, mtl;
1064 1063 uint64_t altable[4];
... ... @@ -1114,8 +1113,8 @@ gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val
1114 1113 }
1115 1114  
1116 1115 gf_do_final_region_alignment(&rd);
1117   -#endif
1118 1116 }
  1117 +#endif
1119 1118  
1120 1119  
1121 1120 /* ------------------------------------------------------------
... ... @@ -1669,11 +1668,11 @@ gf_w8_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
1669 1668 prod = _mm_xor_si128(prod, t1); \
1670 1669 v = _mm_srli_epi64(v, 1); }
1671 1670  
  1671 +#ifdef INTEL_SSE2
1672 1672 static
1673 1673 void
1674 1674 gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
1675 1675 {
1676   -#ifdef INTEL_SSE2
1677 1676 int i;
1678 1677 uint8_t *s8, *d8;
1679 1678 uint8_t vrev;
... ... @@ -1722,14 +1721,14 @@ gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
1722 1721 s8 += 16;
1723 1722 }
1724 1723 gf_do_final_region_alignment(&rd);
1725   -#endif
1726 1724 }
  1725 +#endif
1727 1726  
  1727 +#ifdef INTEL_SSE2
1728 1728 static
1729 1729 void
1730 1730 gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
1731 1731 {
1732   -#ifdef INTEL_SSE2
1733 1732 int i;
1734 1733 uint8_t *d8, *s8, tb;
1735 1734 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1748,14 +1747,14 @@ gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *bt
1748 1747 d8 += 16;
1749 1748 s8 += 16;
1750 1749 }
1751   -#endif
1752 1750 }
  1751 +#endif
1753 1752  
  1753 +#ifdef INTEL_SSE2
1754 1754 static
1755 1755 void
1756 1756 gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
1757 1757 {
1758   -#ifdef INTEL_SSE2
1759 1758 int i;
1760 1759 uint8_t *d8, *s8, tb;
1761 1760 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1776,15 +1775,15 @@ gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
1776 1775 d8 += 16;
1777 1776 s8 += 16;
1778 1777 }
1779   -#endif
1780 1778 }
  1779 +#endif
1781 1780  
1782 1781  
  1782 +#ifdef INTEL_SSE2
1783 1783 static
1784 1784 void
1785 1785 gf_w8_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
1786 1786 {
1787   -#ifdef INTEL_SSE2
1788 1787 int itb;
1789 1788 uint8_t *d8, *s8;
1790 1789 __m128i pp, m1, m2, t1, t2, va, vb;
... ... @@ -1832,8 +1831,8 @@ gf_w8_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
1832 1831 }
1833 1832  
1834 1833 gf_do_final_region_alignment(&rd);
1835   -#endif
1836 1834 }
  1835 +#endif
1837 1836  
1838 1837 static
1839 1838 void
... ...