Commit 8a96434c55f6c7718d6e56a9f94fe9ef3157f161

Authored by animetosho
1 parent d59cbb2a

Eliminate 64-bit insert/extract for w=32 and w=64

Enables the w=32 and w=64 code paths to compile for 32-bit targets
Showing 2 changed files with 13 additions and 13 deletions   Show diff stats
src/gf_w32.c
... ... @@ -368,8 +368,8 @@ gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
368 368  
369 369 a = _mm_insert_epi32 (_mm_setzero_si128(), a32, 0);
370 370 b = _mm_insert_epi32 (a, b32, 0);
371   - g = _mm_insert_epi64 (a, g_star, 0);
372   - q = _mm_insert_epi64 (a, q_plus, 0);
  371 + g = _mm_cvtsi64_si128 (g_star);
  372 + q = _mm_cvtsi64_si128 (q_plus);
373 373  
374 374 result = _mm_clmulepi64_si128 (a, b, 0);
375 375 w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
... ... @@ -406,8 +406,8 @@ gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32
406 406 q_plus = *(uint64_t *) h->private;
407 407 g_star = *((uint64_t *) h->private + 1);
408 408  
409   - g = _mm_insert_epi64 (a, g_star, 0);
410   - q = _mm_insert_epi64 (a, q_plus, 0);
  409 + g = _mm_cvtsi64_si128 (g_star);
  410 + q = _mm_cvtsi64_si128 (q_plus);
411 411 a = _mm_insert_epi32 (_mm_setzero_si128(), val, 0);
412 412 s32 = (uint32_t *) src;
413 413 d32 = (uint32_t *) dest;
... ...
src/gf_w64.c
... ... @@ -79,7 +79,7 @@ xor)
79 79 gf_do_initial_region_alignment(&rd);
80 80  
81 81 prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0xffffffffULL));
82   - b = _mm_insert_epi64 (_mm_setzero_si128(), val, 0);
  82 + b = _mm_cvtsi64_si128 (val);
83 83 m1 = _mm_set_epi32(0, 0, 0, (uint32_t)0xffffffff);
84 84 m3 = _mm_slli_si128(m1, 8);
85 85 m4 = _mm_slli_si128(m3, 4);
... ... @@ -166,7 +166,7 @@ xor)
166 166 gf_do_initial_region_alignment(&rd);
167 167  
168 168 prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0xffffffffULL));
169   - b = _mm_insert_epi64 (_mm_setzero_si128(), val, 0);
  169 + b = _mm_cvtsi64_si128 (val);
170 170 m1 = _mm_set_epi32(0, 0, 0, (uint32_t)0xffffffff);
171 171 m3 = _mm_slli_si128(m1, 8);
172 172 m4 = _mm_slli_si128(m3, 4);
... ... @@ -353,8 +353,8 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
353 353 __m128i v, w;
354 354 gf_internal_t * h = gf->scratch;
355 355  
356   - a = _mm_insert_epi64 (_mm_setzero_si128(), a64, 0);
357   - b = _mm_insert_epi64 (a, b64, 0);
  356 + a = _mm_cvtsi64_si128 (a64);
  357 + b = _mm_cvtsi64_si128 (b64);
358 358 prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0xffffffffULL));
359 359 /* Do the initial multiply */
360 360  
... ... @@ -375,7 +375,7 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
375 375 w = _mm_clmulepi64_si128 (prim_poly, v, 0);
376 376 result = _mm_xor_si128 (result, w);
377 377  
378   - rv = ((gf_val_64_t)_mm_extract_epi64(result, 0));
  378 + rv = ((gf_val_64_t)_mm_cvtsi128_si64(result));
379 379 #endif
380 380 return rv;
381 381 }
... ... @@ -395,8 +395,8 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
395 395 __m128i v, w;
396 396 gf_internal_t * h = gf->scratch;
397 397  
398   - a = _mm_insert_epi64 (_mm_setzero_si128(), a64, 0);
399   - b = _mm_insert_epi64 (a, b64, 0);
  398 + a = _mm_cvtsi64_si128 (a64);
  399 + b = _mm_cvtsi64_si128 (b64);
400 400 prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0xffffffffULL));
401 401  
402 402 /* Do the initial multiply */
... ... @@ -417,7 +417,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
417 417 w = _mm_clmulepi64_si128 (prim_poly, v, 0);
418 418 result = _mm_xor_si128 (result, w);
419 419  
420   - rv = ((gf_val_64_t)_mm_extract_epi64(result, 0));
  420 + rv = ((gf_val_64_t)_mm_cvtsi128_si64(result));
421 421 #endif
422 422 return rv;
423 423 }
... ... @@ -444,7 +444,7 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
444 444 d8 = (uint8_t *) rd.d_start;
445 445 dtop = (uint8_t *) rd.d_top;
446 446  
447   - v = _mm_insert_epi64(_mm_setzero_si128(), val, 0);
  447 + v = _mm_cvtsi64_si128(val);
448 448 m = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
449 449 prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0xffffffffULL));
450 450  
... ...