Commit 6bb1ebb9f4579d8faafb6124da33178da217a133

Authored by Adam Disney
1 parent 9d53ea59
Exists in master and in 3 other branches v2, v3, wip-18092

Implemented CARRY_FREE_GK. Sections added are tagged with a //ADAM comment for easy navigation.
include/gf_complete.h
... ... @@ -33,17 +33,18 @@
33 33 Not all are implemented for all values of w.
34 34 See the paper for an explanation of how they work. */
35 35  
36   -typedef enum {GF_MULT_DEFAULT,
37   - GF_MULT_SHIFT,
38   - GF_MULT_CARRY_FREE,
39   - GF_MULT_GROUP,
  36 +typedef enum {GF_MULT_DEFAULT,
  37 + GF_MULT_SHIFT,
  38 + GF_MULT_CARRY_FREE,
  39 + GF_MULT_CARRY_FREE_GK, //ADAM
  40 + GF_MULT_GROUP,
40 41 GF_MULT_BYTWO_p,
41 42 GF_MULT_BYTWO_b,
42   - GF_MULT_TABLE,
43   - GF_MULT_LOG_TABLE,
  43 + GF_MULT_TABLE,
  44 + GF_MULT_LOG_TABLE,
44 45 GF_MULT_LOG_ZERO,
45 46 GF_MULT_LOG_ZERO_EXT,
46   - GF_MULT_SPLIT_TABLE,
  47 + GF_MULT_SPLIT_TABLE,
47 48 GF_MULT_COMPOSITE } gf_mult_type_t;
48 49  
49 50 /* These are the different ways to optimize region
... ...
src/gf.c
... ... @@ -286,6 +286,16 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
286 286 return 1;
287 287 }
288 288  
  289 + //ADAM
  290 + if (mult_type == GF_MULT_CARRY_FREE_GK) {
  291 + if (w != 4 && w != 8 && w != 16 &&
  292 + w != 32 && w != 64 && w != 128) { _gf_errno = GF_E_CFM___W; return 0; }
  293 + if (raltmap) { _gf_errno = GF_E_ALT_CFM; return 0; }
  294 + if (rsse || rnosse) { _gf_errno = GF_E_SSE_CFM; return 0; }
  295 + if (!pclmul) { _gf_errno = GF_E_PCLMULX; return 0; }
  296 + return 1;
  297 + }
  298 +
289 299 if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) {
290 300 if (raltmap) { _gf_errno = GF_E_ALT_BY2; return 0; }
291 301 if (rsse && !sse2) { _gf_errno = GF_E_BY2_SSE; return 0; }
... ...
src/gf_method.c
... ... @@ -47,6 +47,10 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
47 47 } else if (strcmp(argv[starting], "CARRY_FREE") == 0) {
48 48 mult_type = GF_MULT_CARRY_FREE;
49 49 starting++;
  50 + //ADAM
  51 + } else if (strcmp(argv[starting], "CARRY_FREE_GK") == 0) {
  52 + mult_type = GF_MULT_CARRY_FREE_GK;
  53 + starting++;
50 54 } else if (strcmp(argv[starting], "GROUP") == 0) {
51 55 mult_type = GF_MULT_GROUP;
52 56 if (argc < starting + 3) {
... ...
src/gf_w32.c
... ... @@ -399,7 +399,94 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b)
399 399 extra memory.
400 400 */
401 401  
  402 +//ADAM
  403 +static
  404 +inline
  405 +gf_val_32_t
  406 +gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
  407 +{
  408 + gf_val_32_t rv = 0;
  409 +
  410 +#if defined(INTEL_SSE4_PCLMUL)
  411 +
  412 + __m128i a, b;
  413 + __m128i result;
  414 + __m128i w;
  415 + __m128i g, q;
  416 + gf_internal_t * h = gf->scratch;
  417 + uint64_t g_star, q_plus;
402 418  
  419 + q_plus = *(uint64_t *) h->private;
  420 + g_star = *((uint64_t *) h->private + 1);
  421 +
  422 + a = _mm_insert_epi32 (_mm_setzero_si128(), a32, 0);
  423 + b = _mm_insert_epi32 (a, b32, 0);
  424 + g = _mm_insert_epi64 (a, g_star, 0);
  425 + q = _mm_insert_epi64 (a, q_plus, 0);
  426 +
  427 + result = _mm_clmulepi64_si128 (a, b, 0);
  428 + w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
  429 + w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
  430 + result = _mm_xor_si128 (result, w);
  431 +
  432 + /* Extracts 32 bit value from result. */
  433 + rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
  434 +#endif
  435 + return rv;
  436 +}
  437 +
  438 +//ADAM
  439 +#if defined(INTEL_SSE4_PCLMUL)
  440 +
  441 +static
  442 +void
  443 +gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
  444 +{
  445 +
  446 + int i;
  447 + uint32_t *s32;
  448 + uint32_t *d32;
  449 +
  450 + __m128i a, b;
  451 + __m128i result;
  452 + __m128i w;
  453 + __m128i g, q;
  454 + gf_internal_t * h = gf->scratch;
  455 + uint64_t g_star, q_plus;
  456 +
  457 + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
  458 + if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
  459 +
  460 + q_plus = *(uint64_t *) h->private;
  461 + g_star = *((uint64_t *) h->private + 1);
  462 +
  463 + g = _mm_insert_epi64 (a, g_star, 0);
  464 + q = _mm_insert_epi64 (a, q_plus, 0);
  465 + a = _mm_insert_epi32 (_mm_setzero_si128(), val, 0);
  466 + s32 = (uint32_t *) src;
  467 + d32 = (uint32_t *) dest;
  468 +
  469 + if (xor) {
  470 + for (i = 0; i < bytes/sizeof(uint32_t); i++) {
  471 + b = _mm_insert_epi32 (a, s32[i], 0);
  472 + result = _mm_clmulepi64_si128 (a, b, 0);
  473 + w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
  474 + w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
  475 + result = _mm_xor_si128 (result, w);
  476 + d32[i] ^= ((gf_val_32_t)_mm_extract_epi32(result, 0));
  477 + }
  478 + } else {
  479 + for (i = 0; i < bytes/sizeof(uint32_t); i++) {
  480 + b = _mm_insert_epi32 (a, s32[i], 0);
  481 + result = _mm_clmulepi64_si128 (a, b, 0);
  482 + w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
  483 + w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
  484 + result = _mm_xor_si128 (result, w);
  485 + d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
  486 + }
  487 + }
  488 +}
  489 +#endif
403 490  
404 491  
405 492 static
... ... @@ -446,6 +533,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
446 533 #endif
447 534 return rv;
448 535 }
  536 +
449 537 static
450 538 inline
451 539 gf_val_32_t
... ... @@ -552,6 +640,45 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32)
552 640 return product;
553 641 }
554 642  
  643 +//ADAM
  644 + static
  645 +int gf_w32_cfmgk_init(gf_t *gf)
  646 +{
  647 + gf->inverse.w32 = gf_w32_euclid;
  648 + gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
  649 +
  650 +#if defined(INTEL_SSE4_PCLMUL)
  651 + gf_internal_t *h;
  652 +
  653 + h = (gf_internal_t *) gf->scratch;
  654 + gf->multiply.w32 = gf_w32_cfmgk_multiply;
  655 + gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single;
  656 +
  657 + //setup in the private section the q+ and g* ADAM
  658 + uint64_t *q_plus = (uint64_t *) h->private;
  659 + uint64_t *g_star = (uint64_t *) h->private + 1;
  660 +
  661 + //q+
  662 + uint64_t tmp = h->prim_poly << 32;
  663 + *q_plus = 1ULL << 32;
  664 +
  665 + int i;
  666 + for(i = 63; i >= 32; i--)
  667 + if((1ULL << i) & tmp)
  668 + {
  669 + *q_plus |= 1ULL << (i-32);
  670 + tmp ^= h->prim_poly << (i-32);
  671 + }
  672 +
  673 + //g*
  674 + *g_star = h->prim_poly & ((1ULL << 32) - 1);
  675 +
  676 + return 1;
  677 +#endif
  678 +
  679 + return 0;
  680 +}
  681 +
555 682 static
556 683 int gf_w32_cfm_init(gf_t *gf)
557 684 {
... ... @@ -2656,6 +2783,10 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg
2656 2783 case GF_MULT_CARRY_FREE:
2657 2784 return sizeof(gf_internal_t);
2658 2785 break;
  2786 + //ADAM
  2787 + case GF_MULT_CARRY_FREE_GK:
  2788 + return sizeof(gf_internal_t) + sizeof(uint64_t)*2;
  2789 + break;
2659 2790 case GF_MULT_SHIFT:
2660 2791 return sizeof(gf_internal_t);
2661 2792 break;
... ... @@ -2703,14 +2834,15 @@ int gf_w32_init(gf_t *gf)
2703 2834 gf->multiply_region.w32 = NULL;
2704 2835  
2705 2836 switch(h->mult_type) {
2706   - case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
2707   - case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
2708   - case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
  2837 + case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
  2838 + case GF_MULT_CARRY_FREE_GK: if (gf_w32_cfmgk_init(gf) == 0) return 0; break; //ADAM
  2839 + case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
  2840 + case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
2709 2841 case GF_MULT_DEFAULT:
2710   - case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
2711   - case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
  2842 + case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
  2843 + case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
2712 2844 case GF_MULT_BYTWO_p:
2713   - case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
  2845 + case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
2714 2846 default: return 0;
2715 2847 }
2716 2848 if (h->divide_type == GF_DIVIDE_EUCLID) {
... ...
tools/gf_methods.c
... ... @@ -20,8 +20,9 @@
20 20 #define BNMULTS (8)
21 21 static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48",
22 22 "TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" };
23   -#define NMULTS (16)
24   -static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
  23 +//ADAM
  24 +#define NMULTS (17)
  25 +static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "CARRY_FREE_GK", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
25 26 "TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
26 27 "SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };
27 28  
... ...