Commit 70b6d55aee0256e8d656733004a142d798de0ef4

Authored by plank
0 parents
Exists in master and in 3 other branches v1, v2, v3

Big checkin after I've lost the others. Ha ha.



git-svn-id: svn://mamba.eecs.utk.edu/home/plank/svn/Galois-Library@78 36f187d4-5712-4624-889c-152d48957efa
GNUmakefile 0 → 100644
  1 +++ a/GNUmakefile
... ... @@ -0,0 +1,51 @@
  1 +#
  2 +# GNUmakefile for Galois field library
  3 +#
  4 +#
  5 +
  6 +SRCS = gf_w4.c gf_w8.c gf_w16.c gf_w32.c gf_w64.c gf_w128.c gf_wgen.c gf.c gf_unit.c gf_time.c gf_mult.c gf_method.c gf_54.c gf_methods.c gf_div.c gf_rand.c gf_general.c
  7 +HDRS = gf.h gf_int.h
  8 +EXECUTABLES = gf_mult gf_div gf_unit gf_time gf_54 gf_methods
  9 +CFLAGS = -O3 -msse4 -DINTEL_SSE4
  10 +# CFLAGS = -g
  11 +LDFLAGS = -O3 -msse4
  12 +RM = /bin/rm -f
  13 +
  14 +OBJS = $(addsuffix .o, $(basename $(SRCS)))
  15 +
  16 +DEFAULT = $(EXECUTABLES)
  17 +
  18 +default: $(DEFAULT)
  19 +
  20 +all: $(OBJS)
  21 +
  22 +gf_methods: gf_methods.o gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o
  23 +gf_time: gf_time.o gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o gf_rand.o gf_general.o
  24 +gf_unit: gf_unit.o gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o gf_rand.o gf_general.o
  25 +gf_mult: gf_mult.o gf.o gf_wgen.o gf_w4.o gf_method.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o
  26 +gf_div: gf_div.o gf.o gf_wgen.o gf_w4.o gf_method.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o
  27 +gf_54: gf_54.o gf.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o
  28 +
  29 +clean:
  30 + $(RM) $(OBJS) gf_div.c
  31 +
  32 +spotless: clean
  33 + $(RM) *~ $(EXECUTABLES)
  34 +
  35 +gf_div.o: gf.h gf_method.h
  36 +gf_methods.o: gf.h gf_method.h
  37 +gf_time.o: gf.h gf_method.h gf_rand.h gf_general.h
  38 +gf_wgen.o: gf_int.h gf.h
  39 +gf_w4.o: gf_int.h gf.h
  40 +gf_w8.o: gf_int.h gf.h
  41 +gf_w16.o: gf_int.h gf.h
  42 +gf_w32.o: gf_int.h gf.h
  43 +gf_w64.o: gf_int.h gf.h
  44 +gf_54.o: gf.h
  45 +gf_unit.o: gf.h gf_method.h gf_rand.h gf_general.h
  46 +gf_general.o: gf.h gf_int.h gf_general.h gf_rand.h
  47 +gf_mult.o: gf.h gf_method.h
  48 +gf_method.o: gf.h
  49 +
  50 +gf_div.c: gf_mult.c
  51 + sed 's/multiply/divide/g' gf_mult.c > gf_div.c
... ...
Log-Zero-for-w=8.odg 0 → 100644
No preview for this file type
README 0 → 100644
  1 +++ a/README
... ... @@ -0,0 +1 @@
  1 +This is a README file.
... ...
explanation.html 0 → 100644
  1 +++ a/explanation.html
... ... @@ -0,0 +1,777 @@
  1 +<h3>Code structure as of 7/20/2012</h3>
  2 +
  3 +written by Jim.
  4 +<p>
  5 +Ok -- once again, I have messed with the structure. My goal is for it to be flexible and efficient.
  6 +It's similar to the stuff before, but better because it makes things like Euclid's
  7 +method much cleaner.
  8 +<p>
  9 +I think we're ready to hack.
  10 +<p>
  11 +<p>
  12 +<hr>
  13 +<h3>Files</h3>
  14 +<UL>
  15 +<LI> <a href=GNUmakefile><b>GNUmakefile</b></a>: Makefile
  16 +<LI> <a href=README><b>README</b></a>: Empty readme
  17 +<LI> <a href=explanation.html><b>explanation.html</b></a>: This file.
  18 +<LI> <a href=gf.c><b>gf.c</b></a>: Main gf routines
  19 +<LI> <a href=gf.h><b>gf.h</b></a>: Main gf prototypes and typedefs
  20 +<LI> <a href=gf_int.h><b>gf_int.h</b></a>: Prototypes and typedefs for common routines for the
  21 + internal gf implementations.
  22 +<LI> <a href=gf_method.c><b>gf_method.c</b></a>: Code to help parse argc/argv to define the method.
  23 + This way, various programs can be consistent with how they handle the command line.
  24 +<LI> <a href=gf_method.h><b>gf_method.h</b></a>: Prototypes for <b>gf_method.c</b>.
  25 +<LI> <a href=gf_methods.c><b>gf_methods.c</b></a>: This program prints out how to define
  26 + the various methods on the command line. My idea is to beef this up so that you can
  27 + give it a method spec on the command line, and it will tell you whether it's valid, or
  28 + why it's invalid. I haven't written that part yet.
  29 +<LI> <a href=gf_mult.c><b>gf_mult.c</b></a>: Program to do single multiplication.
  30 +<LI> <a href=gf_div.c><b>gf_div.c</b></a>: Program to do single divisions -- it's created
  31 + in the makefile with a sed script on gf_mult.c.
  32 +<LI> <a href=gf_time.c><b>gf_time.c</b></a>: Time tester
  33 +<LI> <a href=gf_unit.c><b>gf_unit.c</b></a>: Unit tester
  34 +<LI> <a href=gf_54.c><b>gf_54.c</b></a>: A simple example program that multiplies
  35 + 5 and 4 in GF(2^4).
  36 +<LI> <a href=gf_w4.c><b>gf_w4.c</b></a>: Implementation of code for <i>w</i> = 4.
  37 +(For now, only SHIFT and LOG, plus EUCLID & MATRIX).
  38 +<LI> <a href=gf_w8.c><b>gf_w8.c</b></a>: Implementation of code for <i>w</i> = 8.
  39 +(For now, only SHIFT plus EUCLID & MATRIX).
  40 +<LI> <a href=gf_w16.c><b>gf_w16.c</b></a>: Implementation of code for <i>w</i> = 16.
  41 +(For now, only SHIFT plus EUCLID & MATRIX).
  42 +<LI> <a href=gf_w32.c><b>gf_w32.c</b></a>: Implementation of code for <i>w</i> = 32.
  43 +(For now, only SHIFT plus EUCLID & MATRIX).
  44 +<LI> <a href=gf_w64.c><b>gf_w64.c</b></a>: Implementation of code for <i>w</i> = 64.
  45 +(For now, only SHIFT and EUCLID).
  46 +<LI> I don't have gf_w128.c or gf_wgen.c yet.
  47 +</UL>
  48 +
  49 +<hr>
  50 +<h3>Prototypes and typedefs in gf.h</h3>
  51 +
  52 +The main structure that users will see is in <b>gf.h</b>, and it is of type
  53 +<b>gf_t</b>:
  54 +
  55 +<p><center><table border=3 cellpadding=3><td><pre>
  56 +typedef struct gf {
  57 + gf_func_a_b multiply;
  58 + gf_func_a_b divide;
  59 + gf_func_a inverse;
  60 + gf_region multiply_region;
  61 + void *scratch;
  62 +} gf_t;
  63 +</pre></td></table></center><p>
  64 +
  65 +We can beef it up later with buf-buf or buf-acc. The problem is that the paper is
  66 +already bloated, so right now, I want to keep it lean.
  67 +<p>
  68 +The types of the procedures are big unions, so that they work with the following
  69 +types of arguments:
  70 +
  71 +<p><center><table border=3 cellpadding=3><td><pre>
  72 +typedef uint8_t gf_val_4_t;
  73 +typedef uint8_t gf_val_8_t;
  74 +typedef uint16_t gf_val_16_t;
  75 +typedef uint32_t gf_val_32_t;
  76 +typedef uint64_t gf_val_64_t;
  77 +typedef uint64_t *gf_val_128_t;
  78 +typedef uint32_t gf_val_gen_t; /* The intent here is for general values <= 32 */
  79 +</pre></td></table></center><p>
  80 +
  81 +To use one of these, you need to create one with <b>gf_init_easy()</b> or
  82 +<b>gf_init_hard()</b>. Let's concentrate on the former:
  83 +
  84 +<p><center><table border=3 cellpadding=3><td><pre>
  85 +extern int gf_init_easy(gf_t *gf, int w, int mult_type);
  86 +</pre></td></table></center><p>
  87 +
  88 +You pass it memory for a <b>gf_t</b>, a value of <b>w</b> and
  89 +a variable that says how to do multiplication. The valid values of <b>mult_type</b>
  90 +are enumerated in <b>gf.h</b>:
  91 +
  92 +<p><center><table border=3 cellpadding=3><td><pre>
  93 +typedef enum {GF_MULT_DEFAULT,
  94 + GF_MULT_SHIFT,
  95 + GF_MULT_GROUP,
  96 + GF_MULT_BYTWO_p,
  97 + GF_MULT_BYTWO_b,
  98 + GF_MULT_TABLE,
  99 + GF_MULT_LOG_TABLE,
  100 + GF_MULT_SPLIT_TABLE,
  101 + GF_MULT_COMPOSITE } gf_mult_type_t;
  102 +</pre></td></table></center><p>
  103 +
  104 +After creating the <b>gf_t</b>, you use its <b>multiply</b> method
  105 +to multiply, using the union's fields to work with the various types.
  106 +It looks easier than my explanation. For example, suppose you wanted to multiply 5 and 4 in <i>GF(2<sup>4</sup>)</i>.
  107 +You can do it as in
  108 +<b><a href=gf_54.c>gf_54.c</a></b>
  109 +
  110 +<p><center><table border=3 cellpadding=3><td><pre>
  111 +#include "gf.h"
  112 +
  113 +main()
  114 +{
  115 + gf_t gf;
  116 +
  117 + gf_init_easy(&gf, 4, GF_MULT_DEFAULT);
  118 + printf("%d\n", gf.multiply.w4(&gf, 5, 4));
  119 + exit(0);
  120 +}
  121 +</pre></td></table></center><p>
  122 +
  123 +
  124 +If you wanted to multiply in <i>GF(2<sup>8</sup>)</i>, then you'd have to use 8 as a parameter
  125 +to <b>gf_init_easy</b>, and call the multiplier as <b>gf.multiply.w8()</b>.
  126 +<p>
  127 +When you're done with your <b>gf_t</b>, you should call <b>gf_free()</b> on it so
  128 +that it can free memory that it has allocated. We'll talk more about memory later, but if you
  129 +create your <b>gf_t</b> with <b>gf_init_easy</b>, then it calls <b>malloc()</b>, and
  130 +if you care about freeing memory, you'll have to call <b>gf_free()</b>.
  131 +<p>
  132 +
  133 +<hr>
  134 +<h3>Memory allocation</h3>
  135 +
  136 +Each implementation of a multiplication technique keeps around its
  137 +own data. For example, <b>GF_MULT_TABLE</b> keeps around
  138 +multiplication and division tables, and <b>GF_MULT_LOG_TABLE</b> maintains log and
  139 +antilog tables. This data is stored in the pointer <b>scratch</b>. My intent
  140 +is that the memory that is there is all that's required. In other
  141 +words, the <b>multiply()</b>, <b>divide()</b>, <b>inverse()</b> and
  142 +<b>multiply_region()</b> calls don't do any memory allocation.
  143 +Moreover, <b>gf_init_easy()</b> only allocates one chunk of memory --
  144 +the one in <b>scratch</b>.
  145 +<p>
  146 +If you don't want to have the initialization call allocate memory, you can use <b>gf_init_hard()</b>:
  147 +
  148 +<p><center><table border=3 cellpadding=3><td><pre>
  149 +extern int gf_init_hard(gf_t *gf,
  150 + int w,
  151 + int mult_type,
  152 + int region_type,
  153 + int divide_type,
  154 + uint64_t prim_poly,
  155 + int arg1,
  156 + int arg2,
  157 + gf_t *base_gf,
  158 + void *scratch_memory);
  159 +</pre></td></table></center><p>
  160 +
  161 +The first three parameters are the same as <b>gf_init_easy()</b>.
  162 +You can add additional arguments for performing <b>multiply_region</b>, and
  163 +for performing division in the <b>region_type</b> and <b>divide_type</b>
  164 +arguments. Their values are also defined in <b>gf.h</b>. You can
  165 +mix the <b>region_type</b> values (e.g. "DOUBLE" and "SSE"):
  166 +
  167 +<p><center><table border=3 cellpadding=3><td><pre>
  168 +#define GF_REGION_DEFAULT (0x0)
  169 +#define GF_REGION_SINGLE_TABLE (0x1)
  170 +#define GF_REGION_DOUBLE_TABLE (0x2)
  171 +#define GF_REGION_QUAD_TABLE (0x4)
  172 +#define GF_REGION_LAZY (0x8)
  173 +#define GF_REGION_SSE (0x10)
  174 +#define GF_REGION_NOSSE (0x20)
  175 +#define GF_REGION_STDMAP (0x40)
  176 +#define GF_REGION_ALTMAP (0x80)
  177 +#define GF_REGION_CAUCHY (0x100)
  178 +
  179 +typedef uint32_t gf_region_type_t;
  180 +
  181 +typedef enum { GF_DIVIDE_DEFAULT,
  182 + GF_DIVIDE_MATRIX,
  183 + GF_DIVIDE_EUCLID } gf_division_type_t;
  184 +</pre></td></table></center><p>
  185 +You can change
  186 +the primitive polynomial with <b>prim_poly</b>, give additional arguments with
  187 +<b>arg1</b> and <b>arg2</b> and give a base Galois Field for composite fields.
  188 +Finally, you can pass it a pointer to memory in <b>scratch_memory</b>. That
  189 +way, you can avoid having <b>gf_init_hard()</b> call <b>malloc()</b>.
  190 +<p>
  191 +There is a procedure called <b>gf_scratch_size()</b> that lets you know the minimum
  192 +size for <b>scratch_memory</b>, depending on <i>w</i>, the multiplication type
  193 +and the arguments:
  194 +
  195 +<p><center><table border=3 cellpadding=3><td><pre>
  196 +extern int gf_scratch_size(int w,
  197 + int mult_type,
  198 + int region_type,
  199 + int divide_type,
  200 + int arg1,
  201 + int arg2);
  202 +</pre></td></table></center><p>
  203 +
  204 +You can specify default arguments in <b>gf_init_hard()</b>:
  205 +<UL>
  206 +<LI> <b>region_type</b> = <b>GF_REGION_DEFAULT</b>
  207 +<LI> <b>divide_type</b> = <b>GF_DIVIDE_DEFAULT</b>
  208 +<LI> <b>prim_poly</b> = 0
  209 +<LI> <b>arg1</b> = 0
  210 +<LI> <b>arg2</b> = 0
  211 +<LI> <b>base_gf</b> = <b>NULL</b>
  212 +<LI> <b>scratch_memory</b> = <b>NULL</b>
  213 +</UL>
  214 +If any argument is equal to its default, then default actions are taken (e.g. a
  215 +standard primitive polynomial is used, or memory is allocated for <b>scratch_memory</b>).
  216 +In fact, <b>gf_init_easy()</b> simply calls <b>gf_init_hard()</b> with the default
  217 +parameters.
  218 +<p>
  219 +<b>gf_free()</b> frees memory that was allocated with <b>gf_init_easy()</b>
  220 +or <b>gf_init_hard()</b>. The <b>recursive</b> parameter is in case you
  221 +use composite fields, and want to recursively free the base fields.
  222 +If you pass <b>scratch_memory</b> to <b>gf_init_hard()</b>, then you typically
  223 +don't need to call <b>gf_free()</b>. It won't hurt to call it, though.
  224 +
  225 +<hr>
  226 +<h3>gf_mult and gf_div</h3>
  227 +
  228 +For the moment, I have few things completely implemented, but that's because I want
  229 +to be able to explain the structure, and how to specify methods. In particular, for
  230 +<i>w=4</i>, I have implemented <b>SHIFT</b> and <b>LOG</b>. For <i>w=8, 16, 32, 64</i>
  231 +I have implemented <b>SHIFT</b>. For all <i>w &le; 32</i>, I have implemented both
  232 +Euclid's algorithm for inversion, and the matrix method for inversion. For
  233 +<i>w=64</i>, it's just Euclid. You can
  234 +test these all with <b>gf_mult</b> and <b>gf_div</b>. Here are a few calls:
  235 +
  236 +<pre>
  237 +UNIX> <font color=darkred><b>gf_mult 7 11 4</b></font> - Default
  238 +4
  239 +UNIX> <font color=darkred><b>gf_mult 7 11 4 SHIFT - -</b></font> - Use shift
  240 +4
  241 +UNIX> <font color=darkred><b>gf_mult 7 11 4 LOG - -</b></font> - Use logs
  242 +4
  243 +UNIX> <font color=darkred><b>gf_div 4 7 4</b></font> - Default
  244 +11
  245 +UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - -</b></font> - Use logs
  246 +11
  247 +UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - EUCLID</b></font> - Use Euclid instead of logs
  248 +11
  249 +UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - MATRIX</b></font> - Use Matrix inversion instead of logs
  250 +11
  251 +UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - -</b></font> - Default
  252 +11
  253 +UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - EUCLID</b></font> - Use Euclid (which is the default)
  254 +11
  255 +UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - MATRIX</b></font> - Use Matrix inversion instead of Euclid
  256 +11
  257 +UNIX> <font color=darkred><b>gf_mult 200 211 8</b></font> - The remainder are shift/Euclid
  258 +201
  259 +UNIX> <font color=darkred><b>gf_div 201 211 8</b></font>
  260 +200
  261 +UNIX> <font color=darkred><b>gf_mult 60000 65111 16</b></font>
  262 +63515
  263 +UNIX> <font color=darkred><b>gf_div 63515 65111 16</b></font>
  264 +60000
  265 +UNIX> <font color=darkred><b>gf_mult abcd0001 9afbf788 32h</b></font>
  266 +b0359681
  267 +UNIX> <font color=darkred><b>gf_div b0359681 9afbf788 32h</b></font>
  268 +abcd0001
  269 +UNIX> <font color=darkred><b>gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h</b></font>
  270 +3a7def35185bd571
  271 +UNIX> <font color=darkred><b>gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h</b></font>
  272 +3a7def35185bd571
  273 +UNIX> <font color=darkred><b>gf_div 3a7def35185bd571 9afbf7887f6d8e5b 64h</b></font>
  274 +abcd00018c8b8c8a
  275 +UNIX> <font color=darkred><b></b></font>
  276 +</pre>
  277 +
  278 +You can see all the methods with <b>gf_methods</b>. We have a lot of implementing to do:
  279 +
  280 +<pre>
  281 +UNIX> <font color=darkred><b>gf_methods</b></font>
  282 +To specify the methods, do one of the following:
  283 + - leave empty to use defaults
  284 + - use a single dash to use defaults
  285 + - specify MULTIPLY REGION DIVIDE
  286 +
  287 +Legal values of MULTIPLY:
  288 + SHIFT: shift
  289 + GROUP g_mult g_reduce: the Group technique - see the paper
  290 + BYTWO_p: BYTWO doubling the product.
  291 + BYTWO_b: BYTWO doubling b (more efficient thatn BYTWO_p)
  292 + TABLE: Full multiplication table
  293 + LOG: Discrete logs
  294 + LOG_ZERO: Discrete logs with a large table for zeros
  295 + SPLIT g_a g_b: Split tables defined by g_a and g_b
  296 + COMPOSITE k l [METHOD]: Composite field, recursively specify the
  297 + method of the base field in GF(2^l)
  298 +
  299 +Legal values of REGION: Specify multiples with commas e.g. 'DOUBLE,LAZY'
  300 + -: Use defaults
  301 + SINGLE/DOUBLE/QUAD: Expand tables
  302 + LAZY: Lazily create table (only applies to TABLE and SPLIT)
  303 + SSE/NOSSE: Use 128-bit SSE instructions if you can
  304 + CAUCHY/ALTMAP/STDMAP: Use different memory mappings
  305 +
  306 +Legal values of DIVIDE:
  307 + -: Use defaults
  308 + MATRIX: Use matrix inversion
  309 + EUCLID: Use the extended Euclidian algorithm.
  310 +
  311 +See the user's manual for more information.
  312 +There are many restrictions, so it is better to simply use defaults in most cases.
  313 +UNIX> <font color=darkred><b></b></font>
  314 +</pre>
  315 +
  316 +<hr>
  317 +<h3>gf_unit and gf_time</h3>
  318 +
  319 +<b><a href=gf_unit.c>gf_unit.c</a></b> is a unit tester, and
  320 +<b><a href=gf_time.c>gf_time.c</a></b> is a time tester.
  321 +
  322 +They are called as follows:
  323 +
  324 +<p><center><table border=3 cellpadding=3><td><pre>
  325 +UNIX> <font color=darkred><b>gf_unit w tests seed [METHOD] </b></font>
  326 +UNIX> <font color=darkred><b>gf_time w tests seed size(bytes) iterations [METHOD] </b></font>
  327 +</pre></td></table></center><p>
  328 +
  329 +The <b>tests</b> parameter is one or more of the following characters:
  330 +
  331 +<UL>
  332 +<LI> A: Do all tests
  333 +<LI> S: Test only single operations (multiplication/division)
  334 +<LI> R: Test only region operations
  335 +<LI> V: Verbose Output
  336 +</UL>
  337 +
  338 +<b>seed</b> is a seed for <b>srand48()</b> -- using -1 defaults to the current time.
  339 +<p>
  340 +For example, testing the defaults with w=4:
  341 +
  342 +<pre>
  343 +UNIX> <font color=darkred><b>gf_unit 4 AV 1 LOG - -</b></font>
  344 +Seed: 1
  345 +Testing single multiplications/divisions.
  346 +Testing Inversions.
  347 +Testing buffer-constant, src != dest, xor = 0
  348 +Testing buffer-constant, src != dest, xor = 1
  349 +Testing buffer-constant, src == dest, xor = 0
  350 +Testing buffer-constant, src == dest, xor = 1
  351 +UNIX> <font color=darkred><b>gf_unit 4 AV 1 SHIFT - -</b></font>
  352 +Seed: 1
  353 +Testing single multiplications/divisions.
  354 +Testing Inversions.
  355 +No multiply_region.
  356 +UNIX> <font color=darkred><b></b></font>
  357 +</pre>
  358 +
  359 +There is no <b>multiply_region()</b> method defined for <b>SHIFT</b>.
  360 +Thus, the procedures are <b>NULL</b> and the unit tester ignores them.
  361 +<p>
  362 +At the moment, I only have the unit tester working for w=4.
  363 +<p>
  364 +<b>gf_time</b> takes the size of an array (in bytes) and a number of iterations, and
  365 +tests the speed of both single and region operations. The tests are:
  366 +
  367 +<UL>
  368 +<LI> A: All
  369 +<LI> S: All Single Operations
  370 +<LI> R: All Region Operations
  371 +<LI> M: Single: Multiplications
  372 +<LI> D: Single: Divisions
  373 +<LI> I: Single: Inverses
  374 +<LI> B: Region: Multiply_Region
  375 +</UL>
  376 +
  377 +Here are some examples with <b>SHIFT</b> and <b>LOG</b> on my mac.
  378 +
  379 +<pre>
  380 +UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - -</b></font>
  381 +Seed: 1
  382 +Multiply: 0.538126 s 185.830 Mega-ops/s
  383 +Divide: 0.520825 s 192.003 Mega-ops/s
  384 +Inverse: 0.631198 s 158.429 Mega-ops/s
  385 +Buffer-Const,s!=d,xor=0: 0.478395 s 209.032 MB/s
  386 +Buffer-Const,s!=d,xor=1: 0.524245 s 190.751 MB/s
  387 +Buffer-Const,s==d,xor=0: 0.471851 s 211.931 MB/s
  388 +Buffer-Const,s==d,xor=1: 0.528275 s 189.295 MB/s
  389 +UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - EUCLID</b></font>
  390 +Seed: 1
  391 +Multiply: 0.555512 s 180.014 Mega-ops/s
  392 +Divide: 5.359434 s 18.659 Mega-ops/s
  393 +Inverse: 4.911719 s 20.359 Mega-ops/s
  394 +Buffer-Const,s!=d,xor=0: 0.496097 s 201.573 MB/s
  395 +Buffer-Const,s!=d,xor=1: 0.538536 s 185.689 MB/s
  396 +Buffer-Const,s==d,xor=0: 0.485564 s 205.946 MB/s
  397 +Buffer-Const,s==d,xor=1: 0.540227 s 185.107 MB/s
  398 +UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - MATRIX</b></font>
  399 +Seed: 1
  400 +Multiply: 0.544005 s 183.822 Mega-ops/s
  401 +Divide: 7.602822 s 13.153 Mega-ops/s
  402 +Inverse: 7.000564 s 14.285 Mega-ops/s
  403 +Buffer-Const,s!=d,xor=0: 0.474868 s 210.585 MB/s
  404 +Buffer-Const,s!=d,xor=1: 0.527588 s 189.542 MB/s
  405 +Buffer-Const,s==d,xor=0: 0.473130 s 211.358 MB/s
  406 +Buffer-Const,s==d,xor=1: 0.529877 s 188.723 MB/s
  407 +UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 SHIFT - -</b></font>
  408 +Seed: 1
  409 +Multiply: 2.708842 s 36.916 Mega-ops/s
  410 +Divide: 8.756882 s 11.420 Mega-ops/s
  411 +Inverse: 5.695511 s 17.558 Mega-ops/s
  412 +UNIX> <font color=darkred><b></b></font>
  413 +</pre>
  414 +
  415 +At the moment, I only have the timer working for w=4.
  416 +
  417 +<hr>
  418 +<h3>Walking you through <b>LOG</b></h3>
  419 +
  420 +To see how <b>scratch</b> is used to store data, let's look at what happens when
  421 +you call <b>gf_init_easy(&gf, 4, GF_MULT_LOG_TABLE);</b>
  422 +First, <b>gf_init_easy()</b> calls <b>gf_init_hard()</b> with default parameters.
  423 +This is in <b><a href=gf.c>gf.c</a></b>.
  424 +<p>
  425 +<b>gf_init_hard()</b>'s first job is to set up the scratch.
  426 +The scratch's type is <b>gf_internal_t</b>, defined in
  427 +<b><a href=gf_int.h>gf_int.h</a></b>:
  428 +
  429 +<p><center><table border=3 cellpadding=3><td><pre>
  430 +typedef struct {
  431 + int mult_type;
  432 + int region_type;
  433 + int divide_type;
  434 + int w;
  435 + uint64_t prim_poly;
  436 + int free_me;
  437 + int arg1;
  438 + int arg2;
  439 + gf_t *base_gf;
  440 + void *private;
  441 +} gf_internal_t;
  442 +</pre></td></table></center><p>
  443 +
  444 +All the fields are straightforward, with the exception of <b>private</b>. That is
  445 +a <b>(void *)</b> which points to the implementation's private data.
  446 +<p>
  447 +Here's the code for
  448 +<b>gf_init_hard()</b>:
  449 +
  450 +<p><center><table border=3 cellpadding=3><td><pre>
  451 +int gf_init_hard(gf_t *gf, int w, int mult_type,
  452 + int region_type,
  453 + int divide_type,
  454 + uint64_t prim_poly,
  455 + int arg1, int arg2,
  456 + gf_t *base_gf,
  457 + void *scratch_memory)
  458 +{
  459 + int sz;
  460 + gf_internal_t *h;
  461 +
  462 +
  463 + if (scratch_memory == NULL) {
  464 + sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
  465 + if (sz &lt;= 0) return 0;
  466 + h = (gf_internal_t *) malloc(sz);
  467 + h-&gt;free_me = 1;
  468 + } else {
  469 + h = scratch_memory;
  470 + h-&gt;free_me = 0;
  471 + }
  472 + gf-&gt;scratch = (void *) h;
  473 + h-&gt;mult_type = mult_type;
  474 + h-&gt;region_type = region_type;
  475 + h-&gt;divide_type = divide_type;
  476 + h-&gt;w = w;
  477 + h-&gt;prim_poly = prim_poly;
  478 + h-&gt;arg1 = arg1;
  479 + h-&gt;arg2 = arg2;
  480 + h-&gt;base_gf = base_gf;
  481 + h-&gt;private = (void *) gf-&gt;scratch;
  482 + h-&gt;private += (sizeof(gf_internal_t));
  483 +
  484 + switch(w) {
  485 + case 4: return gf_w4_init(gf);
  486 + case 8: return gf_w8_init(gf);
  487 + case 16: return gf_w16_init(gf);
  488 + case 32: return gf_w32_init(gf);
  489 + case 64: return gf_w64_init(gf);
  490 + case 128: return gf_dummy_init(gf);
  491 + default: return 0;
  492 + }
  493 +}
  494 +</pre></td></table></center><p>
  495 +
  496 +The first thing it does is determine if it has to allocate space for <b>scratch</b>.
  497 +If it must, it uses <b>gf_scratch_size()</b> to figure out how big the space must be.
  498 +It then sets <b>gf->scratch</b> to this space, and sets all of the fields of the
  499 +scratch to the arguments in <b>gf_init_hard()</b>. The <b>private</b> pointer is
  500 +set to point just past the <b>gf_internal_t</b> header at the start of <b>gf->scratch</b>. Again, it is up to
  501 +<b>gf_scratch_size()</b> to make sure there is enough space for the scratch, and
  502 +for all of the private data needed by the implementation.
  503 +<p>
  504 +Once the scratch is set up, <b>gf_init_hard()</b> calls <b>gf_w4_init()</b>. This is
  505 +in <b><a href=gf_w4.c>gf_w4.c</a></b>, and it is a
  506 +simple dispatcher to the various initialization routines, plus it
  507 +sets <b>EUCLID</b> and <b>MATRIX</b> if need be:
  508 +
  509 +<p><center><table border=3 cellpadding=3><td><pre>
  510 +int gf_w4_init(gf_t *gf)
  511 +{
  512 + gf_internal_t *h;
  513 +
  514 + h = (gf_internal_t *) gf-&gt;scratch;
  515 + if (h-&gt;prim_poly == 0) h-&gt;prim_poly = 0x13;
  516 +
  517 + gf-&gt;multiply.w4 = NULL;
  518 + gf-&gt;divide.w4 = NULL;
  519 + gf-&gt;inverse.w4 = NULL;
  520 + gf-&gt;multiply_region.w4 = NULL;
  521 +
  522 + switch(h-&gt;mult_type) {
  523 + case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break;
  524 + case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
  525 + case GF_MULT_DEFAULT: if (gf_w4_log_init(gf) == 0) return 0; break;
  526 + default: return 0;
  527 + }
  528 + if (h-&gt;divide_type == GF_DIVIDE_EUCLID) {
  529 + gf-&gt;divide.w4 = gf_w4_divide_from_inverse;
  530 + gf-&gt;inverse.w4 = gf_w4_euclid;
  531 + } else if (h-&gt;divide_type == GF_DIVIDE_MATRIX) {
  532 + gf-&gt;divide.w4 = gf_w4_divide_from_inverse;
  533 + gf-&gt;inverse.w4 = gf_w4_matrix;
  534 + }
  535 +
  536 + if (gf-&gt;inverse.w4 != NULL && gf-&gt;divide.w4 == NULL) {
  537 + gf-&gt;divide.w4 = gf_w4_divide_from_inverse;
  538 + }
  539 + if (gf-&gt;inverse.w4 == NULL && gf-&gt;divide.w4 != NULL) {
  540 + gf-&gt;inverse.w4 = gf_w4_inverse_from_divide;
  541 + }
  542 + return 1;
  543 +}
  544 +</pre></td></table></center><p>
  545 +
  546 +The code in <b>gf_w4_log_init()</b> sets up the log and antilog tables, and sets
  547 +the <b>multiply.w4</b>, <b>divide.w4</b> etc routines to be the ones for logs. The
  548 +tables are put into <b>gf->scratch->private</b>, which is typecast to a <b>struct
  549 +gf_logtable_data *</b>:
  550 +
  551 +<p><center><table border=3 cellpadding=3><td><pre>
  552 +struct gf_logtable_data {
  553 + gf_val_4_t log_tbl[GF_FIELD_SIZE];
  554 + gf_val_4_t antilog_tbl[GF_FIELD_SIZE * 2];
  555 + gf_val_4_t *antilog_tbl_div;
  556 +};
  557 +.......
  558 +
  559 +static
  560 +int gf_w4_log_init(gf_t *gf)
  561 +{
  562 + gf_internal_t *h;
  563 + struct gf_logtable_data *ltd;
  564 + int i, b;
  565 +
  566 + h = (gf_internal_t *) gf-&gt;scratch;
  567 + ltd = h-&gt;private;
  568 +
  569 + ltd-&gt;log_tbl[0] = 0;
  570 +
  571 + ltd-&gt;antilog_tbl_div = ltd-&gt;antilog_tbl + (GF_FIELD_SIZE-1);
  572 + b = 1;
  573 + for (i = 0; i &lt; GF_FIELD_SIZE-1; i++) {
  574 + ltd-&gt;log_tbl[b] = (gf_val_8_t)i;
  575 + ltd-&gt;antilog_tbl[i] = (gf_val_8_t)b;
  576 + ltd-&gt;antilog_tbl[i+GF_FIELD_SIZE-1] = (gf_val_8_t)b;
  577 + b &lt;&lt;= 1;
  578 + if (b & GF_FIELD_SIZE) {
  579 + b = b ^ h-&gt;prim_poly;
  580 + }
  581 + }
  582 +
  583 + gf-&gt;inverse.w4 = gf_w4_inverse_from_divide;
  584 + gf-&gt;divide.w4 = gf_w4_log_divide;
  585 + gf-&gt;multiply.w4 = gf_w4_log_multiply;
  586 + gf-&gt;multiply_region.w4 = gf_w4_log_multiply_region;
  587 + return 1;
  588 +}
  589 +</pre></td></table></center><p>
  590 +
  591 +And of course the individual routines use <b>h->private</b> to access the tables:
  592 +
  593 +<p><center><table border=3 cellpadding=3><td><pre>
  594 +static
  595 +inline
  596 +gf_val_8_t gf_w4_log_multiply (gf_t *gf, gf_val_8_t a, gf_val_8_t b)
  597 +{
  598 + struct gf_logtable_data *ltd;
  599 +
  600 + ltd = (struct gf_logtable_data *) ((gf_internal_t *) (gf-&gt;scratch))-&gt;private;
  601 + return (a == 0 || b == 0) ? 0 : ltd-&gt;antilog_tbl[(unsigned)(ltd-&gt;log_tbl[a] + ltd-&gt;log_tbl[b])];
  602 +}
  603 +</pre></td></table></center><p>
  604 +
  605 +Finally, it's important that the proper sizes are put into
  606 +<b>gf_w4_scratch_size()</b> for each implementation:
  607 +
  608 +<p><center><table border=3 cellpadding=3><td><pre>
  609 +int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
  610 +{
  611 + int region_tbl_size;
  612 + switch(mult_type)
  613 + {
  614 + case GF_MULT_DEFAULT:
  615 + case GF_MULT_LOG_TABLE:
  616 + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
  617 + break;
  618 + case GF_MULT_SHIFT:
  619 + return sizeof(gf_internal_t);
  620 + break;
  621 + default:
  622 + return -1;
  623 + }
  624 +}
  625 +</pre></td></table></center><p>
  626 +I hope that's enough explanation for y'all to start implementing. Let me know if you have
  627 +problems -- thanks -- Jim
  628 +
  629 +<hr>
  630 +The initial structure has been set for w=4, 8, 16, 32 and 64, with implementations of SHIFT and EUCLID, and for w &le; 32, MATRIX. There are some weird caveats:
  631 +
  632 +<UL>
  633 +<LI> For w=32 and w=64, the primitive polynomial does not have the leading one.
  634 +<LI> I'd like for naming to be:
  635 +<p>
  636 +<UL>
  637 + <b>gf_w</b><i>w</i><b>_</b><i>technique</i><b>_</b><i>functionality</i><b>()</b>.
  638 +</UL>
  639 +<p>
  640 +For example, the log techniques for w=4 are:
  641 +<pre>
  642 +gf_w4_log_multiply()
  643 +gf_w4_log_divide()
  644 +gf_w4_log_multiply_region()
  645 +gf_w4_log_init()
  646 +</pre>
  647 +<p>
  648 +<LI> I'd also like a header block on implementations that says who wrote it.
  649 +</UL>
  650 +
  651 +<hr>
  652 +<h3>Things we need to Implement: <i>w=4</i></h3>
  653 +
  654 +<p><table border=3 cellpadding=2>
  655 +<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
  656 +<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
  657 +<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
  658 +<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
  659 +<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
  660 +<tr> <td> Single TABLE </td> <td> Done - Jim </td> </tr>
  661 +<tr> <td> Double TABLE </td> <td> Done - Jim </td> </tr>
  662 +<tr> <td> Double TABLE, SSE </td> <td> Done - Jim </td> </tr>
  663 +<tr> <td> Quad TABLE </td> <td>Done - Jim</td> </tr>
  664 +<tr> <td> Lazy Quad TABLE </td> <td>Done - Jim</td> </tr>
  665 +<tr> <td> LOG </td> <td> Done - Jim </td> </tr>
  666 +</table><p>
  667 +
  668 +<hr>
  669 +<h3>Things we need to Implement: <i>w=8</i></h3>
  670 +
  671 +<p><table border=3 cellpadding=2>
  672 +<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
  673 +<tr> <td> BYTWO_p </td> <td>Done - Jim </td> </tr>
  674 +<tr> <td> BYTWO_b </td> <td>Done - Jim </td> </tr>
  675 +<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim </td> </tr>
  676 +<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim </td> </tr>
  677 +<tr> <td> Single TABLE </td> <td> Done - Kevin </td> </tr>
  678 +<tr> <td> Double TABLE </td> <td> Done - Jim </td> </tr>
  679 +<tr> <td> Lazy Double TABLE </td> <td> Done - Jim </td> </tr>
  680 +<tr> <td> Split 2 1 (Half) SSE </td> <td>Done - Jim</td> </tr>
  681 +<tr> <td> Composite, k=2 </td> <td> Done - Kevin (alt mapping not passing unit test) </td> </tr>
  682 +<tr> <td> LOG </td> <td> Done - Kevin </td> </tr>
  683 +<tr> <td> LOG ZERO</td> <td> Done - Jim</td> </tr>
  684 +</table><p>
  685 +
  686 +<hr>
  687 +<h3>Things we need to Implement: <i>w=16</i></h3>
  688 +
  689 +<p><table border=3 cellpadding=2>
  690 +<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
  691 +<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
  692 +<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
  693 +<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
  694 +<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
  695 +<tr> <td> Lazy TABLE </td> <td>Done - Jim</td> </tr>
  696 +<tr> <td> Split 4 16 No-SSE, lazy </td> <td>Done - Jim</td> </tr>
  697 +<tr> <td> Split 4 16 SSE, lazy </td> <td>Done - Jim</td> </tr>
  698 +<tr> <td> Split 4 16 SSE, lazy, alternate mapping </td> <td>Done - Jim</td> </tr>
  699 +<tr> <td> Split 8 16, lazy </td> <td>Done - Jim</td> </tr>
  700 +<tr> <td> Composite, k=2, stdmap recursive </td> <td> Done - Kevin</td> </tr>
  701 +<tr> <td> Composite, k=2, altmap recursive </td> <td> Done - Kevin</td> </tr>
  702 +<tr> <td> Composite, k=2, stdmap inline </td> <td> Done - Kevin</td> </tr>
  703 +<tr> <td> LOG </td> <td> Done - Kevin </td> </tr>
  704 +<tr> <td> LOG ZERO</td> <td> Done - Kevin </td> </tr>
  705 +<tr> <td> Group 4 4 </td> <td>Done - Jim: I don't see a reason to implement others, although 4-8 will be faster, and 8 8 will have faster region ops. They'll never beat SPLIT.</td> </tr>
  706 +</table><p>
  707 +
  708 +<hr>
  709 +<h3>Things we need to Implement: <i>w=32</i></h3>
  710 +
  711 +<p><table border=3 cellpadding=2>
  712 +<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
  713 +<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
  714 +<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
  715 +<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
  716 +<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
  717 +<tr> <td> Split 2 32,lazy </td> <td>Done - Jim</td> </tr>
  718 +<tr> <td> Split 2 32, SSE, lazy </td> <td>Done - Jim</td> </tr>
  719 +<tr> <td> Split 4 32, lazy </td> <td>Done - Jim</td> </tr>
  720 +<tr> <td> Split 4 32, SSE,ALTMAP lazy </td> <td>Done - Jim</td> </tr>
  721 +<tr> <td> Split 4 32, SSE, lazy </td> <td>Done - Jim</td> </tr>
  722 +<tr> <td> Split 8 8 </td> <td>Done - Jim </td> </tr>
  723 +<tr> <td> Group, g_s == g_r </td> <td>Done - Jim</td></tr>
  724 +<tr> <td> Group, any g_s and g_r</td> <td>Done - Jim</td></tr>
  725 +<tr> <td> Composite, k=2, stdmap recursive </td> <td> Done - Kevin</td> </tr>
  726 +<tr> <td> Composite, k=2, altmap recursive </td> <td> Done - Kevin</td> </tr>
  727 +<tr> <td> Composite, k=2, stdmap inline </td> <td> Done - Kevin</td> </tr>
  728 +</table><p>
  729 +<hr>
  730 +<h3>Things we need to Implement: <i>w=64</i></h3>
  731 +
  732 +<p><table border=3 cellpadding=2>
  733 +<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
  734 +<tr> <td> BYTWO_p </td> <td> - </td> </tr>
  735 +<tr> <td> BYTWO_b </td> <td> - </td> </tr>
  736 +<tr> <td> BYTWO_p, SSE </td> <td> - </td> </tr>
  737 +<tr> <td> BYTWO_b, SSE </td> <td> - </td> </tr>
  738 +<tr> <td> Split 16 1 SSE, maybe lazy </td> <td> - </td> </tr>
  739 +<tr> <td> Split 8 1 lazy </td> <td> - </td> </tr>
  740 +<tr> <td> Split 8 8 </td> <td> - </td> </tr>
  741 +<tr> <td> Split 8 8 lazy </td> <td> - </td> </tr>
  742 +<tr> <td> Group </td> <td> - </td> </tr>
  743 +<tr> <td> Composite, k=2, alternate mapping </td> <td> - </td> </tr>
  744 +</table><p>
  745 +<hr>
  746 +<h3>Things we need to Implement: <i>w=128</i></h3>
  747 +
  748 +<p><table border=3 cellpadding=2>
  749 +<tr> <td> SHIFT </td> <td> Done - Will </td> </tr>
  750 +<tr> <td> BYTWO_p </td> <td> - </td> </tr>
  751 +<tr> <td> BYTWO_b </td> <td> - </td> </tr>
  752 +<tr> <td> BYTWO_p, SSE </td> <td> - </td> </tr>
  753 +<tr> <td> BYTWO_b, SSE </td> <td> - </td> </tr>
  754 +<tr> <td> Split 32 1 SSE, maybe lazy </td> <td> - </td> </tr>
  755 +<tr> <td> Split 16 1 lazy </td> <td> - </td> </tr>
  756 +<tr> <td> Split 16 16 - Maybe that's insanity</td> <td> - </td> </tr>
  757 +<tr> <td> Split 16 16 lazy </td> <td> - </td> </tr>
  758 +<tr> <td> Group (SSE) </td> <td> - </td> </tr>
  759 +<tr> <td> Composite, k=?, alternate mapping </td> <td> - </td> </tr>
  760 +</table><p>
  761 +<hr>
  762 +<h3>Things we need to Implement: <i>w=general between 1 &amp; 32</i></h3>
  763 +
  764 +<p><table border=3 cellpadding=2>
  765 +<tr> <td> CAUCHY Region (SSE XOR)</td> <td> Done - Jim </td> </tr>
  766 +<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
  767 +<tr> <td> TABLE </td> <td> Done - Jim </td> </tr>
  768 +<tr> <td> LOG </td> <td> Done - Jim </td> </tr>
  769 +<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
  770 +<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
  771 +<tr> <td> Group, g_s == g_r </td> <td>Done - Jim</td></tr>
  772 +<tr> <td> Group, any g_s and g_r</td> <td>Done - Jim</td></tr>
  773 +<tr> <td> Split - do we need it?</td> <td>Done - Jim</td></tr>
  774 +<tr> <td> Composite - do we need it?</td> <td> - </td></tr>
  775 +<tr> <td> Split - do we need it?</td> <td> - </td></tr>
  776 +<tr> <td> Logzero?</td> <td> - </td></tr>
  777 +</table><p>
... ...
gf.c 0 → 100644
  1 +++ a/gf.c
... ... @@ -0,0 +1,478 @@
  1 +/*
  2 + * gf.c
  3 + *
  4 + * Generic routines for Galois fields
  5 + */
  6 +
  7 +#include "gf_int.h"
  8 +#include <stdio.h>
  9 +#include <stdlib.h>
  10 +
/*
 * gf_scratch_size
 *
 * Forwards the request to the scratch-size routine that matches the word
 * size w.  The widths 4, 8, 16, 32, 64 and 128 each have a dedicated
 * routine; every other value of w is handed to the general-w routine,
 * which additionally receives w itself.
 *
 * The result of the selected routine is returned unchanged.
 * (gf_init_hard() treats a result <= 0 as "cannot initialize".)
 */
int gf_scratch_size(int w,
                    int mult_type,
                    int region_type,
                    int divide_type,
                    int arg1,
                    int arg2)
{
  int bytes;

  if (w == 4) {
    bytes = gf_w4_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
  } else if (w == 8) {
    bytes = gf_w8_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
  } else if (w == 16) {
    bytes = gf_w16_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
  } else if (w == 32) {
    bytes = gf_w32_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
  } else if (w == 64) {
    bytes = gf_w64_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
  } else if (w == 128) {
    bytes = gf_w128_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
  } else {
    /* No dedicated implementation for this width: use the general one. */
    bytes = gf_wgen_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
  }

  return bytes;
}
  28 +
  29 +int gf_dummy_init(gf_t *gf)
  30 +{
  31 + return 0;
  32 +}
  33 +
  34 +int gf_init_easy(gf_t *gf, int w, int mult_type)
  35 +{
  36 + return gf_init_hard(gf, w, mult_type, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT, 0, 0, 0, NULL, NULL);
  37 +}
  38 +
  39 +int gf_init_hard(gf_t *gf, int w, int mult_type,
  40 + int region_type,
  41 + int divide_type,
  42 + uint64_t prim_poly,
  43 + int arg1, int arg2,
  44 + gf_t *base_gf,
  45 + void *scratch_memory)
  46 +{
  47 + int sz;
  48 + gf_internal_t *h;
  49 +
  50 + sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
  51 +
  52 + if (sz <= 0) return 0;
  53 +
  54 + if (scratch_memory == NULL) {
  55 + h = (gf_internal_t *) malloc(sz);
  56 + h->free_me = 1;
  57 + } else {
  58 + h = scratch_memory;
  59 + h->free_me = 0;
  60 + }
  61 + gf->scratch = (void *) h;
  62 + h->mult_type = mult_type;
  63 + h->region_type = region_type;
  64 + h->divide_type = divide_type;
  65 + h->w = w;
  66 + h->prim_poly = prim_poly;
  67 + h->arg1 = arg1;
  68 + h->arg2 = arg2;
  69 + h->base_gf = base_gf;
  70 + h->private = (void *) gf->scratch;
  71 + h->private += (sizeof(gf_internal_t));
  72 + gf->extract_word.w32 = NULL;
  73 +
  74 + //printf("Created w=%d, with mult_type=%d and region_type=%d\n", w, mult_type, region_type);
  75 +
  76 + switch(w) {
  77 + case 4: return gf_w4_init(gf);
  78 + case 8: return gf_w8_init(gf);
  79 + case 16: return gf_w16_init(gf);
  80 + case 32: return gf_w32_init(gf);
  81 + case 64: return gf_w64_init(gf);
  82 + case 128: return gf_w128_init(gf);
  83 + default: return gf_wgen_init(gf);
  84 + }
  85 +}
  86 +
  87 +int gf_free(gf_t *gf, int recursive)
  88 +{
  89 + gf_internal_t *h;
  90 +
  91 + h = (gf_internal_t *) gf->scratch;
  92 + if (recursive && h->base_gf != NULL) {
  93 + gf_free(h->base_gf, 1);
  94 + free(h->base_gf);
  95 + }
  96 + if (h->free_me) free(h);
  97 +}
  98 +
/*
 * gf_alignment_error
 *
 * Prints an alignment diagnostic on stderr and terminates the process
 * with exit status 1.  This function does not return.
 *
 *   s - text identifying where the error occurred (printed after
 *       "Alignment error in ").
 *   a - the required alignment in bytes, printed in the message.
 */
void gf_alignment_error(char *s, int a)
{
  fprintf(stderr,
          "Alignment error in %s:\n"
          " The source and destination buffers must be aligned to each other,\n"
          " and they must be aligned to a %d-byte address.\n",
          s, a);
  exit(1);
}
  106 +
  107 +/* Lifted this code from Jens Gregor -- thanks, Jens */
  108 +
/*
 * gf_is_sse2
 *
 * Returns 1 when the processor reports SSE2 support, 0 otherwise.
 *
 * On x86 / x86-64 this executes CPUID with EAX = 1 and tests bit 26 of
 * the EDX result.  All four registers that CPUID overwrites are declared
 * as asm operands so the compiler knows they change; the earlier version
 * modified them behind the compiler's back.
 *
 * On any other architecture there is no CPUID instruction, so the
 * function simply reports 0.
 */
int gf_is_sse2()
{
#if defined(__i386__) || defined(__x86_64__)
  unsigned int eax = 1;          /* CPUID leaf 1: feature information */
  unsigned int ebx, ecx, edx;

  __asm__ __volatile__ ("cpuid"
                        : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));

  /* Only EDX is inspected; the others are listed because CPUID writes them. */
  (void) ebx;
  (void) ecx;

  return (int) ((edx >> 26) & 0x1);
#else
  return 0;
#endif
}
  120 +
  121 +static
  122 +void gf_invert_binary_matrix(int *mat, int *inv, int rows) {
  123 + int cols, i, j, k;
  124 + int tmp;
  125 +
  126 + cols = rows;
  127 +
  128 + for (i = 0; i < rows; i++) inv[i] = (1 << i);
  129 +
  130 + /* First -- convert into upper triangular */
  131 +
  132 + for (i = 0; i < cols; i++) {
  133 +
  134 + /* Swap rows if we ave a zero i,i element. If we can't swap, then the
  135 + matrix was not invertible */
  136 +
  137 + if ((mat[i] & (1 << i)) == 0) {
  138 + for (j = i+1; j < rows && (mat[j] & (1 << i)) == 0; j++) ;
  139 + if (j == rows) {
  140 + fprintf(stderr, "galois_invert_matrix: Matrix not invertible!!\n");
  141 + exit(1);
  142 + }
  143 + tmp = mat[i]; mat[i] = mat[j]; mat[j] = tmp;
  144 + tmp = inv[i]; inv[i] = inv[j]; inv[j] = tmp;
  145 + }
  146 +
  147 + /* Now for each j>i, add A_ji*Ai to Aj */
  148 + for (j = i+1; j != rows; j++) {
  149 + if ((mat[j] & (1 << i)) != 0) {
  150 + mat[j] ^= mat[i];
  151 + inv[j] ^= inv[i];
  152 + }
  153 + }
  154 + }
  155 +
  156 + /* Now the matrix is upper triangular. Start at the top and multiply down */
  157 +
  158 + for (i = rows-1; i >= 0; i--) {
  159 + for (j = 0; j < i; j++) {
  160 + if (mat[j] & (1 << i)) {
  161 + /* mat[j] ^= mat[i]; */
  162 + inv[j] ^= inv[i];
  163 + }