Commit c4ab254bbce42e6df9c89aee11ddb459bc86f4d0

Authored by KMG
1 parent 17333662
Exists in master and in 1 other branch v2

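Do SIMD XOR where possible: galois_region_xor() now takes (src, dest, nbytes), XORs src into dest via galois_w32_region_xor(), and all callers are updated to the two-region form.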

include/galois.h
... ... @@ -51,9 +51,8 @@ extern int galois_single_multiply(int a, int b, int w);
51 51 extern int galois_single_divide(int a, int b, int w);
52 52 extern int galois_inverse(int x, int w);
53 53  
54   -void galois_region_xor( char *r1, /* Region 1 */
55   - char *r2, /* Region 2 */
56   - char *r3, /* Sum region (r3 = r1 ^ r2) -- can be r1 or r2 */
  54 +void galois_region_xor( char *src, /* Source Region */
  55 + char *dest, /* Dest Region (holds result) */
57 56 int nbytes); /* Number of bytes in region */
58 57  
59 58 /* These multiply regions in w=8, w=16 and w=32. They are much faster
... ...
src/galois.c
... ... @@ -324,29 +324,9 @@ void galois_w32_region_xor(void *src, void *dest, int nbytes)
324 324 gfp_array[32]->multiply_region.w32(gfp_array[32], src, dest, 1, nbytes, 1);
325 325 }
326 326  
327   -void galois_region_xor(char *r1, /* Region 1 */
328   - char *r2, /* Region 2 */
329   - char *r3, /* Sum region (r3 = r1 ^ r2) -- can be r1 or r2 */
330   - int nbytes) /* Number of bytes in region */
  327 +void galois_region_xor(char *src, char *dest, int nbytes)
331 328 {
332   - long *l1;
333   - long *l2;
334   - long *l3;
335   - long *ltop;
336   - char *ctop;
337   -
338   - ctop = r1 + nbytes;
339   - ltop = (long *) ctop;
340   - l1 = (long *) r1;
341   - l2 = (long *) r2;
342   - l3 = (long *) r3;
343   -
344   - while (l1 < ltop) {
345   - *l3 = ((*l1) ^ (*l2));
346   - l1++;
347   - l2++;
348   - l3++;
349   - }
  329 + galois_w32_region_xor(src, dest, nbytes);
350 330 }
351 331  
352 332 int galois_inverse(int y, int w)
... ...
src/jerasure.c
... ... @@ -341,7 +341,7 @@ void jerasure_bitmatrix_dotprod(int k, int w, int *bitmatrix_row,
341 341 jerasure_total_memcpy_bytes += packetsize;
342 342 pstarted = 1;
343 343 } else {
344   - galois_region_xor(pptr, dptr, pptr, packetsize);
  344 + galois_region_xor(dptr, pptr, packetsize);
345 345 jerasure_total_xor_bytes += packetsize;
346 346 }
347 347 }
... ... @@ -360,7 +360,7 @@ void jerasure_do_parity(int k, char **data_ptrs, char *parity_ptr, int size)
360 360 jerasure_total_memcpy_bytes += size;
361 361  
362 362 for (i = 1; i < k; i++) {
363   - galois_region_xor(data_ptrs[i], parity_ptr, parity_ptr, size);
  363 + galois_region_xor(data_ptrs[i], parity_ptr, size);
364 364 jerasure_total_xor_bytes += size;
365 365 }
366 366 }
... ... @@ -599,7 +599,7 @@ void jerasure_matrix_dotprod(int k, int w, int *matrix_row,
599 599 jerasure_total_memcpy_bytes += size;
600 600 init = 1;
601 601 } else {
602   - galois_region_xor(sptr, dptr, dptr, size);
  602 + galois_region_xor(sptr, dptr, size);
603 603 jerasure_total_xor_bytes += size;
604 604 }
605 605 }
... ... @@ -1173,7 +1173,7 @@ void jerasure_do_scheduled_operations(char **ptrs, int **operations, int packets
1173 1173 operations[op][2],
1174 1174 operations[op][3]);
1175 1175 printf("xor(0x%x, 0x%x -> 0x%x, %d)\n", sptr, dptr, dptr, packetsize); */
1176   - galois_region_xor(sptr, dptr, dptr, packetsize);
  1176 + galois_region_xor(sptr, dptr, packetsize);
1177 1177 jerasure_total_xor_bytes += packetsize;
1178 1178 } else {
1179 1179 /* printf("memcpy(0x%x <- 0x%x)\n", dptr, sptr); */
... ...
src/reed_sol.c
... ... @@ -213,7 +213,7 @@ int reed_sol_r6_encode(int k, int w, char **data_ptrs, char **coding_ptrs, int s
213 213  
214 214 memcpy(coding_ptrs[0], data_ptrs[0], size);
215 215  
216   - for (i = 1; i < k; i++) galois_region_xor(coding_ptrs[0], data_ptrs[i], coding_ptrs[0], size);
  216 + for (i = 1; i < k; i++) galois_region_xor(data_ptrs[i], coding_ptrs[0], size);
217 217  
218 218 /* Next, put the sum of (2^j)*Dj into coding region 1 */
219 219  
... ... @@ -227,7 +227,7 @@ int reed_sol_r6_encode(int k, int w, char **data_ptrs, char **coding_ptrs, int s
227 227 default: return 0;
228 228 }
229 229  
230   - galois_region_xor(coding_ptrs[1], data_ptrs[i], coding_ptrs[1], size);
  230 + galois_region_xor(data_ptrs[i], coding_ptrs[1], size);
231 231 }
232 232 return 1;
233 233 }
... ...