1    	/*
2    	 * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
3    	 * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
4    	 * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
5    	 *
6    	 * gf_general.c
7    	 *
8    	 * This file has helper routines for doing basic GF operations with any
9    	 * legal value of w.  The problem is that w <= 32, w=64 and w=128 all have
10   	 * different data types, which is a pain.  The procedures in this file try
11   	 * to alleviate that pain.  They are used in gf_unit and gf_time.
12   	 */
13   	
14   	#include <stdio.h>
15   	#include <getopt.h>
16   	#include <stdint.h>
17   	#include <string.h>
18   	#include <stdlib.h>
19   	#include <time.h>
20   	#include <assert.h>
21   	
22   	#include "gf_complete.h"
23   	#include "gf_int.h"
24   	#include "gf_method.h"
25   	#include "gf_rand.h"
26   	#include "gf_general.h"
27   	
28   	void gf_general_set_zero(gf_general_t *v, int w)
29   	{
30   	  if (w <= 32) {
31   	    v->w32 = 0;
32   	  } else if (w <= 64) {
33   	    v->w64 = 0;
34   	  } else {
35   	    v->w128[0] = 0;
36   	    v->w128[1] = 0;
37   	  }
38   	}
39   	
40   	void gf_general_set_one(gf_general_t *v, int w)
41   	{
42   	  if (w <= 32) {
43   	    v->w32 = 1;
44   	  } else if (w <= 64) {
45   	    v->w64 = 1;
46   	  } else {
47   	    v->w128[0] = 0;
48   	    v->w128[1] = 1;
49   	  }
50   	}
51   	
52   	void gf_general_set_two(gf_general_t *v, int w)
53   	{
54   	  if (w <= 32) {
55   	    v->w32 = 2;
56   	  } else if (w <= 64) {
57   	    v->w64 = 2;
58   	  } else {
59   	    v->w128[0] = 0;
60   	    v->w128[1] = 2;
61   	  }
62   	}
63   	
64   	int gf_general_is_zero(gf_general_t *v, int w) 
65   	{
66   	  if (w <= 32) {
67   	    return (v->w32 == 0);
68   	  } else if (w <= 64) {
69   	    return (v->w64 == 0);
70   	  } else {
71   	    return (v->w128[0] == 0 && v->w128[1] == 0);
72   	  }
73   	}
74   	
75   	int gf_general_is_one(gf_general_t *v, int w) 
76   	{
77   	  if (w <= 32) {
78   	    return (v->w32 == 1);
79   	  } else if (w <= 64) {
80   	    return (v->w64 == 1);
81   	  } else {
82   	    return (v->w128[0] == 0 && v->w128[1] == 1);
83   	  }
84   	}
85   	
86   	void gf_general_set_random(gf_general_t *v, int w, int zero_ok) 
87   	{
88   	  if (w <= 32) {
89   	      v->w32 = MOA_Random_W(w, zero_ok);
90   	  } else if (w <= 64) {
91   	    while (1) {
92   	      v->w64 = MOA_Random_64();
93   	      if (v->w64 != 0 || zero_ok) return;
94   	    }
95   	  } else {
96   	    while (1) {
97   	      MOA_Random_128(v->w128);
98   	      if (v->w128[0] != 0 || v->w128[1] != 0 || zero_ok) return;
99   	    }
100  	  }
101  	}
102  	
103  	void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex)
104  	{
105  	  if (w <= 32) {
106  	    if (hex) {
107  	      sprintf(s, "%x", v->w32);
108  	    } else {
109  	      sprintf(s, "%u", v->w32);
110  	    }
111  	  } else if (w <= 64) {
112  	    if (hex) {
113  	      sprintf(s, "%llx", (long long unsigned int) v->w64);
114  	    } else {
115  	      sprintf(s, "%lld", (long long unsigned int) v->w64);
116  	    }
117  	  } else {
118  	    if (v->w128[0] == 0) {
119  	      sprintf(s, "%llx", (long long unsigned int) v->w128[1]);
120  	    } else {
121  	      sprintf(s, "%llx%016llx", (long long unsigned int) v->w128[0], 
122  	                                (long long unsigned int) v->w128[1]);
123  	    }
124  	  }
125  	}
126  	
127  	int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex)
128  	{
129  	  int l;
130  	  int save;
131  	
(1) Event cond_false: Condition "w <= 32", taking false branch.
132  	  if (w <= 32) {
133  	    if (hex) {
134  	      if (sscanf(s, "%x", &(v->w32)) == 0) return 0;
135  	    } else {
136  	      if (sscanf(s, "%u", &(v->w32)) == 0) return 0;
137  	    }
138  	    if (w == 32) return 1;
139  	    if (w == 31) {
140  	      if (v->w32 & ((gf_val_32_t)1 << 31)) return 0;
141  	      return 1;
142  	    } 
143  	    if (v->w32 & ~((1 << w)-1)) return 0;
144  	    return 1;
(2) Event else_branch: Reached else branch.
(3) Event cond_false: Condition "w <= 64", taking false branch.
145  	  } else if (w <= 64) {
146  	    if (hex) return (sscanf(s, "%llx", (long long unsigned int *) (&(v->w64))) == 1);
147  	    return (sscanf(s, "%lld", (long long int *) (&(v->w64))) == 1);
(4) Event else_branch: Reached else branch.
148  	  } else {
(5) Event cond_false: Condition "!hex", taking false branch.
(6) Event if_end: End of if statement.
149  	    if (!hex) return 0;
150  	    l = strlen(s);
(7) Event cond_false: Condition "l <= 16", taking false branch.
151  	    if (l <= 16) {
152  	      v->w128[0] = 0;
153  	      return (sscanf(s, "%llx", (long long unsigned int *) (&(v->w128[1]))) == 1);
(8) Event else_branch: Reached else branch.
154  	    } else {
(9) Event cond_false: Condition "l > 32", taking false branch.
(10) Event if_end: End of if statement.
155  	      if (l > 32) return 0;
(11) Event save: Saving non-local "s[l - 16]" in local "save".
Also see events: [modify][end_of_scope][restore_example]
156  	      save = s[l-16];
(12) Event modify: Modifying non-local "s[l - 16]".
Also see events: [save][end_of_scope][restore_example]
157  	      s[l-16] = '\0';
(13) Event cond_false: Condition "sscanf(s, "%llx", (unsigned long long *)&v->w128[0]) == 0", taking false branch.
158  	      if (sscanf(s, "%llx", (long long unsigned int *) (&(v->w128[0]))) == 0) {
(17) Event restore_example: The original value of non-local "s[l - 16]" was restored here.
Also see events: [save][modify][end_of_scope]
159  	        s[l-16] = save;
160  	        return 0;
(14) Event if_end: End of if statement.
161  	      }
(15) Event cond_true: Condition "sscanf(s + (l - 16), "%llx", (unsigned long long *)&v->w128[1]) == 1", taking true branch.
(16) Event end_of_scope: Value of non-local "s[l - 16]" that was saved in "save" is not restored as it was along other paths.
Also see events: [save][modify][restore_example]
162  	      return (sscanf(s+(l-16), "%llx", (long long unsigned int *) (&(v->w128[1]))) == 1);
163  	    }
164  	  }
165  	}
166  	    
167  	void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
168  	{
169  	  gf_internal_t *h;
170  	  int w;
171  	
172  	  h = (gf_internal_t *) gf->scratch;
173  	  w = h->w;
174  	
175  	  if (w <= 32) {
176  	    c->w32 = a->w32 ^ b->w32;
177  	  } else if (w <= 64) {
178  	    c->w64 = a->w64 ^ b->w64;
179  	  } else {
180  	    c->w128[0] = a->w128[0] ^ b->w128[0];
181  	    c->w128[1] = a->w128[1] ^ b->w128[1];
182  	  }
183  	}
184  	  
185  	void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
186  	{
187  	  gf_internal_t *h;
188  	  int w;
189  	
190  	  h = (gf_internal_t *) gf->scratch;
191  	  w = h->w;
192  	
193  	  if (w <= 32) {
194  	    c->w32 = gf->multiply.w32(gf, a->w32, b->w32);
195  	  } else if (w <= 64) {
196  	    c->w64 = gf->multiply.w64(gf, a->w64, b->w64);
197  	  } else {
198  	    gf->multiply.w128(gf, a->w128, b->w128, c->w128);
199  	  }
200  	}
201  	  
202  	void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
203  	{
204  	  gf_internal_t *h;
205  	  int w;
206  	
207  	  h = (gf_internal_t *) gf->scratch;
208  	  w = h->w;
209  	
210  	  if (w <= 32) {
211  	    c->w32 = gf->divide.w32(gf, a->w32, b->w32);
212  	  } else if (w <= 64) {
213  	    c->w64 = gf->divide.w64(gf, a->w64, b->w64);
214  	  } else {
215  	    gf->divide.w128(gf, a->w128, b->w128, c->w128);
216  	  }
217  	}
218  	  
219  	void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b)
220  	{
221  	  gf_internal_t *h;
222  	  int w;
223  	
224  	  h = (gf_internal_t *) gf->scratch;
225  	  w = h->w;
226  	
227  	  if (w <= 32) {
228  	    b->w32 = gf->inverse.w32(gf, a->w32);
229  	  } else if (w <= 64) {
230  	    b->w64 = gf->inverse.w64(gf, a->w64);
231  	  } else {
232  	    gf->inverse.w128(gf, a->w128, b->w128);
233  	  }
234  	}
235  	  
236  	int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w)
237  	{
238  	  if (w <= 32) {
239  	    return (v1->w32 == v2->w32);
240  	  } else if (w <= 64) {
241  	    return (v1->w64 == v2->w64);
242  	  } else {
243  	    return (v1->w128[0] == v2->w128[0] &&
244  	            v1->w128[1] == v2->w128[1]);
245  	  }
246  	}
247  	
248  	void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a, void *ra, void *rb, int bytes, int xor)
249  	{
250  	  gf_internal_t *h;
251  	  int w;
252  	
253  	  h = (gf_internal_t *) gf->scratch;
254  	  w = h->w;
255  	
256  	  if (w <= 32) {
257  	    gf->multiply_region.w32(gf, ra, rb, a->w32, bytes, xor);
258  	  } else if (w <= 64) {
259  	    gf->multiply_region.w64(gf, ra, rb, a->w64, bytes, xor);
260  	  } else {
261  	    gf->multiply_region.w128(gf, ra, rb, a->w128, bytes, xor);
262  	  }
263  	}
264  	
265  	void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *orig_target, void *final_target, int bytes, int xor)
266  	{
267  	  gf_internal_t *h;
268  	  int w, words, i;
269  	  gf_general_t oa, ot, ft, sb;
270  	  char sa[50], soa[50], sot[50], sft[50], ssb[50];
271  	
272  	  h = (gf_internal_t *) gf->scratch;
273  	  w = h->w;
274  	
275  	  words = (bytes * 8) / w;
276  	  for (i = 0; i < words; i++) {
277  	    if (w <= 32) {
278  	      oa.w32 = gf->extract_word.w32(gf, orig_a, bytes, i);
279  	      ot.w32 = gf->extract_word.w32(gf, orig_target, bytes, i);
280  	      ft.w32 = gf->extract_word.w32(gf, final_target, bytes, i);
281  	      sb.w32 = gf->multiply.w32(gf, a->w32, oa.w32);
282  	      if (xor) sb.w32 ^= ot.w32;
283  	    } else if (w <= 64) {
284  	      oa.w64 = gf->extract_word.w64(gf, orig_a, bytes, i);
285  	      ot.w64 = gf->extract_word.w64(gf, orig_target, bytes, i);
286  	      ft.w64 = gf->extract_word.w64(gf, final_target, bytes, i);
287  	      sb.w64 = gf->multiply.w64(gf, a->w64, oa.w64);
288  	      if (xor) sb.w64 ^= ot.w64;
289  	    } else {
290  	      gf->extract_word.w128(gf, orig_a, bytes, i, oa.w128);
291  	      gf->extract_word.w128(gf, orig_target, bytes, i, ot.w128);
292  	      gf->extract_word.w128(gf, final_target, bytes, i, ft.w128);
293  	      gf->multiply.w128(gf, a->w128, oa.w128, sb.w128);
294  	      if (xor) {
295  	        sb.w128[0] ^= ot.w128[0];
296  	        sb.w128[1] ^= ot.w128[1];
297  	      }
298  	    }
299  	
300  	    if (!gf_general_are_equal(&ft, &sb, w)) {
301  	      
302  	      fprintf(stderr,"Problem with region multiply (all values in hex):\n");
303  	      fprintf(stderr,"   Target address base: 0x%lx.  Word 0x%x of 0x%x.  Xor: %d\n", 
304  	                 (unsigned long) final_target, i, words, xor);
305  	      gf_general_val_to_s(a, w, sa, 1);
306  	      gf_general_val_to_s(&oa, w, soa, 1);
307  	      gf_general_val_to_s(&ot, w, sot, 1);
308  	      gf_general_val_to_s(&ft, w, sft, 1);
309  	      gf_general_val_to_s(&sb, w, ssb, 1);
310  	      fprintf(stderr,"   Value: %s\n", sa);
311  	      fprintf(stderr,"   Original source word: %s\n", soa);
312  	      if (xor) fprintf(stderr,"   XOR with target word: %s\n", sot);
313  	      fprintf(stderr,"   Product word: %s\n", sft);
314  	      fprintf(stderr,"   It should be: %s\n", ssb);
315  	      assert(0);
316  	    }
317  	  }
318  	}
319  	
320  	void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
321  	{
322  	  void *top;
323  	  gf_general_t g;
324  	  uint8_t *r8, *r8a;
325  	  uint16_t *r16;
326  	  uint32_t *r32;
327  	  uint64_t *r64;
328  	  int i;
329  	
330  	  top = (uint8_t *)rb+size;
331  	
332  	  /* If w is 8, 16, 32, 64 or 128, fill the regions with random bytes.
333  	     However, don't allow for zeros in rb, because that will screw up
334  	     division.
335  	     
336  	     When w is 4, you fill the regions with random 4-bit words in each byte.
337  	
338  	     Otherwise, treat every four bytes as an uint32_t
339  	     and fill it with a random value mod (1 << w).
340  	   */
341  	
342  	  if (w == 8 || w == 16 || w == 32 || w == 64 || w == 128) {
343  	    MOA_Fill_Random_Region (ra, size);
344  	    while (rb < top) {
345  	      gf_general_set_random(&g, w, 0);
346  	      switch (w) {
347  	        case 8: 
348  	          r8 = (uint8_t *) rb;
349  	          *r8 = g.w32;
350  	          break;
351  	        case 16: 
352  	          r16 = (uint16_t *) rb;
353  	          *r16 = g.w32;
354  	          break;
355  	        case 32: 
356  	          r32 = (uint32_t *) rb;
357  	          *r32 = g.w32;
358  	          break;
359  	        case 64:
360  	          r64 = (uint64_t *) rb;
361  	          *r64 = g.w64;
362  	          break;
363  	        case 128: 
364  	          r64 = (uint64_t *) rb;
365  	          r64[0] = g.w128[0];
366  	          r64[1] = g.w128[1];
367  	          break;
368  	      }
369  	      rb = (uint8_t *)rb + (w/8);
370  	    }
371  	  } else if (w == 4) {
372  	    r8a = (uint8_t *) ra;
373  	    r8 = (uint8_t *) rb;
374  	    while (r8 < (uint8_t *) top) {
375  	      gf_general_set_random(&g, w, 1);
376  	      *r8a = g.w32;
377  	      gf_general_set_random(&g, w, 0);
378  	      *r8 = g.w32;
379  	      r8a++;
380  	      r8++;
381  	    }
382  	  } else {
383  	    r32 = (uint32_t *) ra;
384  	    for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 1);
385  	    r32 = (uint32_t *) rb;
386  	    for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 0);
387  	  }
388  	}
389  	
390  	/* This sucks, but in order to time, you really need to avoid putting ifs in 
391  	   the inner loops.  So, I'm doing a separate timing test for each w: 
392  	   (4 & 8), 16, 32, 64, 128 and everything else.  Fortunately, the "everything else"
393  	   tests can be equivalent to w=32.
394  	
395  	   I'm also putting the results back into ra, because otherwise, the optimizer might
396  	   figure out that we're not really doing anything in the inner loops and it 
397  	   will chuck that. */
398  	
399  	int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char test)
400  	{
401  	  gf_internal_t *h;
402  	  void *top;
403  	  uint8_t *r8a, *r8b, *top8;
404  	  uint16_t *r16a, *r16b, *top16;
405  	  uint32_t *r32a, *r32b, *top32;
406  	  uint64_t *r64a, *r64b, *top64, *r64c;
407  	  int w, rv;
408  	
409  	  h = (gf_internal_t *) gf->scratch;
410  	  w = h->w;
411  	  top = (uint8_t *)ra + size;
412  	
413  	  if (w == 8 || w == 4) {
414  	    r8a = (uint8_t *) ra; 
415  	    r8b = (uint8_t *) rb; 
416  	    top8 = (uint8_t *) top;
417  	    if (test == 'M') {
418  	      while (r8a < top8) {
419  	        *r8a = gf->multiply.w32(gf, *r8a, *r8b);
420  	        r8a++;
421  	        r8b++;
422  	      }
423  	    } else if (test == 'D') {
424  	      while (r8a < top8) {
425  	        *r8a = gf->divide.w32(gf, *r8a, *r8b);
426  	        r8a++;
427  	        r8b++;
428  	      }
429  	    } else if (test == 'I') {
430  	      while (r8a < top8) {
431  	        *r8a = gf->inverse.w32(gf, *r8a);
432  	        r8a++;
433  	      }
434  	    }
435  	    return (top8 - (uint8_t *) ra);
436  	  }
437  	
438  	  if (w == 16) {
439  	    r16a = (uint16_t *) ra; 
440  	    r16b = (uint16_t *) rb; 
441  	    top16 = (uint16_t *) top;
442  	    if (test == 'M') {
443  	      while (r16a < top16) {
444  	        *r16a = gf->multiply.w32(gf, *r16a, *r16b);
445  	        r16a++;
446  	        r16b++;
447  	      }
448  	    } else if (test == 'D') {
449  	      while (r16a < top16) {
450  	        *r16a = gf->divide.w32(gf, *r16a, *r16b);
451  	        r16a++;
452  	        r16b++;
453  	      }
454  	    } else if (test == 'I') {
455  	      while (r16a < top16) {
456  	        *r16a = gf->inverse.w32(gf, *r16a);
457  	        r16a++;
458  	      }
459  	    }
460  	    return (top16 - (uint16_t *) ra);
461  	  }
462  	  if (w <= 32) {
463  	    r32a = (uint32_t *) ra; 
464  	    r32b = (uint32_t *) rb; 
465  	    top32 = (uint32_t *) ra + (size/4); /* This is for the "everything elses" */
466  	    
467  	    if (test == 'M') {
468  	      while (r32a < top32) {
469  	        *r32a = gf->multiply.w32(gf, *r32a, *r32b);
470  	        r32a++;
471  	        r32b++;
472  	      }
473  	    } else if (test == 'D') {
474  	      while (r32a < top32) {
475  	        *r32a = gf->divide.w32(gf, *r32a, *r32b);
476  	        r32a++;
477  	        r32b++;
478  	      }
479  	    } else if (test == 'I') {
480  	      while (r32a < top32) {
481  	        *r32a = gf->inverse.w32(gf, *r32a);
482  	        r32a++;
483  	      }
484  	    }
485  	    return (top32 - (uint32_t *) ra);
486  	  }
487  	  if (w == 64) {
488  	    r64a = (uint64_t *) ra; 
489  	    r64b = (uint64_t *) rb; 
490  	    top64 = (uint64_t *) top;
491  	    if (test == 'M') {
492  	      while (r64a < top64) {
493  	        *r64a = gf->multiply.w64(gf, *r64a, *r64b);
494  	        r64a++;
495  	        r64b++;
496  	      }
497  	    } else if (test == 'D') {
498  	      while (r64a < top64) {
499  	        *r64a = gf->divide.w64(gf, *r64a, *r64b);
500  	        r64a++;
501  	        r64b++;
502  	      }
503  	    } else if (test == 'I') {
504  	      while (r64a < top64) {
505  	        *r64a = gf->inverse.w64(gf, *r64a);
506  	        r64a++;
507  	      }
508  	    }
509  	    return (top64 - (uint64_t *) ra);
510  	  }
511  	  if (w == 128) {
512  	    r64a = (uint64_t *) ra; 
513  	    r64c = r64a;
514  	    r64a += 2;
515  	    r64b = (uint64_t *) rb; 
516  	    top64 = (uint64_t *) top;
517  	    rv = (top64 - r64a)/2;
518  	    if (test == 'M') {
519  	      while (r64a < top64) {
520  	        gf->multiply.w128(gf, r64a, r64b, r64c);
521  	        r64a += 2;
522  	        r64b += 2;
523  	      }
524  	    } else if (test == 'D') {
525  	      while (r64a < top64) {
526  	        gf->divide.w128(gf, r64a, r64b, r64c);
527  	        r64a += 2;
528  	        r64b += 2;
529  	      }
530  	    } else if (test == 'I') {
531  	      while (r64a < top64) {
532  	        gf->inverse.w128(gf, r64a, r64c);
533  	        r64a += 2;
534  	      }
535  	    }
536  	    return rv;
537  	  }
538  	  return 0;
539  	}
540