File: | home/bhubbard/working/src/ceph/src/erasure-code/jerasure/jerasure/src/jerasure.c |
Warning: | line 809, column 9 Branch condition evaluates to a garbage value |
[?] Use j/k keys for keyboard navigation
1 | /* * | |||
2 | * Copyright (c) 2014, James S. Plank and Kevin Greenan | |||
3 | * All rights reserved. | |||
4 | * | |||
5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure | |||
6 | * Coding Techniques | |||
7 | * | |||
8 | * Revision 2.0: Galois Field backend now links to GF-Complete | |||
9 | * | |||
10 | * Redistribution and use in source and binary forms, with or without | |||
11 | * modification, are permitted provided that the following conditions | |||
12 | * are met: | |||
13 | * | |||
14 | * - Redistributions of source code must retain the above copyright | |||
15 | * notice, this list of conditions and the following disclaimer. | |||
16 | * | |||
17 | * - Redistributions in binary form must reproduce the above copyright | |||
18 | * notice, this list of conditions and the following disclaimer in | |||
19 | * the documentation and/or other materials provided with the | |||
20 | * distribution. | |||
21 | * | |||
22 | * - Neither the name of the University of Tennessee nor the names of its | |||
23 | * contributors may be used to endorse or promote products derived | |||
24 | * from this software without specific prior written permission. | |||
25 | * | |||
26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |||
31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | |||
32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | |||
33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |||
34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY | |||
36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
37 | * POSSIBILITY OF SUCH DAMAGE. | |||
38 | */ | |||
39 | ||||
40 | /* Jerasure's authors: | |||
41 | ||||
42 | Revision 2.x - 2014: James S. Plank and Kevin M. Greenan | |||
43 | Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman. | |||
44 | Revision 1.0 - 2007: James S. Plank | |||
45 | */ | |||
46 | ||||
47 | #include <stdio.h> | |||
48 | #include <stdlib.h> | |||
49 | #include <string.h> | |||
50 | #include <assert.h> | |||
51 | ||||
52 | #include "galois.h" | |||
53 | #include "jerasure.h" | |||
54 | ||||
55 | #define talloc(type, num)(type *) malloc(sizeof(type)*(num)) (type *) malloc(sizeof(type)*(num)) | |||
56 | ||||
57 | static double jerasure_total_xor_bytes = 0; | |||
58 | static double jerasure_total_gf_bytes = 0; | |||
59 | static double jerasure_total_memcpy_bytes = 0; | |||
60 | ||||
61 | void jerasure_print_matrix(int *m, int rows, int cols, int w) | |||
62 | { | |||
63 | int i, j; | |||
64 | int fw; | |||
65 | char s[30]; | |||
66 | unsigned int w2; | |||
67 | ||||
68 | if (w == 32) { | |||
69 | fw = 10; | |||
70 | } else { | |||
71 | w2 = (1 << w); | |||
72 | sprintf(s, "%u", w2-1)__builtin___sprintf_chk (s, 2 - 1, __builtin_object_size (s, 2 > 1), "%u", w2-1); | |||
73 | fw = strlen(s); | |||
74 | } | |||
75 | ||||
76 | for (i = 0; i < rows; i++) { | |||
77 | for (j = 0; j < cols; j++) { | |||
78 | if (j != 0) printf(" ")__printf_chk (2 - 1, " "); | |||
79 | printf("%*u", fw, m[i*cols+j])__printf_chk (2 - 1, "%*u", fw, m[i*cols+j]); | |||
80 | } | |||
81 | printf("\n")__printf_chk (2 - 1, "\n"); | |||
82 | } | |||
83 | } | |||
84 | ||||
85 | void jerasure_print_bitmatrix(int *m, int rows, int cols, int w) | |||
86 | { | |||
87 | int i, j; | |||
88 | ||||
89 | for (i = 0; i < rows; i++) { | |||
90 | if (i != 0 && i%w == 0) printf("\n")__printf_chk (2 - 1, "\n"); | |||
91 | for (j = 0; j < cols; j++) { | |||
92 | if (j != 0 && j%w == 0) printf(" ")__printf_chk (2 - 1, " "); | |||
93 | printf("%d", m[i*cols+j])__printf_chk (2 - 1, "%d", m[i*cols+j]); | |||
94 | } | |||
95 | printf("\n")__printf_chk (2 - 1, "\n"); | |||
96 | } | |||
97 | } | |||
98 | ||||
99 | int jerasure_make_decoding_matrix(int k, int m, int w, int *matrix, int *erased, int *decoding_matrix, int *dm_ids) | |||
100 | { | |||
101 | int i, j, *tmpmat; | |||
102 | ||||
103 | j = 0; | |||
104 | for (i = 0; j < k; i++) { | |||
105 | if (erased[i] == 0) { | |||
106 | dm_ids[j] = i; | |||
107 | j++; | |||
108 | } | |||
109 | } | |||
110 | ||||
111 | tmpmat = talloc(int, k*k)(int *) malloc(sizeof(int)*(k*k)); | |||
112 | if (tmpmat == NULL((void*)0)) { return -1; } | |||
113 | for (i = 0; i < k; i++) { | |||
114 | if (dm_ids[i] < k) { | |||
115 | for (j = 0; j < k; j++) tmpmat[i*k+j] = 0; | |||
116 | tmpmat[i*k+dm_ids[i]] = 1; | |||
117 | } else { | |||
118 | for (j = 0; j < k; j++) { | |||
119 | tmpmat[i*k+j] = matrix[(dm_ids[i]-k)*k+j]; | |||
120 | } | |||
121 | } | |||
122 | } | |||
123 | ||||
124 | i = jerasure_invert_matrix(tmpmat, decoding_matrix, k, w); | |||
125 | free(tmpmat); | |||
126 | return i; | |||
127 | } | |||
128 | ||||
129 | /* Internal Routine */ | |||
130 | int jerasure_make_decoding_bitmatrix(int k, int m, int w, int *matrix, int *erased, int *decoding_matrix, int *dm_ids) | |||
131 | { | |||
132 | int i, j, *tmpmat; | |||
133 | int index, mindex; | |||
134 | ||||
135 | j = 0; | |||
136 | for (i = 0; j < k; i++) { | |||
137 | if (erased[i] == 0) { | |||
138 | dm_ids[j] = i; | |||
139 | j++; | |||
140 | } | |||
141 | } | |||
142 | ||||
143 | tmpmat = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||
144 | if (tmpmat == NULL((void*)0)) { return -1; } | |||
145 | for (i = 0; i < k; i++) { | |||
146 | if (dm_ids[i] < k) { | |||
147 | index = i*k*w*w; | |||
148 | for (j = 0; j < k*w*w; j++) tmpmat[index+j] = 0; | |||
149 | index = i*k*w*w+dm_ids[i]*w; | |||
150 | for (j = 0; j < w; j++) { | |||
151 | tmpmat[index] = 1; | |||
152 | index += (k*w+1); | |||
153 | } | |||
154 | } else { | |||
155 | index = i*k*w*w; | |||
156 | mindex = (dm_ids[i]-k)*k*w*w; | |||
157 | for (j = 0; j < k*w*w; j++) { | |||
158 | tmpmat[index+j] = matrix[mindex+j]; | |||
159 | } | |||
160 | } | |||
161 | } | |||
162 | ||||
163 | i = jerasure_invert_bitmatrix(tmpmat, decoding_matrix, k*w); | |||
164 | free(tmpmat); | |||
165 | return i; | |||
166 | } | |||
167 | ||||
168 | int jerasure_matrix_decode(int k, int m, int w, int *matrix, int row_k_ones, int *erasures, | |||
169 | char **data_ptrs, char **coding_ptrs, int size) | |||
170 | { | |||
171 | int i, edd, lastdrive; | |||
172 | int *tmpids; | |||
173 | int *erased, *decoding_matrix, *dm_ids; | |||
174 | ||||
175 | if (w != 8 && w != 16 && w != 32) return -1; | |||
176 | ||||
177 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||
178 | if (erased == NULL((void*)0)) return -1; | |||
179 | ||||
180 | /* Find the number of data drives failed */ | |||
181 | ||||
182 | lastdrive = k; | |||
183 | ||||
184 | edd = 0; | |||
185 | for (i = 0; i < k; i++) { | |||
186 | if (erased[i]) { | |||
187 | edd++; | |||
188 | lastdrive = i; | |||
189 | } | |||
190 | } | |||
191 | ||||
192 | /* You only need to create the decoding matrix in the following cases: | |||
193 | ||||
194 | 1. edd > 0 and row_k_ones is false. | |||
195 | 2. edd > 0 and row_k_ones is true and coding device 0 has been erased. | |||
196 | 3. edd > 1 | |||
197 | ||||
198 | We're going to use lastdrive to denote when to stop decoding data. | |||
199 | At this point in the code, it is equal to the last erased data device. | |||
200 | However, if we can't use the parity row to decode it (i.e. row_k_ones=0 | |||
201 | or erased[k] = 1, we're going to set it to k so that the decoding | |||
202 | pass will decode all data. | |||
203 | */ | |||
204 | ||||
205 | if (!row_k_ones || erased[k]) lastdrive = k; | |||
206 | ||||
207 | dm_ids = NULL((void*)0); | |||
208 | decoding_matrix = NULL((void*)0); | |||
209 | ||||
210 | if (edd > 1 || (edd > 0 && (!row_k_ones || erased[k]))) { | |||
211 | dm_ids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||
212 | if (dm_ids == NULL((void*)0)) { | |||
213 | free(erased); | |||
214 | return -1; | |||
215 | } | |||
216 | ||||
217 | decoding_matrix = talloc(int, k*k)(int *) malloc(sizeof(int)*(k*k)); | |||
218 | if (decoding_matrix == NULL((void*)0)) { | |||
219 | free(erased); | |||
220 | free(dm_ids); | |||
221 | return -1; | |||
222 | } | |||
223 | ||||
224 | if (jerasure_make_decoding_matrix(k, m, w, matrix, erased, decoding_matrix, dm_ids) < 0) { | |||
225 | free(erased); | |||
226 | free(dm_ids); | |||
227 | free(decoding_matrix); | |||
228 | return -1; | |||
229 | } | |||
230 | } | |||
231 | ||||
232 | /* Decode the data drives. | |||
233 | If row_k_ones is true and coding device 0 is intact, then only decode edd-1 drives. | |||
234 | This is done by stopping at lastdrive. | |||
235 | We test whether edd > 0 so that we can exit the loop early if we're done. | |||
236 | */ | |||
237 | ||||
238 | for (i = 0; edd > 0 && i < lastdrive; i++) { | |||
239 | if (erased[i]) { | |||
240 | jerasure_matrix_dotprod(k, w, decoding_matrix+(i*k), dm_ids, i, data_ptrs, coding_ptrs, size); | |||
241 | edd--; | |||
242 | } | |||
243 | } | |||
244 | ||||
245 | /* Then if necessary, decode drive lastdrive */ | |||
246 | ||||
247 | if (edd > 0) { | |||
248 | tmpids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||
249 | for (i = 0; i < k; i++) { | |||
250 | tmpids[i] = (i < lastdrive) ? i : i+1; | |||
251 | } | |||
252 | jerasure_matrix_dotprod(k, w, matrix, tmpids, lastdrive, data_ptrs, coding_ptrs, size); | |||
253 | free(tmpids); | |||
254 | } | |||
255 | ||||
256 | /* Finally, re-encode any erased coding devices */ | |||
257 | ||||
258 | for (i = 0; i < m; i++) { | |||
259 | if (erased[k+i]) { | |||
260 | jerasure_matrix_dotprod(k, w, matrix+(i*k), NULL((void*)0), i+k, data_ptrs, coding_ptrs, size); | |||
261 | } | |||
262 | } | |||
263 | ||||
264 | free(erased); | |||
265 | if (dm_ids != NULL((void*)0)) free(dm_ids); | |||
266 | if (decoding_matrix != NULL((void*)0)) free(decoding_matrix); | |||
267 | ||||
268 | return 0; | |||
269 | } | |||
270 | ||||
271 | ||||
272 | int *jerasure_matrix_to_bitmatrix(int k, int m, int w, int *matrix) | |||
273 | { | |||
274 | int *bitmatrix; | |||
275 | int rowelts, rowindex, colindex, elt, i, j, l, x; | |||
276 | ||||
277 | if (matrix == NULL((void*)0)) { return NULL((void*)0); } | |||
278 | ||||
279 | bitmatrix = talloc(int, k*m*w*w)(int *) malloc(sizeof(int)*(k*m*w*w)); | |||
280 | ||||
281 | rowelts = k * w; | |||
282 | rowindex = 0; | |||
283 | ||||
284 | for (i = 0; i < m; i++) { | |||
285 | colindex = rowindex; | |||
286 | for (j = 0; j < k; j++) { | |||
287 | elt = matrix[i*k+j]; | |||
288 | for (x = 0; x < w; x++) { | |||
289 | for (l = 0; l < w; l++) { | |||
290 | bitmatrix[colindex+x+l*rowelts] = ((elt & (1 << l)) ? 1 : 0); | |||
291 | } | |||
292 | elt = galois_single_multiply(elt, 2, w); | |||
293 | } | |||
294 | colindex += w; | |||
295 | } | |||
296 | rowindex += rowelts * w; | |||
297 | } | |||
298 | return bitmatrix; | |||
299 | } | |||
300 | ||||
301 | void jerasure_matrix_encode(int k, int m, int w, int *matrix, | |||
302 | char **data_ptrs, char **coding_ptrs, int size) | |||
303 | { | |||
304 | int i; | |||
305 | ||||
306 | if (w != 8 && w != 16 && w != 32) { | |||
307 | fprintf(stderr, "ERROR: jerasure_matrix_encode() and w is not 8, 16 or 32\n")__fprintf_chk (stderr, 2 - 1, "ERROR: jerasure_matrix_encode() and w is not 8, 16 or 32\n" ); | |||
308 | assert(0)((void) (0)); | |||
309 | } | |||
310 | ||||
311 | for (i = 0; i < m; i++) { | |||
312 | jerasure_matrix_dotprod(k, w, matrix+(i*k), NULL((void*)0), k+i, data_ptrs, coding_ptrs, size); | |||
313 | } | |||
314 | } | |||
315 | ||||
316 | void jerasure_bitmatrix_dotprod(int k, int w, int *bitmatrix_row, | |||
317 | int *src_ids, int dest_id, | |||
318 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||
319 | { | |||
320 | int j, sindex, pstarted, index, x, y; | |||
321 | char *dptr, *pptr, *bdptr, *bpptr; | |||
322 | ||||
323 | if (size%(w*packetsize) != 0) { | |||
324 | fprintf(stderr, "jerasure_bitmatrix_dotprod - size%c(w*packetsize)) must = 0\n", '%')__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_dotprod - size%c(w*packetsize)) must = 0\n" , '%'); | |||
325 | assert(0)((void) (0)); | |||
326 | } | |||
327 | ||||
328 | bpptr = (dest_id < k) ? data_ptrs[dest_id] : coding_ptrs[dest_id-k]; | |||
329 | ||||
330 | for (sindex = 0; sindex < size; sindex += (packetsize*w)) { | |||
331 | index = 0; | |||
332 | for (j = 0; j < w; j++) { | |||
333 | pstarted = 0; | |||
334 | pptr = bpptr + sindex + j*packetsize; | |||
335 | for (x = 0; x < k; x++) { | |||
336 | if (src_ids == NULL((void*)0)) { | |||
337 | bdptr = data_ptrs[x]; | |||
338 | } else if (src_ids[x] < k) { | |||
339 | bdptr = data_ptrs[src_ids[x]]; | |||
340 | } else { | |||
341 | bdptr = coding_ptrs[src_ids[x]-k]; | |||
342 | } | |||
343 | for (y = 0; y < w; y++) { | |||
344 | if (bitmatrix_row[index]) { | |||
345 | dptr = bdptr + sindex + y*packetsize; | |||
346 | if (!pstarted) { | |||
347 | memcpy(pptr, dptr, packetsize); | |||
348 | jerasure_total_memcpy_bytes += packetsize; | |||
349 | pstarted = 1; | |||
350 | } else { | |||
351 | galois_region_xor(dptr, pptr, packetsize); | |||
352 | jerasure_total_xor_bytes += packetsize; | |||
353 | } | |||
354 | } | |||
355 | index++; | |||
356 | } | |||
357 | } | |||
358 | } | |||
359 | } | |||
360 | } | |||
361 | ||||
362 | void jerasure_do_parity(int k, char **data_ptrs, char *parity_ptr, int size) | |||
363 | { | |||
364 | int i; | |||
365 | ||||
366 | memcpy(parity_ptr, data_ptrs[0], size); | |||
367 | jerasure_total_memcpy_bytes += size; | |||
368 | ||||
369 | for (i = 1; i < k; i++) { | |||
370 | galois_region_xor(data_ptrs[i], parity_ptr, size); | |||
371 | jerasure_total_xor_bytes += size; | |||
372 | } | |||
373 | } | |||
374 | ||||
375 | int jerasure_invert_matrix(int *mat, int *inv, int rows, int w) | |||
376 | { | |||
377 | int cols, i, j, k, x, rs2; | |||
378 | int row_start, tmp, inverse; | |||
379 | ||||
380 | cols = rows; | |||
381 | ||||
382 | k = 0; | |||
383 | for (i = 0; i < rows; i++) { | |||
384 | for (j = 0; j < cols; j++) { | |||
385 | inv[k] = (i == j) ? 1 : 0; | |||
386 | k++; | |||
387 | } | |||
388 | } | |||
389 | ||||
390 | /* First -- convert into upper triangular */ | |||
391 | for (i = 0; i < cols; i++) { | |||
392 | row_start = cols*i; | |||
393 | ||||
394 | /* Swap rows if we ave a zero i,i element. If we can't swap, then the | |||
395 | matrix was not invertible */ | |||
396 | ||||
397 | if (mat[row_start+i] == 0) { | |||
398 | for (j = i+1; j < rows && mat[cols*j+i] == 0; j++) ; | |||
399 | if (j == rows) return -1; | |||
400 | rs2 = j*cols; | |||
401 | for (k = 0; k < cols; k++) { | |||
402 | tmp = mat[row_start+k]; | |||
403 | mat[row_start+k] = mat[rs2+k]; | |||
404 | mat[rs2+k] = tmp; | |||
405 | tmp = inv[row_start+k]; | |||
406 | inv[row_start+k] = inv[rs2+k]; | |||
407 | inv[rs2+k] = tmp; | |||
408 | } | |||
409 | } | |||
410 | ||||
411 | /* Multiply the row by 1/element i,i */ | |||
412 | tmp = mat[row_start+i]; | |||
413 | if (tmp != 1) { | |||
414 | inverse = galois_single_divide(1, tmp, w); | |||
415 | for (j = 0; j < cols; j++) { | |||
416 | mat[row_start+j] = galois_single_multiply(mat[row_start+j], inverse, w); | |||
417 | inv[row_start+j] = galois_single_multiply(inv[row_start+j], inverse, w); | |||
418 | } | |||
419 | } | |||
420 | ||||
421 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||
422 | k = row_start+i; | |||
423 | for (j = i+1; j != cols; j++) { | |||
424 | k += cols; | |||
425 | if (mat[k] != 0) { | |||
426 | if (mat[k] == 1) { | |||
427 | rs2 = cols*j; | |||
428 | for (x = 0; x < cols; x++) { | |||
429 | mat[rs2+x] ^= mat[row_start+x]; | |||
430 | inv[rs2+x] ^= inv[row_start+x]; | |||
431 | } | |||
432 | } else { | |||
433 | tmp = mat[k]; | |||
434 | rs2 = cols*j; | |||
435 | for (x = 0; x < cols; x++) { | |||
436 | mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w); | |||
437 | inv[rs2+x] ^= galois_single_multiply(tmp, inv[row_start+x], w); | |||
438 | } | |||
439 | } | |||
440 | } | |||
441 | } | |||
442 | } | |||
443 | ||||
444 | /* Now the matrix is upper triangular. Start at the top and multiply down */ | |||
445 | ||||
446 | for (i = rows-1; i >= 0; i--) { | |||
447 | row_start = i*cols; | |||
448 | for (j = 0; j < i; j++) { | |||
449 | rs2 = j*cols; | |||
450 | if (mat[rs2+i] != 0) { | |||
451 | tmp = mat[rs2+i]; | |||
452 | mat[rs2+i] = 0; | |||
453 | for (k = 0; k < cols; k++) { | |||
454 | inv[rs2+k] ^= galois_single_multiply(tmp, inv[row_start+k], w); | |||
455 | } | |||
456 | } | |||
457 | } | |||
458 | } | |||
459 | return 0; | |||
460 | } | |||
461 | ||||
462 | int jerasure_invertible_matrix(int *mat, int rows, int w) | |||
463 | { | |||
464 | int cols, i, j, k, x, rs2; | |||
465 | int row_start, tmp, inverse; | |||
466 | ||||
467 | cols = rows; | |||
468 | ||||
469 | /* First -- convert into upper triangular */ | |||
470 | for (i = 0; i < cols; i++) { | |||
471 | row_start = cols*i; | |||
472 | ||||
473 | /* Swap rows if we ave a zero i,i element. If we can't swap, then the | |||
474 | matrix was not invertible */ | |||
475 | ||||
476 | if (mat[row_start+i] == 0) { | |||
477 | for (j = i+1; j < rows && mat[cols*j+i] == 0; j++) ; | |||
478 | if (j == rows) return 0; | |||
479 | rs2 = j*cols; | |||
480 | for (k = 0; k < cols; k++) { | |||
481 | tmp = mat[row_start+k]; | |||
482 | mat[row_start+k] = mat[rs2+k]; | |||
483 | mat[rs2+k] = tmp; | |||
484 | } | |||
485 | } | |||
486 | ||||
487 | /* Multiply the row by 1/element i,i */ | |||
488 | tmp = mat[row_start+i]; | |||
489 | if (tmp != 1) { | |||
490 | inverse = galois_single_divide(1, tmp, w); | |||
491 | for (j = 0; j < cols; j++) { | |||
492 | mat[row_start+j] = galois_single_multiply(mat[row_start+j], inverse, w); | |||
493 | } | |||
494 | } | |||
495 | ||||
496 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||
497 | k = row_start+i; | |||
498 | for (j = i+1; j != cols; j++) { | |||
499 | k += cols; | |||
500 | if (mat[k] != 0) { | |||
501 | if (mat[k] == 1) { | |||
502 | rs2 = cols*j; | |||
503 | for (x = 0; x < cols; x++) { | |||
504 | mat[rs2+x] ^= mat[row_start+x]; | |||
505 | } | |||
506 | } else { | |||
507 | tmp = mat[k]; | |||
508 | rs2 = cols*j; | |||
509 | for (x = 0; x < cols; x++) { | |||
510 | mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w); | |||
511 | } | |||
512 | } | |||
513 | } | |||
514 | } | |||
515 | } | |||
516 | return 1; | |||
517 | } | |||
518 | ||||
519 | /* Converts a list-style version of the erasures into an array of k+m elements | |||
520 | where the element = 1 if the index has been erased, and zero otherwise */ | |||
521 | ||||
522 | int *jerasure_erasures_to_erased(int k, int m, int *erasures) | |||
523 | { | |||
524 | int td; | |||
525 | int t_non_erased; | |||
526 | int *erased; | |||
527 | int i; | |||
528 | ||||
529 | td = k+m; | |||
530 | erased = talloc(int, td)(int *) malloc(sizeof(int)*(td)); | |||
531 | if (erased == NULL((void*)0)) return NULL((void*)0); | |||
532 | t_non_erased = td; | |||
533 | ||||
534 | for (i = 0; i < td; i++) erased[i] = 0; | |||
535 | ||||
536 | for (i = 0; erasures[i] != -1; i++) { | |||
537 | if (erased[erasures[i]] == 0) { | |||
538 | erased[erasures[i]] = 1; | |||
539 | t_non_erased--; | |||
540 | if (t_non_erased < k) { | |||
541 | free(erased); | |||
542 | return NULL((void*)0); | |||
543 | } | |||
544 | } | |||
545 | } | |||
546 | return erased; | |||
547 | } | |||
548 | ||||
549 | void jerasure_free_schedule(int **schedule) | |||
550 | { | |||
551 | int i; | |||
552 | ||||
553 | for (i = 0; schedule[i][0] >= 0; i++) free(schedule[i]); | |||
554 | free(schedule[i]); | |||
555 | free(schedule); | |||
556 | } | |||
557 | ||||
558 | void jerasure_free_schedule_cache(int k, int m, int ***cache) | |||
559 | { | |||
560 | int e1, e2; | |||
561 | ||||
562 | if (m != 2) { | |||
563 | fprintf(stderr, "jerasure_free_schedule_cache(): m must equal 2\n")__fprintf_chk (stderr, 2 - 1, "jerasure_free_schedule_cache(): m must equal 2\n" ); | |||
564 | assert(0)((void) (0)); | |||
565 | } | |||
566 | ||||
567 | for (e1 = 0; e1 < k+m; e1++) { | |||
568 | for (e2 = 0; e2 < e1; e2++) { | |||
569 | jerasure_free_schedule(cache[e1*(k+m)+e2]); | |||
570 | } | |||
571 | jerasure_free_schedule(cache[e1*(k+m)+e1]); | |||
572 | } | |||
573 | free(cache); | |||
574 | } | |||
575 | ||||
576 | void jerasure_matrix_dotprod(int k, int w, int *matrix_row, | |||
577 | int *src_ids, int dest_id, | |||
578 | char **data_ptrs, char **coding_ptrs, int size) | |||
579 | { | |||
580 | int init; | |||
581 | char *dptr, *sptr; | |||
582 | int i; | |||
583 | ||||
584 | if (w != 1 && w != 8 && w != 16 && w != 32) { | |||
585 | fprintf(stderr, "ERROR: jerasure_matrix_dotprod() called and w is not 1, 8, 16 or 32\n")__fprintf_chk (stderr, 2 - 1, "ERROR: jerasure_matrix_dotprod() called and w is not 1, 8, 16 or 32\n" ); | |||
586 | assert(0)((void) (0)); | |||
587 | } | |||
588 | ||||
589 | init = 0; | |||
590 | ||||
591 | dptr = (dest_id < k) ? data_ptrs[dest_id] : coding_ptrs[dest_id-k]; | |||
592 | ||||
593 | /* First copy or xor any data that does not need to be multiplied by a factor */ | |||
594 | ||||
595 | for (i = 0; i < k; i++) { | |||
596 | if (matrix_row[i] == 1) { | |||
597 | if (src_ids == NULL((void*)0)) { | |||
598 | sptr = data_ptrs[i]; | |||
599 | } else if (src_ids[i] < k) { | |||
600 | sptr = data_ptrs[src_ids[i]]; | |||
601 | } else { | |||
602 | sptr = coding_ptrs[src_ids[i]-k]; | |||
603 | } | |||
604 | if (init == 0) { | |||
605 | memcpy(dptr, sptr, size); | |||
606 | jerasure_total_memcpy_bytes += size; | |||
607 | init = 1; | |||
608 | } else { | |||
609 | galois_region_xor(sptr, dptr, size); | |||
610 | jerasure_total_xor_bytes += size; | |||
611 | } | |||
612 | } | |||
613 | } | |||
614 | ||||
615 | /* Now do the data that needs to be multiplied by a factor */ | |||
616 | ||||
617 | for (i = 0; i < k; i++) { | |||
618 | if (matrix_row[i] != 0 && matrix_row[i] != 1) { | |||
619 | if (src_ids == NULL((void*)0)) { | |||
620 | sptr = data_ptrs[i]; | |||
621 | } else if (src_ids[i] < k) { | |||
622 | sptr = data_ptrs[src_ids[i]]; | |||
623 | } else { | |||
624 | sptr = coding_ptrs[src_ids[i]-k]; | |||
625 | } | |||
626 | switch (w) { | |||
627 | case 8: galois_w08_region_multiply(sptr, matrix_row[i], size, dptr, init); break; | |||
628 | case 16: galois_w16_region_multiply(sptr, matrix_row[i], size, dptr, init); break; | |||
629 | case 32: galois_w32_region_multiply(sptr, matrix_row[i], size, dptr, init); break; | |||
630 | } | |||
631 | jerasure_total_gf_bytes += size; | |||
632 | init = 1; | |||
633 | } | |||
634 | } | |||
635 | } | |||
636 | ||||
637 | ||||
638 | int jerasure_bitmatrix_decode(int k, int m, int w, int *bitmatrix, int row_k_ones, int *erasures, | |||
639 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||
640 | { | |||
641 | int i; | |||
642 | int *erased; | |||
643 | int *decoding_matrix; | |||
644 | int *dm_ids; | |||
645 | int edd, *tmpids, lastdrive; | |||
646 | ||||
647 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||
648 | if (erased == NULL((void*)0)) return -1; | |||
649 | ||||
650 | /* See jerasure_matrix_decode for the logic of this routine. This one works just like | |||
651 | it, but calls the bitmatrix ops instead */ | |||
652 | ||||
653 | lastdrive = k; | |||
654 | ||||
655 | edd = 0; | |||
656 | for (i = 0; i < k; i++) { | |||
657 | if (erased[i]) { | |||
658 | edd++; | |||
659 | lastdrive = i; | |||
660 | } | |||
661 | } | |||
662 | ||||
663 | if (row_k_ones != 1 || erased[k]) lastdrive = k; | |||
664 | ||||
665 | dm_ids = NULL((void*)0); | |||
666 | decoding_matrix = NULL((void*)0); | |||
667 | ||||
668 | if (edd > 1 || (edd > 0 && (row_k_ones != 1 || erased[k]))) { | |||
669 | ||||
670 | dm_ids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||
671 | if (dm_ids == NULL((void*)0)) { | |||
672 | free(erased); | |||
673 | return -1; | |||
674 | } | |||
675 | ||||
676 | decoding_matrix = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||
677 | if (decoding_matrix == NULL((void*)0)) { | |||
678 | free(erased); | |||
679 | free(dm_ids); | |||
680 | return -1; | |||
681 | } | |||
682 | ||||
683 | if (jerasure_make_decoding_bitmatrix(k, m, w, bitmatrix, erased, decoding_matrix, dm_ids) < 0) { | |||
684 | free(erased); | |||
685 | free(dm_ids); | |||
686 | free(decoding_matrix); | |||
687 | return -1; | |||
688 | } | |||
689 | } | |||
690 | ||||
691 | for (i = 0; edd > 0 && i < lastdrive; i++) { | |||
692 | if (erased[i]) { | |||
693 | jerasure_bitmatrix_dotprod(k, w, decoding_matrix+i*k*w*w, dm_ids, i, data_ptrs, coding_ptrs, size, packetsize); | |||
694 | edd--; | |||
695 | } | |||
696 | } | |||
697 | ||||
698 | if (edd > 0) { | |||
699 | tmpids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||
700 | for (i = 0; i < k; i++) { | |||
701 | tmpids[i] = (i < lastdrive) ? i : i+1; | |||
702 | } | |||
703 | jerasure_bitmatrix_dotprod(k, w, bitmatrix, tmpids, lastdrive, data_ptrs, coding_ptrs, size, packetsize); | |||
704 | free(tmpids); | |||
705 | } | |||
706 | ||||
707 | for (i = 0; i < m; i++) { | |||
708 | if (erased[k+i]) { | |||
709 | jerasure_bitmatrix_dotprod(k, w, bitmatrix+i*k*w*w, NULL((void*)0), k+i, data_ptrs, coding_ptrs, size, packetsize); | |||
710 | } | |||
711 | } | |||
712 | ||||
713 | free(erased); | |||
714 | if (dm_ids != NULL((void*)0)) free(dm_ids); | |||
715 | if (decoding_matrix != NULL((void*)0)) free(decoding_matrix); | |||
716 | ||||
717 | return 0; | |||
718 | } | |||
719 | ||||
720 | static char **set_up_ptrs_for_scheduled_decoding(int k, int m, int *erasures, char **data_ptrs, char **coding_ptrs) | |||
721 | { | |||
722 | int ddf, cdf; | |||
723 | int *erased; | |||
724 | char **ptrs; | |||
725 | int i, j, x; | |||
726 | ||||
727 | ddf = 0; | |||
728 | cdf = 0; | |||
729 | for (i = 0; erasures[i] != -1; i++) { | |||
730 | if (erasures[i] < k) ddf++; else cdf++; | |||
731 | } | |||
732 | ||||
733 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||
734 | if (erased == NULL((void*)0)) return NULL((void*)0); | |||
735 | ||||
736 | /* Set up ptrs. It will be as follows: | |||
737 | ||||
738 | - If data drive i has not failed, then ptrs[i] = data_ptrs[i]. | |||
739 | - If data drive i has failed, then ptrs[i] = coding_ptrs[j], where j is the | |||
740 | lowest unused non-failed coding drive. | |||
741 | - Elements k to k+ddf-1 are data_ptrs[] of the failed data drives. | |||
742 | - Elements k+ddf to k+ddf+cdf-1 are coding_ptrs[] of the failed data drives. | |||
743 | ||||
744 | The array row_ids contains the ids of ptrs. | |||
745 | The array ind_to_row_ids contains the row_id of drive i. | |||
746 | ||||
747 | However, we're going to set row_ids and ind_to_row in a different procedure. | |||
748 | */ | |||
749 | ||||
750 | ptrs = talloc(char *, k+m)(char * *) malloc(sizeof(char *)*(k+m)); | |||
751 | ||||
752 | j = k; | |||
753 | x = k; | |||
754 | for (i = 0; i < k; i++) { | |||
755 | if (erased[i] == 0) { | |||
756 | ptrs[i] = data_ptrs[i]; | |||
757 | } else { | |||
758 | while (erased[j]) j++; | |||
759 | ptrs[i] = coding_ptrs[j-k]; | |||
760 | j++; | |||
761 | ptrs[x] = data_ptrs[i]; | |||
762 | x++; | |||
763 | } | |||
764 | } | |||
765 | for (i = k; i < k+m; i++) { | |||
766 | if (erased[i]) { | |||
767 | ptrs[x] = coding_ptrs[i-k]; | |||
768 | x++; | |||
769 | } | |||
770 | } | |||
771 | free(erased); | |||
772 | return ptrs; | |||
773 | } | |||
774 | ||||
775 | static int set_up_ids_for_scheduled_decoding(int k, int m, int *erasures, int *row_ids, int *ind_to_row) | |||
776 | { | |||
777 | int ddf, cdf; | |||
778 | int *erased; | |||
779 | int i, j, x; | |||
780 | ||||
781 | ddf = 0; | |||
782 | cdf = 0; | |||
783 | for (i = 0; erasures[i] != -1; i++) { | |||
784 | if (erasures[i] < k) ddf++; else cdf++; | |||
785 | } | |||
786 | ||||
787 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||
788 | if (erased == NULL((void*)0)) return -1; | |||
789 | ||||
790 | /* See set_up_ptrs_for_scheduled_decoding for how these are set */ | |||
791 | ||||
792 | j = k; | |||
793 | x = k; | |||
794 | for (i = 0; i < k; i++) { | |||
795 | if (erased[i] == 0) { | |||
796 | row_ids[i] = i; | |||
797 | ind_to_row[i] = i; | |||
798 | } else { | |||
799 | while (erased[j]) j++; | |||
800 | row_ids[i] = j; | |||
801 | ind_to_row[j] = i; | |||
802 | j++; | |||
803 | row_ids[x] = i; | |||
804 | ind_to_row[i] = x; | |||
805 | x++; | |||
806 | } | |||
807 | } | |||
808 | for (i = k; i < k+m; i++) { | |||
809 | if (erased[i]) { | |||
| ||||
810 | row_ids[x] = i; | |||
811 | ind_to_row[i] = x; | |||
812 | x++; | |||
813 | } | |||
814 | } | |||
815 | free(erased); | |||
816 | return 0; | |||
817 | } | |||
818 | ||||
819 | static int **jerasure_generate_decoding_schedule(int k, int m, int w, int *bitmatrix, int *erasures, int smart) | |||
820 | { | |||
821 | int i, j, x, drive, y, index, z; | |||
822 | int *decoding_matrix, *inverse, *real_decoding_matrix; | |||
823 | int *ptr; | |||
824 | int *row_ids; | |||
825 | int *ind_to_row; | |||
826 | int ddf, cdf; | |||
827 | int **schedule; | |||
828 | int *b1, *b2; | |||
829 | ||||
830 | /* First, figure out the number of data drives that have failed, and the | |||
831 | number of coding drives that have failed: ddf and cdf */ | |||
832 | ||||
833 | ddf = 0; | |||
834 | cdf = 0; | |||
835 | for (i = 0; erasures[i] != -1; i++) { | |||
836 | if (erasures[i] < k) ddf++; else cdf++; | |||
837 | } | |||
838 | ||||
839 | row_ids = talloc(int, k+m)(int *) malloc(sizeof(int)*(k+m)); | |||
840 | ind_to_row = talloc(int, k+m)(int *) malloc(sizeof(int)*(k+m)); | |||
841 | ||||
842 | if (set_up_ids_for_scheduled_decoding(k, m, erasures, row_ids, ind_to_row) < 0) return NULL((void*)0); | |||
843 | ||||
844 | /* Now, we're going to create one decoding matrix which is going to | |||
845 | decode everything with one call. The hope is that the scheduler | |||
846 | will do a good job. This matrix has w*e rows, where e is the | |||
847 | number of erasures (ddf+cdf) */ | |||
848 | ||||
849 | real_decoding_matrix = talloc(int, k*w*(cdf+ddf)*w)(int *) malloc(sizeof(int)*(k*w*(cdf+ddf)*w)); | |||
850 | ||||
851 | /* First, if any data drives have failed, then initialize the first | |||
852 | ddf*w rows of the decoding matrix from the standard decoding | |||
853 | matrix inversion */ | |||
854 | ||||
855 | if (ddf > 0) { | |||
856 | ||||
857 | decoding_matrix = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||
858 | ptr = decoding_matrix; | |||
859 | for (i = 0; i < k; i++) { | |||
860 | if (row_ids[i] == i) { | |||
861 | bzero(ptr, k*w*w*sizeof(int)); | |||
862 | for (x = 0; x < w; x++) { | |||
863 | ptr[x+i*w+x*k*w] = 1; | |||
864 | } | |||
865 | } else { | |||
866 | memcpy(ptr, bitmatrix+k*w*w*(row_ids[i]-k), k*w*w*sizeof(int)); | |||
867 | } | |||
868 | ptr += (k*w*w); | |||
869 | } | |||
870 | inverse = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||
871 | jerasure_invert_bitmatrix(decoding_matrix, inverse, k*w); | |||
872 | ||||
873 | /* printf("\nMatrix to invert\n"); | |||
874 | jerasure_print_bitmatrix(decoding_matrix, k*w, k*w, w); | |||
875 | printf("\n"); | |||
876 | printf("\nInverse\n"); | |||
877 | jerasure_print_bitmatrix(inverse, k*w, k*w, w); | |||
878 | printf("\n"); */ | |||
879 | ||||
880 | free(decoding_matrix); | |||
881 | ptr = real_decoding_matrix; | |||
882 | for (i = 0; i < ddf; i++) { | |||
883 | memcpy(ptr, inverse+k*w*w*row_ids[k+i], sizeof(int)*k*w*w); | |||
884 | ptr += (k*w*w); | |||
885 | } | |||
886 | free(inverse); | |||
887 | } | |||
888 | ||||
889 | /* Next, here comes the hard part. For each coding node that needs | |||
890 | to be decoded, you start by putting its rows of the distribution | |||
891 | matrix into the decoding matrix. If there were no failed data | |||
892 | nodes, then you're done. However, if there have been failed | |||
893 | data nodes, then you need to modify the columns that correspond | |||
894 | to the data nodes. You do that by first zeroing them. Then | |||
895 | whereever there is a one in the distribution matrix, you XOR | |||
896 | in the corresponding row from the failed data node's entry in | |||
897 | the decoding matrix. The whole process kind of makes my head | |||
898 | spin, but it works. | |||
899 | */ | |||
900 | ||||
901 | for (x = 0; x < cdf; x++) { | |||
902 | drive = row_ids[x+ddf+k]-k; | |||
903 | ptr = real_decoding_matrix + k*w*w*(ddf+x); | |||
904 | memcpy(ptr, bitmatrix+drive*k*w*w, sizeof(int)*k*w*w); | |||
905 | ||||
906 | for (i = 0; i < k; i++) { | |||
907 | if (row_ids[i] != i) { | |||
908 | for (j = 0; j < w; j++) { | |||
909 | bzero(ptr+j*k*w+i*w, sizeof(int)*w); | |||
910 | } | |||
911 | } | |||
912 | } | |||
913 | ||||
914 | /* There's the yucky part */ | |||
915 | ||||
916 | index = drive*k*w*w; | |||
917 | for (i = 0; i < k; i++) { | |||
918 | if (row_ids[i] != i) { | |||
919 | b1 = real_decoding_matrix+(ind_to_row[i]-k)*k*w*w; | |||
920 | for (j = 0; j < w; j++) { | |||
921 | b2 = ptr + j*k*w; | |||
922 | for (y = 0; y < w; y++) { | |||
923 | if (bitmatrix[index+j*k*w+i*w+y]) { | |||
924 | for (z = 0; z < k*w; z++) { | |||
925 | b2[z] = b2[z] ^ b1[z+y*k*w]; | |||
926 | } | |||
927 | } | |||
928 | } | |||
929 | } | |||
930 | } | |||
931 | } | |||
932 | } | |||
933 | ||||
934 | /* | |||
935 | printf("\n\nReal Decoding Matrix\n\n"); | |||
936 | jerasure_print_bitmatrix(real_decoding_matrix, (ddf+cdf)*w, k*w, w); | |||
937 | printf("\n"); */ | |||
938 | if (smart) { | |||
939 | schedule = jerasure_smart_bitmatrix_to_schedule(k, ddf+cdf, w, real_decoding_matrix); | |||
940 | } else { | |||
941 | schedule = jerasure_dumb_bitmatrix_to_schedule(k, ddf+cdf, w, real_decoding_matrix); | |||
942 | } | |||
943 | free(row_ids); | |||
944 | free(ind_to_row); | |||
945 | free(real_decoding_matrix); | |||
946 | return schedule; | |||
947 | } | |||
948 | ||||
949 | int jerasure_schedule_decode_lazy(int k, int m, int w, int *bitmatrix, int *erasures, | |||
950 | char **data_ptrs, char **coding_ptrs, int size, int packetsize, | |||
951 | int smart) | |||
952 | { | |||
953 | int i, tdone; | |||
954 | char **ptrs; | |||
955 | int **schedule; | |||
956 | ||||
957 | ptrs = set_up_ptrs_for_scheduled_decoding(k, m, erasures, data_ptrs, coding_ptrs); | |||
958 | if (ptrs == NULL((void*)0)) return -1; | |||
959 | ||||
960 | schedule = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart); | |||
961 | if (schedule == NULL((void*)0)) { | |||
962 | free(ptrs); | |||
963 | return -1; | |||
964 | } | |||
965 | ||||
966 | for (tdone = 0; tdone < size; tdone += packetsize*w) { | |||
967 | jerasure_do_scheduled_operations(ptrs, schedule, packetsize); | |||
968 | for (i = 0; i < k+m; i++) ptrs[i] += (packetsize*w); | |||
969 | } | |||
970 | ||||
971 | jerasure_free_schedule(schedule); | |||
972 | free(ptrs); | |||
973 | ||||
974 | return 0; | |||
975 | } | |||
976 | ||||
977 | int jerasure_schedule_decode_cache(int k, int m, int w, int ***scache, int *erasures, | |||
978 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||
979 | { | |||
980 | int i, tdone; | |||
981 | char **ptrs; | |||
982 | int **schedule; | |||
983 | int index; | |||
984 | ||||
985 | if (erasures[1] == -1) { | |||
986 | index = erasures[0]*(k+m) + erasures[0]; | |||
987 | } else if (erasures[2] == -1) { | |||
988 | index = erasures[0]*(k+m) + erasures[1]; | |||
989 | } else { | |||
990 | return -1; | |||
991 | } | |||
992 | ||||
993 | schedule = scache[index]; | |||
994 | ||||
995 | ptrs = set_up_ptrs_for_scheduled_decoding(k, m, erasures, data_ptrs, coding_ptrs); | |||
996 | if (ptrs == NULL((void*)0)) return -1; | |||
997 | ||||
998 | ||||
999 | for (tdone = 0; tdone < size; tdone += packetsize*w) { | |||
1000 | jerasure_do_scheduled_operations(ptrs, schedule, packetsize); | |||
1001 | for (i = 0; i < k+m; i++) ptrs[i] += (packetsize*w); | |||
1002 | } | |||
1003 | ||||
1004 | free(ptrs); | |||
1005 | ||||
1006 | return 0; | |||
1007 | } | |||
1008 | ||||
1009 | /* This only works when m = 2 */ | |||
1010 | ||||
1011 | int ***jerasure_generate_schedule_cache(int k, int m, int w, int *bitmatrix, int smart) | |||
1012 | { | |||
1013 | int ***scache; | |||
1014 | int erasures[3]; | |||
1015 | int e1, e2; | |||
1016 | ||||
1017 | /* Ok -- this is yucky, but it's how I'm doing it. You will make an index out | |||
1018 | of erasures, which will be e1*(k+m)+(e2). If there is no e2, then e2 = e1. | |||
1019 | Isn't that clever and confusing. Sorry. | |||
1020 | ||||
1021 | We're not going to worry about ordering -- in other words, the schedule for | |||
1022 | e1,e2 will be the same as e2,e1. They will have the same pointer -- the | |||
1023 | schedule will not be duplicated. */ | |||
1024 | ||||
1025 | if (m != 2) return NULL((void*)0); | |||
| ||||
1026 | ||||
1027 | scache = talloc(int **, (k+m)*(k+m+1))(int ** *) malloc(sizeof(int **)*((k+m)*(k+m+1))); | |||
1028 | if (scache == NULL((void*)0)) return NULL((void*)0); | |||
1029 | ||||
1030 | for (e1 = 0; e1 < k+m; e1++) { | |||
1031 | erasures[0] = e1; | |||
1032 | for (e2 = 0; e2 < e1; e2++) { | |||
1033 | erasures[1] = e2; | |||
1034 | erasures[2] = -1; | |||
1035 | scache[e1*(k+m)+e2] = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart); | |||
1036 | scache[e2*(k+m)+e1] = scache[e1*(k+m)+e2]; | |||
1037 | } | |||
1038 | erasures[1] = -1; | |||
1039 | scache[e1*(k+m)+e1] = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart); | |||
1040 | } | |||
1041 | return scache; | |||
1042 | ||||
1043 | } | |||
1044 | ||||
1045 | int jerasure_invert_bitmatrix(int *mat, int *inv, int rows) | |||
1046 | { | |||
1047 | int cols, i, j, k; | |||
1048 | int tmp; | |||
1049 | ||||
1050 | cols = rows; | |||
1051 | ||||
1052 | k = 0; | |||
1053 | for (i = 0; i < rows; i++) { | |||
1054 | for (j = 0; j < cols; j++) { | |||
1055 | inv[k] = (i == j) ? 1 : 0; | |||
1056 | k++; | |||
1057 | } | |||
1058 | } | |||
1059 | ||||
1060 | /* First -- convert into upper triangular */ | |||
1061 | ||||
1062 | for (i = 0; i < cols; i++) { | |||
1063 | ||||
1064 | /* Swap rows if we have a zero i,i element. If we can't swap, then the | |||
1065 | matrix was not invertible */ | |||
1066 | ||||
1067 | if ((mat[i*cols+i]) == 0) { | |||
1068 | for (j = i+1; j < rows && (mat[j*cols+i]) == 0; j++) ; | |||
1069 | if (j == rows) return -1; | |||
1070 | for (k = 0; k < cols; k++) { | |||
1071 | tmp = mat[i*cols+k]; mat[i*cols+k] = mat[j*cols+k]; mat[j*cols+k] = tmp; | |||
1072 | tmp = inv[i*cols+k]; inv[i*cols+k] = inv[j*cols+k]; inv[j*cols+k] = tmp; | |||
1073 | } | |||
1074 | } | |||
1075 | ||||
1076 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||
1077 | for (j = i+1; j != rows; j++) { | |||
1078 | if (mat[j*cols+i] != 0) { | |||
1079 | for (k = 0; k < cols; k++) { | |||
1080 | mat[j*cols+k] ^= mat[i*cols+k]; | |||
1081 | inv[j*cols+k] ^= inv[i*cols+k]; | |||
1082 | } | |||
1083 | } | |||
1084 | } | |||
1085 | } | |||
1086 | ||||
1087 | /* Now the matrix is upper triangular. Start at the top and multiply down */ | |||
1088 | ||||
1089 | for (i = rows-1; i >= 0; i--) { | |||
1090 | for (j = 0; j < i; j++) { | |||
1091 | if (mat[j*cols+i]) { | |||
1092 | for (k = 0; k < cols; k++) { | |||
1093 | mat[j*cols+k] ^= mat[i*cols+k]; | |||
1094 | inv[j*cols+k] ^= inv[i*cols+k]; | |||
1095 | } | |||
1096 | } | |||
1097 | } | |||
1098 | } | |||
1099 | return 0; | |||
1100 | } | |||
1101 | ||||
1102 | int jerasure_invertible_bitmatrix(int *mat, int rows) | |||
1103 | { | |||
1104 | int cols, i, j, k; | |||
1105 | int tmp; | |||
1106 | ||||
1107 | cols = rows; | |||
1108 | ||||
1109 | /* First -- convert into upper triangular */ | |||
1110 | ||||
1111 | for (i = 0; i < cols; i++) { | |||
1112 | ||||
1113 | /* Swap rows if we have a zero i,i element. If we can't swap, then the | |||
1114 | matrix was not invertible */ | |||
1115 | ||||
1116 | if ((mat[i*cols+i]) == 0) { | |||
1117 | for (j = i+1; j < rows && (mat[j*cols+i]) == 0; j++) ; | |||
1118 | if (j == rows) return 0; | |||
1119 | for (k = 0; k < cols; k++) { | |||
1120 | tmp = mat[i*cols+k]; mat[i*cols+k] = mat[j*cols+k]; mat[j*cols+k] = tmp; | |||
1121 | } | |||
1122 | } | |||
1123 | ||||
1124 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||
1125 | for (j = i+1; j != rows; j++) { | |||
1126 | if (mat[j*cols+i] != 0) { | |||
1127 | for (k = 0; k < cols; k++) { | |||
1128 | mat[j*cols+k] ^= mat[i*cols+k]; | |||
1129 | } | |||
1130 | } | |||
1131 | } | |||
1132 | } | |||
1133 | return 1; | |||
1134 | } | |||
1135 | ||||
1136 | ||||
1137 | int *jerasure_matrix_multiply(int *m1, int *m2, int r1, int c1, int r2, int c2, int w) | |||
1138 | { | |||
1139 | int *product, i, j, k; | |||
1140 | ||||
1141 | product = (int *) malloc(sizeof(int)*r1*c2); | |||
1142 | for (i = 0; i < r1*c2; i++) product[i] = 0; | |||
1143 | ||||
1144 | for (i = 0; i < r1; i++) { | |||
1145 | for (j = 0; j < c2; j++) { | |||
1146 | for (k = 0; k < r2; k++) { | |||
1147 | product[i*c2+j] ^= galois_single_multiply(m1[i*c1+k], m2[k*c2+j], w); | |||
1148 | } | |||
1149 | } | |||
1150 | } | |||
1151 | return product; | |||
1152 | } | |||
1153 | ||||
1154 | void jerasure_get_stats(double *fill_in) | |||
1155 | { | |||
1156 | fill_in[0] = jerasure_total_xor_bytes; | |||
1157 | fill_in[1] = jerasure_total_gf_bytes; | |||
1158 | fill_in[2] = jerasure_total_memcpy_bytes; | |||
1159 | jerasure_total_xor_bytes = 0; | |||
1160 | jerasure_total_gf_bytes = 0; | |||
1161 | jerasure_total_memcpy_bytes = 0; | |||
1162 | } | |||
1163 | ||||
1164 | void jerasure_do_scheduled_operations(char **ptrs, int **operations, int packetsize) | |||
1165 | { | |||
1166 | char *sptr; | |||
1167 | char *dptr; | |||
1168 | int op; | |||
1169 | ||||
1170 | for (op = 0; operations[op][0] >= 0; op++) { | |||
1171 | sptr = ptrs[operations[op][0]] + operations[op][1]*packetsize; | |||
1172 | dptr = ptrs[operations[op][2]] + operations[op][3]*packetsize; | |||
1173 | if (operations[op][4]) { | |||
1174 | /* printf("%d,%d %d,%d\n", operations[op][0], | |||
1175 | operations[op][1], | |||
1176 | operations[op][2], | |||
1177 | operations[op][3]); | |||
1178 | printf("xor(0x%x, 0x%x -> 0x%x, %d)\n", sptr, dptr, dptr, packetsize); */ | |||
1179 | galois_region_xor(sptr, dptr, packetsize); | |||
1180 | jerasure_total_xor_bytes += packetsize; | |||
1181 | } else { | |||
1182 | /* printf("memcpy(0x%x <- 0x%x)\n", dptr, sptr); */ | |||
1183 | memcpy(dptr, sptr, packetsize); | |||
1184 | jerasure_total_memcpy_bytes += packetsize; | |||
1185 | } | |||
1186 | } | |||
1187 | } | |||
1188 | ||||
1189 | void jerasure_schedule_encode(int k, int m, int w, int **schedule, | |||
1190 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||
1191 | { | |||
1192 | char **ptr_copy; | |||
1193 | int i, tdone; | |||
1194 | ||||
1195 | ptr_copy = talloc(char *, (k+m))(char * *) malloc(sizeof(char *)*((k+m))); | |||
1196 | for (i = 0; i < k; i++) ptr_copy[i] = data_ptrs[i]; | |||
1197 | for (i = 0; i < m; i++) ptr_copy[i+k] = coding_ptrs[i]; | |||
1198 | for (tdone = 0; tdone < size; tdone += packetsize*w) { | |||
1199 | jerasure_do_scheduled_operations(ptr_copy, schedule, packetsize); | |||
1200 | for (i = 0; i < k+m; i++) ptr_copy[i] += (packetsize*w); | |||
1201 | } | |||
1202 | free(ptr_copy); | |||
1203 | } | |||
1204 | ||||
1205 | int **jerasure_dumb_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix) | |||
1206 | { | |||
1207 | int **operations; | |||
1208 | int op; | |||
1209 | int index, optodo, i, j; | |||
1210 | ||||
1211 | operations = talloc(int *, k*m*w*w+1)(int * *) malloc(sizeof(int *)*(k*m*w*w+1)); | |||
1212 | op = 0; | |||
1213 | ||||
1214 | index = 0; | |||
1215 | for (i = 0; i < m*w; i++) { | |||
1216 | optodo = 0; | |||
1217 | for (j = 0; j < k*w; j++) { | |||
1218 | if (bitmatrix[index]) { | |||
1219 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||
1220 | operations[op][4] = optodo; | |||
1221 | operations[op][0] = j/w; | |||
1222 | operations[op][1] = j%w; | |||
1223 | operations[op][2] = k+i/w; | |||
1224 | operations[op][3] = i%w; | |||
1225 | optodo = 1; | |||
1226 | op++; | |||
1227 | ||||
1228 | } | |||
1229 | index++; | |||
1230 | } | |||
1231 | } | |||
1232 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||
1233 | operations[op][0] = -1; | |||
1234 | return operations; | |||
1235 | } | |||
1236 | ||||
1237 | int **jerasure_smart_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix) | |||
1238 | { | |||
1239 | int **operations; | |||
1240 | int op; | |||
1241 | int i, j; | |||
1242 | int *diff, *from, *b1, *flink, *blink; | |||
1243 | int *ptr, no, row; | |||
1244 | int optodo; | |||
1245 | int bestrow = 0, bestdiff, top; | |||
1246 | ||||
1247 | /* printf("Scheduling:\n\n"); | |||
1248 | jerasure_print_bitmatrix(bitmatrix, m*w, k*w, w); */ | |||
1249 | ||||
1250 | operations = talloc(int *, k*m*w*w+1)(int * *) malloc(sizeof(int *)*(k*m*w*w+1)); | |||
1251 | op = 0; | |||
1252 | ||||
1253 | diff = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||
1254 | from = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||
1255 | flink = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||
1256 | blink = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||
1257 | ||||
1258 | ptr = bitmatrix; | |||
1259 | ||||
1260 | bestdiff = k*w+1; | |||
1261 | top = 0; | |||
1262 | for (i = 0; i < m*w; i++) { | |||
1263 | no = 0; | |||
1264 | for (j = 0; j < k*w; j++) { | |||
1265 | no += *ptr; | |||
1266 | ptr++; | |||
1267 | } | |||
1268 | diff[i] = no; | |||
1269 | from[i] = -1; | |||
1270 | flink[i] = i+1; | |||
1271 | blink[i] = i-1; | |||
1272 | if (no < bestdiff) { | |||
1273 | bestdiff = no; | |||
1274 | bestrow = i; | |||
1275 | } | |||
1276 | } | |||
1277 | ||||
1278 | flink[m*w-1] = -1; | |||
1279 | ||||
1280 | while (top != -1) { | |||
1281 | row = bestrow; | |||
1282 | /* printf("Doing row %d - %d from %d\n", row, diff[row], from[row]); */ | |||
1283 | ||||
1284 | if (blink[row] == -1) { | |||
1285 | top = flink[row]; | |||
1286 | if (top != -1) blink[top] = -1; | |||
1287 | } else { | |||
1288 | flink[blink[row]] = flink[row]; | |||
1289 | if (flink[row] != -1) { | |||
1290 | blink[flink[row]] = blink[row]; | |||
1291 | } | |||
1292 | } | |||
1293 | ||||
1294 | ptr = bitmatrix + row*k*w; | |||
1295 | if (from[row] == -1) { | |||
1296 | optodo = 0; | |||
1297 | for (j = 0; j < k*w; j++) { | |||
1298 | if (ptr[j]) { | |||
1299 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||
1300 | operations[op][4] = optodo; | |||
1301 | operations[op][0] = j/w; | |||
1302 | operations[op][1] = j%w; | |||
1303 | operations[op][2] = k+row/w; | |||
1304 | operations[op][3] = row%w; | |||
1305 | optodo = 1; | |||
1306 | op++; | |||
1307 | } | |||
1308 | } | |||
1309 | } else { | |||
1310 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||
1311 | operations[op][4] = 0; | |||
1312 | operations[op][0] = k+from[row]/w; | |||
1313 | operations[op][1] = from[row]%w; | |||
1314 | operations[op][2] = k+row/w; | |||
1315 | operations[op][3] = row%w; | |||
1316 | op++; | |||
1317 | b1 = bitmatrix + from[row]*k*w; | |||
1318 | for (j = 0; j < k*w; j++) { | |||
1319 | if (ptr[j] ^ b1[j]) { | |||
1320 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||
1321 | operations[op][4] = 1; | |||
1322 | operations[op][0] = j/w; | |||
1323 | operations[op][1] = j%w; | |||
1324 | operations[op][2] = k+row/w; | |||
1325 | operations[op][3] = row%w; | |||
1326 | optodo = 1; | |||
1327 | op++; | |||
1328 | } | |||
1329 | } | |||
1330 | } | |||
1331 | bestdiff = k*w+1; | |||
1332 | for (i = top; i != -1; i = flink[i]) { | |||
1333 | no = 1; | |||
1334 | b1 = bitmatrix + i*k*w; | |||
1335 | for (j = 0; j < k*w; j++) no += (ptr[j] ^ b1[j]); | |||
1336 | if (no < diff[i]) { | |||
1337 | from[i] = row; | |||
1338 | diff[i] = no; | |||
1339 | } | |||
1340 | if (diff[i] < bestdiff) { | |||
1341 | bestdiff = diff[i]; | |||
1342 | bestrow = i; | |||
1343 | } | |||
1344 | } | |||
1345 | } | |||
1346 | ||||
1347 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||
1348 | operations[op][0] = -1; | |||
1349 | free(from); | |||
1350 | free(diff); | |||
1351 | free(blink); | |||
1352 | free(flink); | |||
1353 | ||||
1354 | return operations; | |||
1355 | } | |||
1356 | ||||
1357 | void jerasure_bitmatrix_encode(int k, int m, int w, int *bitmatrix, | |||
1358 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||
1359 | { | |||
1360 | int i; | |||
1361 | ||||
1362 | if (packetsize%sizeof(long) != 0) { | |||
1363 | fprintf(stderr, "jerasure_bitmatrix_encode - packetsize(%d) %c sizeof(long) != 0\n", packetsize, '%')__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_encode - packetsize(%d) %c sizeof(long) != 0\n" , packetsize, '%'); | |||
1364 | assert(0)((void) (0)); | |||
1365 | } | |||
1366 | if (size%(packetsize*w) != 0) { | |||
1367 | fprintf(stderr, "jerasure_bitmatrix_encode - size(%d) %c (packetsize(%d)*w(%d))) != 0\n",__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_encode - size(%d) %c (packetsize(%d)*w(%d))) != 0\n" , size, '%', packetsize, w) | |||
1368 | size, '%', packetsize, w)__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_encode - size(%d) %c (packetsize(%d)*w(%d))) != 0\n" , size, '%', packetsize, w); | |||
1369 | assert(0)((void) (0)); | |||
1370 | } | |||
1371 | ||||
1372 | for (i = 0; i < m; i++) { | |||
1373 | jerasure_bitmatrix_dotprod(k, w, bitmatrix+i*k*w*w, NULL((void*)0), k+i, data_ptrs, coding_ptrs, size, packetsize); | |||
1374 | } | |||
1375 | } | |||
1376 | ||||
1377 | /* | |||
1378 | * Exported function for use by autoconf to perform quick | |||
1379 | * spot-check. | |||
1380 | */ | |||
1381 | int jerasure_autoconf_test() | |||
1382 | { | |||
1383 | int x = galois_single_multiply(1, 2, 8); | |||
1384 | if (x != 2) { | |||
1385 | return -1; | |||
1386 | } | |||
1387 | return 0; | |||
1388 | } | |||
1389 |