File: | home/bhubbard/working/src/ceph/src/erasure-code/jerasure/jerasure/src/jerasure.c |
Warning: | line 842, column 90 Potential leak of memory pointed to by 'ind_to_row' |
[?] Use j/k keys for keyboard navigation
1 | /* * | |||||
2 | * Copyright (c) 2014, James S. Plank and Kevin Greenan | |||||
3 | * All rights reserved. | |||||
4 | * | |||||
5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure | |||||
6 | * Coding Techniques | |||||
7 | * | |||||
8 | * Revision 2.0: Galois Field backend now links to GF-Complete | |||||
9 | * | |||||
10 | * Redistribution and use in source and binary forms, with or without | |||||
11 | * modification, are permitted provided that the following conditions | |||||
12 | * are met: | |||||
13 | * | |||||
14 | * - Redistributions of source code must retain the above copyright | |||||
15 | * notice, this list of conditions and the following disclaimer. | |||||
16 | * | |||||
17 | * - Redistributions in binary form must reproduce the above copyright | |||||
18 | * notice, this list of conditions and the following disclaimer in | |||||
19 | * the documentation and/or other materials provided with the | |||||
20 | * distribution. | |||||
21 | * | |||||
22 | * - Neither the name of the University of Tennessee nor the names of its | |||||
23 | * contributors may be used to endorse or promote products derived | |||||
24 | * from this software without specific prior written permission. | |||||
25 | * | |||||
26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||||
27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||||
28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||||
29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||||
30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |||||
31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | |||||
32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | |||||
33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |||||
34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY | |||||
36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||||
37 | * POSSIBILITY OF SUCH DAMAGE. | |||||
38 | */ | |||||
39 | ||||||
40 | /* Jerasure's authors: | |||||
41 | ||||||
42 | Revision 2.x - 2014: James S. Plank and Kevin M. Greenan | |||||
43 | Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman. | |||||
44 | Revision 1.0 - 2007: James S. Plank | |||||
45 | */ | |||||
46 | ||||||
47 | #include <stdio.h> | |||||
48 | #include <stdlib.h> | |||||
49 | #include <string.h> | |||||
50 | #include <assert.h> | |||||
51 | ||||||
52 | #include "galois.h" | |||||
53 | #include "jerasure.h" | |||||
54 | ||||||
55 | #define talloc(type, num)(type *) malloc(sizeof(type)*(num)) (type *) malloc(sizeof(type)*(num)) | |||||
56 | ||||||
57 | static double jerasure_total_xor_bytes = 0; | |||||
58 | static double jerasure_total_gf_bytes = 0; | |||||
59 | static double jerasure_total_memcpy_bytes = 0; | |||||
60 | ||||||
61 | void jerasure_print_matrix(int *m, int rows, int cols, int w) | |||||
62 | { | |||||
63 | int i, j; | |||||
64 | int fw; | |||||
65 | char s[30]; | |||||
66 | unsigned int w2; | |||||
67 | ||||||
68 | if (w == 32) { | |||||
69 | fw = 10; | |||||
70 | } else { | |||||
71 | w2 = (1 << w); | |||||
72 | sprintf(s, "%u", w2-1)__builtin___sprintf_chk (s, 2 - 1, __builtin_object_size (s, 2 > 1), "%u", w2-1); | |||||
73 | fw = strlen(s); | |||||
74 | } | |||||
75 | ||||||
76 | for (i = 0; i < rows; i++) { | |||||
77 | for (j = 0; j < cols; j++) { | |||||
78 | if (j != 0) printf(" ")__printf_chk (2 - 1, " "); | |||||
79 | printf("%*u", fw, m[i*cols+j])__printf_chk (2 - 1, "%*u", fw, m[i*cols+j]); | |||||
80 | } | |||||
81 | printf("\n")__printf_chk (2 - 1, "\n"); | |||||
82 | } | |||||
83 | } | |||||
84 | ||||||
85 | void jerasure_print_bitmatrix(int *m, int rows, int cols, int w) | |||||
86 | { | |||||
87 | int i, j; | |||||
88 | ||||||
89 | for (i = 0; i < rows; i++) { | |||||
90 | if (i != 0 && i%w == 0) printf("\n")__printf_chk (2 - 1, "\n"); | |||||
91 | for (j = 0; j < cols; j++) { | |||||
92 | if (j != 0 && j%w == 0) printf(" ")__printf_chk (2 - 1, " "); | |||||
93 | printf("%d", m[i*cols+j])__printf_chk (2 - 1, "%d", m[i*cols+j]); | |||||
94 | } | |||||
95 | printf("\n")__printf_chk (2 - 1, "\n"); | |||||
96 | } | |||||
97 | } | |||||
98 | ||||||
99 | int jerasure_make_decoding_matrix(int k, int m, int w, int *matrix, int *erased, int *decoding_matrix, int *dm_ids) | |||||
100 | { | |||||
101 | int i, j, *tmpmat; | |||||
102 | ||||||
103 | j = 0; | |||||
104 | for (i = 0; j < k; i++) { | |||||
105 | if (erased[i] == 0) { | |||||
106 | dm_ids[j] = i; | |||||
107 | j++; | |||||
108 | } | |||||
109 | } | |||||
110 | ||||||
111 | tmpmat = talloc(int, k*k)(int *) malloc(sizeof(int)*(k*k)); | |||||
112 | if (tmpmat == NULL((void*)0)) { return -1; } | |||||
113 | for (i = 0; i < k; i++) { | |||||
114 | if (dm_ids[i] < k) { | |||||
115 | for (j = 0; j < k; j++) tmpmat[i*k+j] = 0; | |||||
116 | tmpmat[i*k+dm_ids[i]] = 1; | |||||
117 | } else { | |||||
118 | for (j = 0; j < k; j++) { | |||||
119 | tmpmat[i*k+j] = matrix[(dm_ids[i]-k)*k+j]; | |||||
120 | } | |||||
121 | } | |||||
122 | } | |||||
123 | ||||||
124 | i = jerasure_invert_matrix(tmpmat, decoding_matrix, k, w); | |||||
125 | free(tmpmat); | |||||
126 | return i; | |||||
127 | } | |||||
128 | ||||||
129 | /* Internal Routine */ | |||||
130 | int jerasure_make_decoding_bitmatrix(int k, int m, int w, int *matrix, int *erased, int *decoding_matrix, int *dm_ids) | |||||
131 | { | |||||
132 | int i, j, *tmpmat; | |||||
133 | int index, mindex; | |||||
134 | ||||||
135 | j = 0; | |||||
136 | for (i = 0; j < k; i++) { | |||||
137 | if (erased[i] == 0) { | |||||
138 | dm_ids[j] = i; | |||||
139 | j++; | |||||
140 | } | |||||
141 | } | |||||
142 | ||||||
143 | tmpmat = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||||
144 | if (tmpmat == NULL((void*)0)) { return -1; } | |||||
145 | for (i = 0; i < k; i++) { | |||||
146 | if (dm_ids[i] < k) { | |||||
147 | index = i*k*w*w; | |||||
148 | for (j = 0; j < k*w*w; j++) tmpmat[index+j] = 0; | |||||
149 | index = i*k*w*w+dm_ids[i]*w; | |||||
150 | for (j = 0; j < w; j++) { | |||||
151 | tmpmat[index] = 1; | |||||
152 | index += (k*w+1); | |||||
153 | } | |||||
154 | } else { | |||||
155 | index = i*k*w*w; | |||||
156 | mindex = (dm_ids[i]-k)*k*w*w; | |||||
157 | for (j = 0; j < k*w*w; j++) { | |||||
158 | tmpmat[index+j] = matrix[mindex+j]; | |||||
159 | } | |||||
160 | } | |||||
161 | } | |||||
162 | ||||||
163 | i = jerasure_invert_bitmatrix(tmpmat, decoding_matrix, k*w); | |||||
164 | free(tmpmat); | |||||
165 | return i; | |||||
166 | } | |||||
167 | ||||||
168 | int jerasure_matrix_decode(int k, int m, int w, int *matrix, int row_k_ones, int *erasures, | |||||
169 | char **data_ptrs, char **coding_ptrs, int size) | |||||
170 | { | |||||
171 | int i, edd, lastdrive; | |||||
172 | int *tmpids; | |||||
173 | int *erased, *decoding_matrix, *dm_ids; | |||||
174 | ||||||
175 | if (w != 8 && w != 16 && w != 32) return -1; | |||||
176 | ||||||
177 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||||
178 | if (erased == NULL((void*)0)) return -1; | |||||
179 | ||||||
180 | /* Find the number of data drives failed */ | |||||
181 | ||||||
182 | lastdrive = k; | |||||
183 | ||||||
184 | edd = 0; | |||||
185 | for (i = 0; i < k; i++) { | |||||
186 | if (erased[i]) { | |||||
187 | edd++; | |||||
188 | lastdrive = i; | |||||
189 | } | |||||
190 | } | |||||
191 | ||||||
192 | /* You only need to create the decoding matrix in the following cases: | |||||
193 | ||||||
194 | 1. edd > 0 and row_k_ones is false. | |||||
195 | 2. edd > 0 and row_k_ones is true and coding device 0 has been erased. | |||||
196 | 3. edd > 1 | |||||
197 | ||||||
198 | We're going to use lastdrive to denote when to stop decoding data. | |||||
199 | At this point in the code, it is equal to the last erased data device. | |||||
200 | However, if we can't use the parity row to decode it (i.e. row_k_ones=0 | |||||
201 | or erased[k] = 1, we're going to set it to k so that the decoding | |||||
202 | pass will decode all data. | |||||
203 | */ | |||||
204 | ||||||
205 | if (!row_k_ones || erased[k]) lastdrive = k; | |||||
206 | ||||||
207 | dm_ids = NULL((void*)0); | |||||
208 | decoding_matrix = NULL((void*)0); | |||||
209 | ||||||
210 | if (edd > 1 || (edd > 0 && (!row_k_ones || erased[k]))) { | |||||
211 | dm_ids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||||
212 | if (dm_ids == NULL((void*)0)) { | |||||
213 | free(erased); | |||||
214 | return -1; | |||||
215 | } | |||||
216 | ||||||
217 | decoding_matrix = talloc(int, k*k)(int *) malloc(sizeof(int)*(k*k)); | |||||
218 | if (decoding_matrix == NULL((void*)0)) { | |||||
219 | free(erased); | |||||
220 | free(dm_ids); | |||||
221 | return -1; | |||||
222 | } | |||||
223 | ||||||
224 | if (jerasure_make_decoding_matrix(k, m, w, matrix, erased, decoding_matrix, dm_ids) < 0) { | |||||
225 | free(erased); | |||||
226 | free(dm_ids); | |||||
227 | free(decoding_matrix); | |||||
228 | return -1; | |||||
229 | } | |||||
230 | } | |||||
231 | ||||||
232 | /* Decode the data drives. | |||||
233 | If row_k_ones is true and coding device 0 is intact, then only decode edd-1 drives. | |||||
234 | This is done by stopping at lastdrive. | |||||
235 | We test whether edd > 0 so that we can exit the loop early if we're done. | |||||
236 | */ | |||||
237 | ||||||
238 | for (i = 0; edd > 0 && i < lastdrive; i++) { | |||||
239 | if (erased[i]) { | |||||
240 | jerasure_matrix_dotprod(k, w, decoding_matrix+(i*k), dm_ids, i, data_ptrs, coding_ptrs, size); | |||||
241 | edd--; | |||||
242 | } | |||||
243 | } | |||||
244 | ||||||
245 | /* Then if necessary, decode drive lastdrive */ | |||||
246 | ||||||
247 | if (edd > 0) { | |||||
248 | tmpids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||||
249 | for (i = 0; i < k; i++) { | |||||
250 | tmpids[i] = (i < lastdrive) ? i : i+1; | |||||
251 | } | |||||
252 | jerasure_matrix_dotprod(k, w, matrix, tmpids, lastdrive, data_ptrs, coding_ptrs, size); | |||||
253 | free(tmpids); | |||||
254 | } | |||||
255 | ||||||
256 | /* Finally, re-encode any erased coding devices */ | |||||
257 | ||||||
258 | for (i = 0; i < m; i++) { | |||||
259 | if (erased[k+i]) { | |||||
260 | jerasure_matrix_dotprod(k, w, matrix+(i*k), NULL((void*)0), i+k, data_ptrs, coding_ptrs, size); | |||||
261 | } | |||||
262 | } | |||||
263 | ||||||
264 | free(erased); | |||||
265 | if (dm_ids != NULL((void*)0)) free(dm_ids); | |||||
266 | if (decoding_matrix != NULL((void*)0)) free(decoding_matrix); | |||||
267 | ||||||
268 | return 0; | |||||
269 | } | |||||
270 | ||||||
271 | ||||||
272 | int *jerasure_matrix_to_bitmatrix(int k, int m, int w, int *matrix) | |||||
273 | { | |||||
274 | int *bitmatrix; | |||||
275 | int rowelts, rowindex, colindex, elt, i, j, l, x; | |||||
276 | ||||||
277 | if (matrix == NULL((void*)0)) { return NULL((void*)0); } | |||||
278 | ||||||
279 | bitmatrix = talloc(int, k*m*w*w)(int *) malloc(sizeof(int)*(k*m*w*w)); | |||||
280 | ||||||
281 | rowelts = k * w; | |||||
282 | rowindex = 0; | |||||
283 | ||||||
284 | for (i = 0; i < m; i++) { | |||||
285 | colindex = rowindex; | |||||
286 | for (j = 0; j < k; j++) { | |||||
287 | elt = matrix[i*k+j]; | |||||
288 | for (x = 0; x < w; x++) { | |||||
289 | for (l = 0; l < w; l++) { | |||||
290 | bitmatrix[colindex+x+l*rowelts] = ((elt & (1 << l)) ? 1 : 0); | |||||
291 | } | |||||
292 | elt = galois_single_multiply(elt, 2, w); | |||||
293 | } | |||||
294 | colindex += w; | |||||
295 | } | |||||
296 | rowindex += rowelts * w; | |||||
297 | } | |||||
298 | return bitmatrix; | |||||
299 | } | |||||
300 | ||||||
301 | void jerasure_matrix_encode(int k, int m, int w, int *matrix, | |||||
302 | char **data_ptrs, char **coding_ptrs, int size) | |||||
303 | { | |||||
304 | int i; | |||||
305 | ||||||
306 | if (w != 8 && w != 16 && w != 32) { | |||||
307 | fprintf(stderr, "ERROR: jerasure_matrix_encode() and w is not 8, 16 or 32\n")__fprintf_chk (stderr, 2 - 1, "ERROR: jerasure_matrix_encode() and w is not 8, 16 or 32\n" ); | |||||
308 | assert(0)((void) (0)); | |||||
309 | } | |||||
310 | ||||||
311 | for (i = 0; i < m; i++) { | |||||
312 | jerasure_matrix_dotprod(k, w, matrix+(i*k), NULL((void*)0), k+i, data_ptrs, coding_ptrs, size); | |||||
313 | } | |||||
314 | } | |||||
315 | ||||||
316 | void jerasure_bitmatrix_dotprod(int k, int w, int *bitmatrix_row, | |||||
317 | int *src_ids, int dest_id, | |||||
318 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||||
319 | { | |||||
320 | int j, sindex, pstarted, index, x, y; | |||||
321 | char *dptr, *pptr, *bdptr, *bpptr; | |||||
322 | ||||||
323 | if (size%(w*packetsize) != 0) { | |||||
324 | fprintf(stderr, "jerasure_bitmatrix_dotprod - size%c(w*packetsize)) must = 0\n", '%')__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_dotprod - size%c(w*packetsize)) must = 0\n" , '%'); | |||||
325 | assert(0)((void) (0)); | |||||
326 | } | |||||
327 | ||||||
328 | bpptr = (dest_id < k) ? data_ptrs[dest_id] : coding_ptrs[dest_id-k]; | |||||
329 | ||||||
330 | for (sindex = 0; sindex < size; sindex += (packetsize*w)) { | |||||
331 | index = 0; | |||||
332 | for (j = 0; j < w; j++) { | |||||
333 | pstarted = 0; | |||||
334 | pptr = bpptr + sindex + j*packetsize; | |||||
335 | for (x = 0; x < k; x++) { | |||||
336 | if (src_ids == NULL((void*)0)) { | |||||
337 | bdptr = data_ptrs[x]; | |||||
338 | } else if (src_ids[x] < k) { | |||||
339 | bdptr = data_ptrs[src_ids[x]]; | |||||
340 | } else { | |||||
341 | bdptr = coding_ptrs[src_ids[x]-k]; | |||||
342 | } | |||||
343 | for (y = 0; y < w; y++) { | |||||
344 | if (bitmatrix_row[index]) { | |||||
345 | dptr = bdptr + sindex + y*packetsize; | |||||
346 | if (!pstarted) { | |||||
347 | memcpy(pptr, dptr, packetsize); | |||||
348 | jerasure_total_memcpy_bytes += packetsize; | |||||
349 | pstarted = 1; | |||||
350 | } else { | |||||
351 | galois_region_xor(dptr, pptr, packetsize); | |||||
352 | jerasure_total_xor_bytes += packetsize; | |||||
353 | } | |||||
354 | } | |||||
355 | index++; | |||||
356 | } | |||||
357 | } | |||||
358 | } | |||||
359 | } | |||||
360 | } | |||||
361 | ||||||
362 | void jerasure_do_parity(int k, char **data_ptrs, char *parity_ptr, int size) | |||||
363 | { | |||||
364 | int i; | |||||
365 | ||||||
366 | memcpy(parity_ptr, data_ptrs[0], size); | |||||
367 | jerasure_total_memcpy_bytes += size; | |||||
368 | ||||||
369 | for (i = 1; i < k; i++) { | |||||
370 | galois_region_xor(data_ptrs[i], parity_ptr, size); | |||||
371 | jerasure_total_xor_bytes += size; | |||||
372 | } | |||||
373 | } | |||||
374 | ||||||
375 | int jerasure_invert_matrix(int *mat, int *inv, int rows, int w) | |||||
376 | { | |||||
377 | int cols, i, j, k, x, rs2; | |||||
378 | int row_start, tmp, inverse; | |||||
379 | ||||||
380 | cols = rows; | |||||
381 | ||||||
382 | k = 0; | |||||
383 | for (i = 0; i < rows; i++) { | |||||
384 | for (j = 0; j < cols; j++) { | |||||
385 | inv[k] = (i == j) ? 1 : 0; | |||||
386 | k++; | |||||
387 | } | |||||
388 | } | |||||
389 | ||||||
390 | /* First -- convert into upper triangular */ | |||||
391 | for (i = 0; i < cols; i++) { | |||||
392 | row_start = cols*i; | |||||
393 | ||||||
394 | /* Swap rows if we ave a zero i,i element. If we can't swap, then the | |||||
395 | matrix was not invertible */ | |||||
396 | ||||||
397 | if (mat[row_start+i] == 0) { | |||||
398 | for (j = i+1; j < rows && mat[cols*j+i] == 0; j++) ; | |||||
399 | if (j == rows) return -1; | |||||
400 | rs2 = j*cols; | |||||
401 | for (k = 0; k < cols; k++) { | |||||
402 | tmp = mat[row_start+k]; | |||||
403 | mat[row_start+k] = mat[rs2+k]; | |||||
404 | mat[rs2+k] = tmp; | |||||
405 | tmp = inv[row_start+k]; | |||||
406 | inv[row_start+k] = inv[rs2+k]; | |||||
407 | inv[rs2+k] = tmp; | |||||
408 | } | |||||
409 | } | |||||
410 | ||||||
411 | /* Multiply the row by 1/element i,i */ | |||||
412 | tmp = mat[row_start+i]; | |||||
413 | if (tmp != 1) { | |||||
414 | inverse = galois_single_divide(1, tmp, w); | |||||
415 | for (j = 0; j < cols; j++) { | |||||
416 | mat[row_start+j] = galois_single_multiply(mat[row_start+j], inverse, w); | |||||
417 | inv[row_start+j] = galois_single_multiply(inv[row_start+j], inverse, w); | |||||
418 | } | |||||
419 | } | |||||
420 | ||||||
421 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||||
422 | k = row_start+i; | |||||
423 | for (j = i+1; j != cols; j++) { | |||||
424 | k += cols; | |||||
425 | if (mat[k] != 0) { | |||||
426 | if (mat[k] == 1) { | |||||
427 | rs2 = cols*j; | |||||
428 | for (x = 0; x < cols; x++) { | |||||
429 | mat[rs2+x] ^= mat[row_start+x]; | |||||
430 | inv[rs2+x] ^= inv[row_start+x]; | |||||
431 | } | |||||
432 | } else { | |||||
433 | tmp = mat[k]; | |||||
434 | rs2 = cols*j; | |||||
435 | for (x = 0; x < cols; x++) { | |||||
436 | mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w); | |||||
437 | inv[rs2+x] ^= galois_single_multiply(tmp, inv[row_start+x], w); | |||||
438 | } | |||||
439 | } | |||||
440 | } | |||||
441 | } | |||||
442 | } | |||||
443 | ||||||
444 | /* Now the matrix is upper triangular. Start at the top and multiply down */ | |||||
445 | ||||||
446 | for (i = rows-1; i >= 0; i--) { | |||||
447 | row_start = i*cols; | |||||
448 | for (j = 0; j < i; j++) { | |||||
449 | rs2 = j*cols; | |||||
450 | if (mat[rs2+i] != 0) { | |||||
451 | tmp = mat[rs2+i]; | |||||
452 | mat[rs2+i] = 0; | |||||
453 | for (k = 0; k < cols; k++) { | |||||
454 | inv[rs2+k] ^= galois_single_multiply(tmp, inv[row_start+k], w); | |||||
455 | } | |||||
456 | } | |||||
457 | } | |||||
458 | } | |||||
459 | return 0; | |||||
460 | } | |||||
461 | ||||||
462 | int jerasure_invertible_matrix(int *mat, int rows, int w) | |||||
463 | { | |||||
464 | int cols, i, j, k, x, rs2; | |||||
465 | int row_start, tmp, inverse; | |||||
466 | ||||||
467 | cols = rows; | |||||
468 | ||||||
469 | /* First -- convert into upper triangular */ | |||||
470 | for (i = 0; i < cols; i++) { | |||||
471 | row_start = cols*i; | |||||
472 | ||||||
473 | /* Swap rows if we ave a zero i,i element. If we can't swap, then the | |||||
474 | matrix was not invertible */ | |||||
475 | ||||||
476 | if (mat[row_start+i] == 0) { | |||||
477 | for (j = i+1; j < rows && mat[cols*j+i] == 0; j++) ; | |||||
478 | if (j == rows) return 0; | |||||
479 | rs2 = j*cols; | |||||
480 | for (k = 0; k < cols; k++) { | |||||
481 | tmp = mat[row_start+k]; | |||||
482 | mat[row_start+k] = mat[rs2+k]; | |||||
483 | mat[rs2+k] = tmp; | |||||
484 | } | |||||
485 | } | |||||
486 | ||||||
487 | /* Multiply the row by 1/element i,i */ | |||||
488 | tmp = mat[row_start+i]; | |||||
489 | if (tmp != 1) { | |||||
490 | inverse = galois_single_divide(1, tmp, w); | |||||
491 | for (j = 0; j < cols; j++) { | |||||
492 | mat[row_start+j] = galois_single_multiply(mat[row_start+j], inverse, w); | |||||
493 | } | |||||
494 | } | |||||
495 | ||||||
496 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||||
497 | k = row_start+i; | |||||
498 | for (j = i+1; j != cols; j++) { | |||||
499 | k += cols; | |||||
500 | if (mat[k] != 0) { | |||||
501 | if (mat[k] == 1) { | |||||
502 | rs2 = cols*j; | |||||
503 | for (x = 0; x < cols; x++) { | |||||
504 | mat[rs2+x] ^= mat[row_start+x]; | |||||
505 | } | |||||
506 | } else { | |||||
507 | tmp = mat[k]; | |||||
508 | rs2 = cols*j; | |||||
509 | for (x = 0; x < cols; x++) { | |||||
510 | mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w); | |||||
511 | } | |||||
512 | } | |||||
513 | } | |||||
514 | } | |||||
515 | } | |||||
516 | return 1; | |||||
517 | } | |||||
518 | ||||||
519 | /* Converts a list-style version of the erasures into an array of k+m elements | |||||
520 | where the element = 1 if the index has been erased, and zero otherwise */ | |||||
521 | ||||||
522 | int *jerasure_erasures_to_erased(int k, int m, int *erasures) | |||||
523 | { | |||||
524 | int td; | |||||
525 | int t_non_erased; | |||||
526 | int *erased; | |||||
527 | int i; | |||||
528 | ||||||
529 | td = k+m; | |||||
530 | erased = talloc(int, td)(int *) malloc(sizeof(int)*(td)); | |||||
531 | if (erased == NULL((void*)0)) return NULL((void*)0); | |||||
532 | t_non_erased = td; | |||||
533 | ||||||
534 | for (i = 0; i < td; i++) erased[i] = 0; | |||||
535 | ||||||
536 | for (i = 0; erasures[i] != -1; i++) { | |||||
537 | if (erased[erasures[i]] == 0) { | |||||
538 | erased[erasures[i]] = 1; | |||||
539 | t_non_erased--; | |||||
540 | if (t_non_erased < k) { | |||||
541 | free(erased); | |||||
542 | return NULL((void*)0); | |||||
543 | } | |||||
544 | } | |||||
545 | } | |||||
546 | return erased; | |||||
547 | } | |||||
548 | ||||||
549 | void jerasure_free_schedule(int **schedule) | |||||
550 | { | |||||
551 | int i; | |||||
552 | ||||||
553 | for (i = 0; schedule[i][0] >= 0; i++) free(schedule[i]); | |||||
554 | free(schedule[i]); | |||||
555 | free(schedule); | |||||
556 | } | |||||
557 | ||||||
558 | void jerasure_free_schedule_cache(int k, int m, int ***cache) | |||||
559 | { | |||||
560 | int e1, e2; | |||||
561 | ||||||
562 | if (m != 2) { | |||||
563 | fprintf(stderr, "jerasure_free_schedule_cache(): m must equal 2\n")__fprintf_chk (stderr, 2 - 1, "jerasure_free_schedule_cache(): m must equal 2\n" ); | |||||
564 | assert(0)((void) (0)); | |||||
565 | } | |||||
566 | ||||||
567 | for (e1 = 0; e1 < k+m; e1++) { | |||||
568 | for (e2 = 0; e2 < e1; e2++) { | |||||
569 | jerasure_free_schedule(cache[e1*(k+m)+e2]); | |||||
570 | } | |||||
571 | jerasure_free_schedule(cache[e1*(k+m)+e1]); | |||||
572 | } | |||||
573 | free(cache); | |||||
574 | } | |||||
575 | ||||||
576 | void jerasure_matrix_dotprod(int k, int w, int *matrix_row, | |||||
577 | int *src_ids, int dest_id, | |||||
578 | char **data_ptrs, char **coding_ptrs, int size) | |||||
579 | { | |||||
580 | int init; | |||||
581 | char *dptr, *sptr; | |||||
582 | int i; | |||||
583 | ||||||
584 | if (w != 1 && w != 8 && w != 16 && w != 32) { | |||||
585 | fprintf(stderr, "ERROR: jerasure_matrix_dotprod() called and w is not 1, 8, 16 or 32\n")__fprintf_chk (stderr, 2 - 1, "ERROR: jerasure_matrix_dotprod() called and w is not 1, 8, 16 or 32\n" ); | |||||
586 | assert(0)((void) (0)); | |||||
587 | } | |||||
588 | ||||||
589 | init = 0; | |||||
590 | ||||||
591 | dptr = (dest_id < k) ? data_ptrs[dest_id] : coding_ptrs[dest_id-k]; | |||||
592 | ||||||
593 | /* First copy or xor any data that does not need to be multiplied by a factor */ | |||||
594 | ||||||
595 | for (i = 0; i < k; i++) { | |||||
596 | if (matrix_row[i] == 1) { | |||||
597 | if (src_ids == NULL((void*)0)) { | |||||
598 | sptr = data_ptrs[i]; | |||||
599 | } else if (src_ids[i] < k) { | |||||
600 | sptr = data_ptrs[src_ids[i]]; | |||||
601 | } else { | |||||
602 | sptr = coding_ptrs[src_ids[i]-k]; | |||||
603 | } | |||||
604 | if (init == 0) { | |||||
605 | memcpy(dptr, sptr, size); | |||||
606 | jerasure_total_memcpy_bytes += size; | |||||
607 | init = 1; | |||||
608 | } else { | |||||
609 | galois_region_xor(sptr, dptr, size); | |||||
610 | jerasure_total_xor_bytes += size; | |||||
611 | } | |||||
612 | } | |||||
613 | } | |||||
614 | ||||||
615 | /* Now do the data that needs to be multiplied by a factor */ | |||||
616 | ||||||
617 | for (i = 0; i < k; i++) { | |||||
618 | if (matrix_row[i] != 0 && matrix_row[i] != 1) { | |||||
619 | if (src_ids == NULL((void*)0)) { | |||||
620 | sptr = data_ptrs[i]; | |||||
621 | } else if (src_ids[i] < k) { | |||||
622 | sptr = data_ptrs[src_ids[i]]; | |||||
623 | } else { | |||||
624 | sptr = coding_ptrs[src_ids[i]-k]; | |||||
625 | } | |||||
626 | switch (w) { | |||||
627 | case 8: galois_w08_region_multiply(sptr, matrix_row[i], size, dptr, init); break; | |||||
628 | case 16: galois_w16_region_multiply(sptr, matrix_row[i], size, dptr, init); break; | |||||
629 | case 32: galois_w32_region_multiply(sptr, matrix_row[i], size, dptr, init); break; | |||||
630 | } | |||||
631 | jerasure_total_gf_bytes += size; | |||||
632 | init = 1; | |||||
633 | } | |||||
634 | } | |||||
635 | } | |||||
636 | ||||||
637 | ||||||
638 | int jerasure_bitmatrix_decode(int k, int m, int w, int *bitmatrix, int row_k_ones, int *erasures, | |||||
639 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||||
640 | { | |||||
641 | int i; | |||||
642 | int *erased; | |||||
643 | int *decoding_matrix; | |||||
644 | int *dm_ids; | |||||
645 | int edd, *tmpids, lastdrive; | |||||
646 | ||||||
647 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||||
648 | if (erased == NULL((void*)0)) return -1; | |||||
649 | ||||||
650 | /* See jerasure_matrix_decode for the logic of this routine. This one works just like | |||||
651 | it, but calls the bitmatrix ops instead */ | |||||
652 | ||||||
653 | lastdrive = k; | |||||
654 | ||||||
655 | edd = 0; | |||||
656 | for (i = 0; i < k; i++) { | |||||
657 | if (erased[i]) { | |||||
658 | edd++; | |||||
659 | lastdrive = i; | |||||
660 | } | |||||
661 | } | |||||
662 | ||||||
663 | if (row_k_ones != 1 || erased[k]) lastdrive = k; | |||||
664 | ||||||
665 | dm_ids = NULL((void*)0); | |||||
666 | decoding_matrix = NULL((void*)0); | |||||
667 | ||||||
668 | if (edd > 1 || (edd > 0 && (row_k_ones != 1 || erased[k]))) { | |||||
669 | ||||||
670 | dm_ids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||||
671 | if (dm_ids == NULL((void*)0)) { | |||||
672 | free(erased); | |||||
673 | return -1; | |||||
674 | } | |||||
675 | ||||||
676 | decoding_matrix = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||||
677 | if (decoding_matrix == NULL((void*)0)) { | |||||
678 | free(erased); | |||||
679 | free(dm_ids); | |||||
680 | return -1; | |||||
681 | } | |||||
682 | ||||||
683 | if (jerasure_make_decoding_bitmatrix(k, m, w, bitmatrix, erased, decoding_matrix, dm_ids) < 0) { | |||||
684 | free(erased); | |||||
685 | free(dm_ids); | |||||
686 | free(decoding_matrix); | |||||
687 | return -1; | |||||
688 | } | |||||
689 | } | |||||
690 | ||||||
691 | for (i = 0; edd > 0 && i < lastdrive; i++) { | |||||
692 | if (erased[i]) { | |||||
693 | jerasure_bitmatrix_dotprod(k, w, decoding_matrix+i*k*w*w, dm_ids, i, data_ptrs, coding_ptrs, size, packetsize); | |||||
694 | edd--; | |||||
695 | } | |||||
696 | } | |||||
697 | ||||||
698 | if (edd > 0) { | |||||
699 | tmpids = talloc(int, k)(int *) malloc(sizeof(int)*(k)); | |||||
700 | for (i = 0; i < k; i++) { | |||||
701 | tmpids[i] = (i < lastdrive) ? i : i+1; | |||||
702 | } | |||||
703 | jerasure_bitmatrix_dotprod(k, w, bitmatrix, tmpids, lastdrive, data_ptrs, coding_ptrs, size, packetsize); | |||||
704 | free(tmpids); | |||||
705 | } | |||||
706 | ||||||
707 | for (i = 0; i < m; i++) { | |||||
708 | if (erased[k+i]) { | |||||
709 | jerasure_bitmatrix_dotprod(k, w, bitmatrix+i*k*w*w, NULL((void*)0), k+i, data_ptrs, coding_ptrs, size, packetsize); | |||||
710 | } | |||||
711 | } | |||||
712 | ||||||
713 | free(erased); | |||||
714 | if (dm_ids != NULL((void*)0)) free(dm_ids); | |||||
715 | if (decoding_matrix != NULL((void*)0)) free(decoding_matrix); | |||||
716 | ||||||
717 | return 0; | |||||
718 | } | |||||
719 | ||||||
720 | static char **set_up_ptrs_for_scheduled_decoding(int k, int m, int *erasures, char **data_ptrs, char **coding_ptrs) | |||||
721 | { | |||||
722 | int ddf, cdf; | |||||
723 | int *erased; | |||||
724 | char **ptrs; | |||||
725 | int i, j, x; | |||||
726 | ||||||
727 | ddf = 0; | |||||
728 | cdf = 0; | |||||
729 | for (i = 0; erasures[i] != -1; i++) { | |||||
730 | if (erasures[i] < k) ddf++; else cdf++; | |||||
731 | } | |||||
732 | ||||||
733 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||||
734 | if (erased == NULL((void*)0)) return NULL((void*)0); | |||||
735 | ||||||
736 | /* Set up ptrs. It will be as follows: | |||||
737 | ||||||
738 | - If data drive i has not failed, then ptrs[i] = data_ptrs[i]. | |||||
739 | - If data drive i has failed, then ptrs[i] = coding_ptrs[j], where j is the | |||||
740 | lowest unused non-failed coding drive. | |||||
741 | - Elements k to k+ddf-1 are data_ptrs[] of the failed data drives. | |||||
742 | - Elements k+ddf to k+ddf+cdf-1 are coding_ptrs[] of the failed data drives. | |||||
743 | ||||||
744 | The array row_ids contains the ids of ptrs. | |||||
745 | The array ind_to_row_ids contains the row_id of drive i. | |||||
746 | ||||||
747 | However, we're going to set row_ids and ind_to_row in a different procedure. | |||||
748 | */ | |||||
749 | ||||||
750 | ptrs = talloc(char *, k+m)(char * *) malloc(sizeof(char *)*(k+m)); | |||||
751 | ||||||
752 | j = k; | |||||
753 | x = k; | |||||
754 | for (i = 0; i < k; i++) { | |||||
755 | if (erased[i] == 0) { | |||||
756 | ptrs[i] = data_ptrs[i]; | |||||
757 | } else { | |||||
758 | while (erased[j]) j++; | |||||
759 | ptrs[i] = coding_ptrs[j-k]; | |||||
760 | j++; | |||||
761 | ptrs[x] = data_ptrs[i]; | |||||
762 | x++; | |||||
763 | } | |||||
764 | } | |||||
765 | for (i = k; i < k+m; i++) { | |||||
766 | if (erased[i]) { | |||||
767 | ptrs[x] = coding_ptrs[i-k]; | |||||
768 | x++; | |||||
769 | } | |||||
770 | } | |||||
771 | free(erased); | |||||
772 | return ptrs; | |||||
773 | } | |||||
774 | ||||||
775 | static int set_up_ids_for_scheduled_decoding(int k, int m, int *erasures, int *row_ids, int *ind_to_row) | |||||
776 | { | |||||
777 | int ddf, cdf; | |||||
778 | int *erased; | |||||
779 | int i, j, x; | |||||
780 | ||||||
781 | ddf = 0; | |||||
782 | cdf = 0; | |||||
783 | for (i = 0; erasures[i] != -1; i++) { | |||||
784 | if (erasures[i] < k) ddf++; else cdf++; | |||||
785 | } | |||||
786 | ||||||
787 | erased = jerasure_erasures_to_erased(k, m, erasures); | |||||
788 | if (erased == NULL((void*)0)) return -1; | |||||
789 | ||||||
790 | /* See set_up_ptrs_for_scheduled_decoding for how these are set */ | |||||
791 | ||||||
792 | j = k; | |||||
793 | x = k; | |||||
794 | for (i = 0; i < k; i++) { | |||||
795 | if (erased[i] == 0) { | |||||
796 | row_ids[i] = i; | |||||
797 | ind_to_row[i] = i; | |||||
798 | } else { | |||||
799 | while (erased[j]) j++; | |||||
800 | row_ids[i] = j; | |||||
801 | ind_to_row[j] = i; | |||||
802 | j++; | |||||
803 | row_ids[x] = i; | |||||
804 | ind_to_row[i] = x; | |||||
805 | x++; | |||||
806 | } | |||||
807 | } | |||||
808 | for (i = k; i < k+m; i++) { | |||||
809 | if (erased[i]) { | |||||
810 | row_ids[x] = i; | |||||
811 | ind_to_row[i] = x; | |||||
812 | x++; | |||||
813 | } | |||||
814 | } | |||||
815 | free(erased); | |||||
816 | return 0; | |||||
817 | } | |||||
818 | ||||||
819 | static int **jerasure_generate_decoding_schedule(int k, int m, int w, int *bitmatrix, int *erasures, int smart) | |||||
820 | { | |||||
821 | int i, j, x, drive, y, index, z; | |||||
822 | int *decoding_matrix, *inverse, *real_decoding_matrix; | |||||
823 | int *ptr; | |||||
824 | int *row_ids; | |||||
825 | int *ind_to_row; | |||||
826 | int ddf, cdf; | |||||
827 | int **schedule; | |||||
828 | int *b1, *b2; | |||||
829 | ||||||
830 | /* First, figure out the number of data drives that have failed, and the | |||||
831 | number of coding drives that have failed: ddf and cdf */ | |||||
832 | ||||||
833 | ddf = 0; | |||||
834 | cdf = 0; | |||||
835 | for (i = 0; erasures[i] != -1; i++) { | |||||
836 | if (erasures[i] < k) ddf++; else cdf++; | |||||
837 | } | |||||
838 | ||||||
839 | row_ids = talloc(int, k+m)(int *) malloc(sizeof(int)*(k+m)); | |||||
840 | ind_to_row = talloc(int, k+m)(int *) malloc(sizeof(int)*(k+m)); | |||||
841 | ||||||
842 | if (set_up_ids_for_scheduled_decoding(k, m, erasures, row_ids, ind_to_row) < 0) return NULL((void*)0); | |||||
| ||||||
843 | ||||||
844 | /* Now, we're going to create one decoding matrix which is going to | |||||
845 | decode everything with one call. The hope is that the scheduler | |||||
846 | will do a good job. This matrix has w*e rows, where e is the | |||||
847 | number of erasures (ddf+cdf) */ | |||||
848 | ||||||
849 | real_decoding_matrix = talloc(int, k*w*(cdf+ddf)*w)(int *) malloc(sizeof(int)*(k*w*(cdf+ddf)*w)); | |||||
850 | ||||||
851 | /* First, if any data drives have failed, then initialize the first | |||||
852 | ddf*w rows of the decoding matrix from the standard decoding | |||||
853 | matrix inversion */ | |||||
854 | ||||||
855 | if (ddf > 0) { | |||||
856 | ||||||
857 | decoding_matrix = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||||
858 | ptr = decoding_matrix; | |||||
859 | for (i = 0; i < k; i++) { | |||||
860 | if (row_ids[i] == i) { | |||||
861 | bzero(ptr, k*w*w*sizeof(int)); | |||||
862 | for (x = 0; x < w; x++) { | |||||
863 | ptr[x+i*w+x*k*w] = 1; | |||||
864 | } | |||||
865 | } else { | |||||
866 | memcpy(ptr, bitmatrix+k*w*w*(row_ids[i]-k), k*w*w*sizeof(int)); | |||||
867 | } | |||||
868 | ptr += (k*w*w); | |||||
869 | } | |||||
870 | inverse = talloc(int, k*k*w*w)(int *) malloc(sizeof(int)*(k*k*w*w)); | |||||
871 | jerasure_invert_bitmatrix(decoding_matrix, inverse, k*w); | |||||
872 | ||||||
873 | /* printf("\nMatrix to invert\n"); | |||||
874 | jerasure_print_bitmatrix(decoding_matrix, k*w, k*w, w); | |||||
875 | printf("\n"); | |||||
876 | printf("\nInverse\n"); | |||||
877 | jerasure_print_bitmatrix(inverse, k*w, k*w, w); | |||||
878 | printf("\n"); */ | |||||
879 | ||||||
880 | free(decoding_matrix); | |||||
881 | ptr = real_decoding_matrix; | |||||
882 | for (i = 0; i < ddf; i++) { | |||||
883 | memcpy(ptr, inverse+k*w*w*row_ids[k+i], sizeof(int)*k*w*w); | |||||
884 | ptr += (k*w*w); | |||||
885 | } | |||||
886 | free(inverse); | |||||
887 | } | |||||
888 | ||||||
889 | /* Next, here comes the hard part. For each coding node that needs | |||||
890 | to be decoded, you start by putting its rows of the distribution | |||||
891 | matrix into the decoding matrix. If there were no failed data | |||||
892 | nodes, then you're done. However, if there have been failed | |||||
893 | data nodes, then you need to modify the columns that correspond | |||||
894 | to the data nodes. You do that by first zeroing them. Then | |||||
895 | whereever there is a one in the distribution matrix, you XOR | |||||
896 | in the corresponding row from the failed data node's entry in | |||||
897 | the decoding matrix. The whole process kind of makes my head | |||||
898 | spin, but it works. | |||||
899 | */ | |||||
900 | ||||||
901 | for (x = 0; x < cdf; x++) { | |||||
902 | drive = row_ids[x+ddf+k]-k; | |||||
903 | ptr = real_decoding_matrix + k*w*w*(ddf+x); | |||||
904 | memcpy(ptr, bitmatrix+drive*k*w*w, sizeof(int)*k*w*w); | |||||
905 | ||||||
906 | for (i = 0; i < k; i++) { | |||||
907 | if (row_ids[i] != i) { | |||||
908 | for (j = 0; j < w; j++) { | |||||
909 | bzero(ptr+j*k*w+i*w, sizeof(int)*w); | |||||
910 | } | |||||
911 | } | |||||
912 | } | |||||
913 | ||||||
914 | /* There's the yucky part */ | |||||
915 | ||||||
916 | index = drive*k*w*w; | |||||
917 | for (i = 0; i < k; i++) { | |||||
918 | if (row_ids[i] != i) { | |||||
919 | b1 = real_decoding_matrix+(ind_to_row[i]-k)*k*w*w; | |||||
920 | for (j = 0; j < w; j++) { | |||||
921 | b2 = ptr + j*k*w; | |||||
922 | for (y = 0; y < w; y++) { | |||||
923 | if (bitmatrix[index+j*k*w+i*w+y]) { | |||||
924 | for (z = 0; z < k*w; z++) { | |||||
925 | b2[z] = b2[z] ^ b1[z+y*k*w]; | |||||
926 | } | |||||
927 | } | |||||
928 | } | |||||
929 | } | |||||
930 | } | |||||
931 | } | |||||
932 | } | |||||
933 | ||||||
934 | /* | |||||
935 | printf("\n\nReal Decoding Matrix\n\n"); | |||||
936 | jerasure_print_bitmatrix(real_decoding_matrix, (ddf+cdf)*w, k*w, w); | |||||
937 | printf("\n"); */ | |||||
938 | if (smart) { | |||||
939 | schedule = jerasure_smart_bitmatrix_to_schedule(k, ddf+cdf, w, real_decoding_matrix); | |||||
940 | } else { | |||||
941 | schedule = jerasure_dumb_bitmatrix_to_schedule(k, ddf+cdf, w, real_decoding_matrix); | |||||
942 | } | |||||
943 | free(row_ids); | |||||
944 | free(ind_to_row); | |||||
945 | free(real_decoding_matrix); | |||||
946 | return schedule; | |||||
947 | } | |||||
948 | ||||||
949 | int jerasure_schedule_decode_lazy(int k, int m, int w, int *bitmatrix, int *erasures, | |||||
950 | char **data_ptrs, char **coding_ptrs, int size, int packetsize, | |||||
951 | int smart) | |||||
952 | { | |||||
953 | int i, tdone; | |||||
954 | char **ptrs; | |||||
955 | int **schedule; | |||||
956 | ||||||
957 | ptrs = set_up_ptrs_for_scheduled_decoding(k, m, erasures, data_ptrs, coding_ptrs); | |||||
958 | if (ptrs == NULL((void*)0)) return -1; | |||||
959 | ||||||
960 | schedule = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart); | |||||
961 | if (schedule == NULL((void*)0)) { | |||||
962 | free(ptrs); | |||||
963 | return -1; | |||||
964 | } | |||||
965 | ||||||
966 | for (tdone = 0; tdone < size; tdone += packetsize*w) { | |||||
967 | jerasure_do_scheduled_operations(ptrs, schedule, packetsize); | |||||
968 | for (i = 0; i < k+m; i++) ptrs[i] += (packetsize*w); | |||||
969 | } | |||||
970 | ||||||
971 | jerasure_free_schedule(schedule); | |||||
972 | free(ptrs); | |||||
973 | ||||||
974 | return 0; | |||||
975 | } | |||||
976 | ||||||
977 | int jerasure_schedule_decode_cache(int k, int m, int w, int ***scache, int *erasures, | |||||
978 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||||
979 | { | |||||
980 | int i, tdone; | |||||
981 | char **ptrs; | |||||
982 | int **schedule; | |||||
983 | int index; | |||||
984 | ||||||
985 | if (erasures[1] == -1) { | |||||
986 | index = erasures[0]*(k+m) + erasures[0]; | |||||
987 | } else if (erasures[2] == -1) { | |||||
988 | index = erasures[0]*(k+m) + erasures[1]; | |||||
989 | } else { | |||||
990 | return -1; | |||||
991 | } | |||||
992 | ||||||
993 | schedule = scache[index]; | |||||
994 | ||||||
995 | ptrs = set_up_ptrs_for_scheduled_decoding(k, m, erasures, data_ptrs, coding_ptrs); | |||||
996 | if (ptrs == NULL((void*)0)) return -1; | |||||
997 | ||||||
998 | ||||||
999 | for (tdone = 0; tdone < size; tdone += packetsize*w) { | |||||
1000 | jerasure_do_scheduled_operations(ptrs, schedule, packetsize); | |||||
1001 | for (i = 0; i < k+m; i++) ptrs[i] += (packetsize*w); | |||||
1002 | } | |||||
1003 | ||||||
1004 | free(ptrs); | |||||
1005 | ||||||
1006 | return 0; | |||||
1007 | } | |||||
1008 | ||||||
1009 | /* This only works when m = 2 */ | |||||
1010 | ||||||
1011 | int ***jerasure_generate_schedule_cache(int k, int m, int w, int *bitmatrix, int smart) | |||||
1012 | { | |||||
1013 | int ***scache; | |||||
1014 | int erasures[3]; | |||||
1015 | int e1, e2; | |||||
1016 | ||||||
1017 | /* Ok -- this is yucky, but it's how I'm doing it. You will make an index out | |||||
1018 | of erasures, which will be e1*(k+m)+(e2). If there is no e2, then e2 = e1. | |||||
1019 | Isn't that clever and confusing. Sorry. | |||||
1020 | ||||||
1021 | We're not going to worry about ordering -- in other words, the schedule for | |||||
1022 | e1,e2 will be the same as e2,e1. They will have the same pointer -- the | |||||
1023 | schedule will not be duplicated. */ | |||||
1024 | ||||||
1025 | if (m != 2) return NULL((void*)0); | |||||
| ||||||
1026 | ||||||
1027 | scache = talloc(int **, (k+m)*(k+m+1))(int ** *) malloc(sizeof(int **)*((k+m)*(k+m+1))); | |||||
1028 | if (scache == NULL((void*)0)) return NULL((void*)0); | |||||
1029 | ||||||
1030 | for (e1 = 0; e1 < k+m; e1++) { | |||||
1031 | erasures[0] = e1; | |||||
1032 | for (e2 = 0; e2 < e1; e2++) { | |||||
1033 | erasures[1] = e2; | |||||
1034 | erasures[2] = -1; | |||||
1035 | scache[e1*(k+m)+e2] = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart); | |||||
1036 | scache[e2*(k+m)+e1] = scache[e1*(k+m)+e2]; | |||||
1037 | } | |||||
1038 | erasures[1] = -1; | |||||
1039 | scache[e1*(k+m)+e1] = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart); | |||||
1040 | } | |||||
1041 | return scache; | |||||
1042 | ||||||
1043 | } | |||||
1044 | ||||||
1045 | int jerasure_invert_bitmatrix(int *mat, int *inv, int rows) | |||||
1046 | { | |||||
1047 | int cols, i, j, k; | |||||
1048 | int tmp; | |||||
1049 | ||||||
1050 | cols = rows; | |||||
1051 | ||||||
1052 | k = 0; | |||||
1053 | for (i = 0; i < rows; i++) { | |||||
1054 | for (j = 0; j < cols; j++) { | |||||
1055 | inv[k] = (i == j) ? 1 : 0; | |||||
1056 | k++; | |||||
1057 | } | |||||
1058 | } | |||||
1059 | ||||||
1060 | /* First -- convert into upper triangular */ | |||||
1061 | ||||||
1062 | for (i = 0; i < cols; i++) { | |||||
1063 | ||||||
1064 | /* Swap rows if we have a zero i,i element. If we can't swap, then the | |||||
1065 | matrix was not invertible */ | |||||
1066 | ||||||
1067 | if ((mat[i*cols+i]) == 0) { | |||||
1068 | for (j = i+1; j < rows && (mat[j*cols+i]) == 0; j++) ; | |||||
1069 | if (j == rows) return -1; | |||||
1070 | for (k = 0; k < cols; k++) { | |||||
1071 | tmp = mat[i*cols+k]; mat[i*cols+k] = mat[j*cols+k]; mat[j*cols+k] = tmp; | |||||
1072 | tmp = inv[i*cols+k]; inv[i*cols+k] = inv[j*cols+k]; inv[j*cols+k] = tmp; | |||||
1073 | } | |||||
1074 | } | |||||
1075 | ||||||
1076 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||||
1077 | for (j = i+1; j != rows; j++) { | |||||
1078 | if (mat[j*cols+i] != 0) { | |||||
1079 | for (k = 0; k < cols; k++) { | |||||
1080 | mat[j*cols+k] ^= mat[i*cols+k]; | |||||
1081 | inv[j*cols+k] ^= inv[i*cols+k]; | |||||
1082 | } | |||||
1083 | } | |||||
1084 | } | |||||
1085 | } | |||||
1086 | ||||||
1087 | /* Now the matrix is upper triangular. Start at the top and multiply down */ | |||||
1088 | ||||||
1089 | for (i = rows-1; i >= 0; i--) { | |||||
1090 | for (j = 0; j < i; j++) { | |||||
1091 | if (mat[j*cols+i]) { | |||||
1092 | for (k = 0; k < cols; k++) { | |||||
1093 | mat[j*cols+k] ^= mat[i*cols+k]; | |||||
1094 | inv[j*cols+k] ^= inv[i*cols+k]; | |||||
1095 | } | |||||
1096 | } | |||||
1097 | } | |||||
1098 | } | |||||
1099 | return 0; | |||||
1100 | } | |||||
1101 | ||||||
1102 | int jerasure_invertible_bitmatrix(int *mat, int rows) | |||||
1103 | { | |||||
1104 | int cols, i, j, k; | |||||
1105 | int tmp; | |||||
1106 | ||||||
1107 | cols = rows; | |||||
1108 | ||||||
1109 | /* First -- convert into upper triangular */ | |||||
1110 | ||||||
1111 | for (i = 0; i < cols; i++) { | |||||
1112 | ||||||
1113 | /* Swap rows if we have a zero i,i element. If we can't swap, then the | |||||
1114 | matrix was not invertible */ | |||||
1115 | ||||||
1116 | if ((mat[i*cols+i]) == 0) { | |||||
1117 | for (j = i+1; j < rows && (mat[j*cols+i]) == 0; j++) ; | |||||
1118 | if (j == rows) return 0; | |||||
1119 | for (k = 0; k < cols; k++) { | |||||
1120 | tmp = mat[i*cols+k]; mat[i*cols+k] = mat[j*cols+k]; mat[j*cols+k] = tmp; | |||||
1121 | } | |||||
1122 | } | |||||
1123 | ||||||
1124 | /* Now for each j>i, add A_ji*Ai to Aj */ | |||||
1125 | for (j = i+1; j != rows; j++) { | |||||
1126 | if (mat[j*cols+i] != 0) { | |||||
1127 | for (k = 0; k < cols; k++) { | |||||
1128 | mat[j*cols+k] ^= mat[i*cols+k]; | |||||
1129 | } | |||||
1130 | } | |||||
1131 | } | |||||
1132 | } | |||||
1133 | return 1; | |||||
1134 | } | |||||
1135 | ||||||
1136 | ||||||
1137 | int *jerasure_matrix_multiply(int *m1, int *m2, int r1, int c1, int r2, int c2, int w) | |||||
1138 | { | |||||
1139 | int *product, i, j, k; | |||||
1140 | ||||||
1141 | product = (int *) malloc(sizeof(int)*r1*c2); | |||||
1142 | for (i = 0; i < r1*c2; i++) product[i] = 0; | |||||
1143 | ||||||
1144 | for (i = 0; i < r1; i++) { | |||||
1145 | for (j = 0; j < c2; j++) { | |||||
1146 | for (k = 0; k < r2; k++) { | |||||
1147 | product[i*c2+j] ^= galois_single_multiply(m1[i*c1+k], m2[k*c2+j], w); | |||||
1148 | } | |||||
1149 | } | |||||
1150 | } | |||||
1151 | return product; | |||||
1152 | } | |||||
1153 | ||||||
1154 | void jerasure_get_stats(double *fill_in) | |||||
1155 | { | |||||
1156 | fill_in[0] = jerasure_total_xor_bytes; | |||||
1157 | fill_in[1] = jerasure_total_gf_bytes; | |||||
1158 | fill_in[2] = jerasure_total_memcpy_bytes; | |||||
1159 | jerasure_total_xor_bytes = 0; | |||||
1160 | jerasure_total_gf_bytes = 0; | |||||
1161 | jerasure_total_memcpy_bytes = 0; | |||||
1162 | } | |||||
1163 | ||||||
1164 | void jerasure_do_scheduled_operations(char **ptrs, int **operations, int packetsize) | |||||
1165 | { | |||||
1166 | char *sptr; | |||||
1167 | char *dptr; | |||||
1168 | int op; | |||||
1169 | ||||||
1170 | for (op = 0; operations[op][0] >= 0; op++) { | |||||
1171 | sptr = ptrs[operations[op][0]] + operations[op][1]*packetsize; | |||||
1172 | dptr = ptrs[operations[op][2]] + operations[op][3]*packetsize; | |||||
1173 | if (operations[op][4]) { | |||||
1174 | /* printf("%d,%d %d,%d\n", operations[op][0], | |||||
1175 | operations[op][1], | |||||
1176 | operations[op][2], | |||||
1177 | operations[op][3]); | |||||
1178 | printf("xor(0x%x, 0x%x -> 0x%x, %d)\n", sptr, dptr, dptr, packetsize); */ | |||||
1179 | galois_region_xor(sptr, dptr, packetsize); | |||||
1180 | jerasure_total_xor_bytes += packetsize; | |||||
1181 | } else { | |||||
1182 | /* printf("memcpy(0x%x <- 0x%x)\n", dptr, sptr); */ | |||||
1183 | memcpy(dptr, sptr, packetsize); | |||||
1184 | jerasure_total_memcpy_bytes += packetsize; | |||||
1185 | } | |||||
1186 | } | |||||
1187 | } | |||||
1188 | ||||||
1189 | void jerasure_schedule_encode(int k, int m, int w, int **schedule, | |||||
1190 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||||
1191 | { | |||||
1192 | char **ptr_copy; | |||||
1193 | int i, tdone; | |||||
1194 | ||||||
1195 | ptr_copy = talloc(char *, (k+m))(char * *) malloc(sizeof(char *)*((k+m))); | |||||
1196 | for (i = 0; i < k; i++) ptr_copy[i] = data_ptrs[i]; | |||||
1197 | for (i = 0; i < m; i++) ptr_copy[i+k] = coding_ptrs[i]; | |||||
1198 | for (tdone = 0; tdone < size; tdone += packetsize*w) { | |||||
1199 | jerasure_do_scheduled_operations(ptr_copy, schedule, packetsize); | |||||
1200 | for (i = 0; i < k+m; i++) ptr_copy[i] += (packetsize*w); | |||||
1201 | } | |||||
1202 | free(ptr_copy); | |||||
1203 | } | |||||
1204 | ||||||
1205 | int **jerasure_dumb_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix) | |||||
1206 | { | |||||
1207 | int **operations; | |||||
1208 | int op; | |||||
1209 | int index, optodo, i, j; | |||||
1210 | ||||||
1211 | operations = talloc(int *, k*m*w*w+1)(int * *) malloc(sizeof(int *)*(k*m*w*w+1)); | |||||
1212 | op = 0; | |||||
1213 | ||||||
1214 | index = 0; | |||||
1215 | for (i = 0; i < m*w; i++) { | |||||
1216 | optodo = 0; | |||||
1217 | for (j = 0; j < k*w; j++) { | |||||
1218 | if (bitmatrix[index]) { | |||||
1219 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||||
1220 | operations[op][4] = optodo; | |||||
1221 | operations[op][0] = j/w; | |||||
1222 | operations[op][1] = j%w; | |||||
1223 | operations[op][2] = k+i/w; | |||||
1224 | operations[op][3] = i%w; | |||||
1225 | optodo = 1; | |||||
1226 | op++; | |||||
1227 | ||||||
1228 | } | |||||
1229 | index++; | |||||
1230 | } | |||||
1231 | } | |||||
1232 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||||
1233 | operations[op][0] = -1; | |||||
1234 | return operations; | |||||
1235 | } | |||||
1236 | ||||||
1237 | int **jerasure_smart_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix) | |||||
1238 | { | |||||
1239 | int **operations; | |||||
1240 | int op; | |||||
1241 | int i, j; | |||||
1242 | int *diff, *from, *b1, *flink, *blink; | |||||
1243 | int *ptr, no, row; | |||||
1244 | int optodo; | |||||
1245 | int bestrow = 0, bestdiff, top; | |||||
1246 | ||||||
1247 | /* printf("Scheduling:\n\n"); | |||||
1248 | jerasure_print_bitmatrix(bitmatrix, m*w, k*w, w); */ | |||||
1249 | ||||||
1250 | operations = talloc(int *, k*m*w*w+1)(int * *) malloc(sizeof(int *)*(k*m*w*w+1)); | |||||
1251 | op = 0; | |||||
1252 | ||||||
1253 | diff = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||||
1254 | from = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||||
1255 | flink = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||||
1256 | blink = talloc(int, m*w)(int *) malloc(sizeof(int)*(m*w)); | |||||
1257 | ||||||
1258 | ptr = bitmatrix; | |||||
1259 | ||||||
1260 | bestdiff = k*w+1; | |||||
1261 | top = 0; | |||||
1262 | for (i = 0; i < m*w; i++) { | |||||
1263 | no = 0; | |||||
1264 | for (j = 0; j < k*w; j++) { | |||||
1265 | no += *ptr; | |||||
1266 | ptr++; | |||||
1267 | } | |||||
1268 | diff[i] = no; | |||||
1269 | from[i] = -1; | |||||
1270 | flink[i] = i+1; | |||||
1271 | blink[i] = i-1; | |||||
1272 | if (no < bestdiff) { | |||||
1273 | bestdiff = no; | |||||
1274 | bestrow = i; | |||||
1275 | } | |||||
1276 | } | |||||
1277 | ||||||
1278 | flink[m*w-1] = -1; | |||||
1279 | ||||||
1280 | while (top != -1) { | |||||
1281 | row = bestrow; | |||||
1282 | /* printf("Doing row %d - %d from %d\n", row, diff[row], from[row]); */ | |||||
1283 | ||||||
1284 | if (blink[row] == -1) { | |||||
1285 | top = flink[row]; | |||||
1286 | if (top != -1) blink[top] = -1; | |||||
1287 | } else { | |||||
1288 | flink[blink[row]] = flink[row]; | |||||
1289 | if (flink[row] != -1) { | |||||
1290 | blink[flink[row]] = blink[row]; | |||||
1291 | } | |||||
1292 | } | |||||
1293 | ||||||
1294 | ptr = bitmatrix + row*k*w; | |||||
1295 | if (from[row] == -1) { | |||||
1296 | optodo = 0; | |||||
1297 | for (j = 0; j < k*w; j++) { | |||||
1298 | if (ptr[j]) { | |||||
1299 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||||
1300 | operations[op][4] = optodo; | |||||
1301 | operations[op][0] = j/w; | |||||
1302 | operations[op][1] = j%w; | |||||
1303 | operations[op][2] = k+row/w; | |||||
1304 | operations[op][3] = row%w; | |||||
1305 | optodo = 1; | |||||
1306 | op++; | |||||
1307 | } | |||||
1308 | } | |||||
1309 | } else { | |||||
1310 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||||
1311 | operations[op][4] = 0; | |||||
1312 | operations[op][0] = k+from[row]/w; | |||||
1313 | operations[op][1] = from[row]%w; | |||||
1314 | operations[op][2] = k+row/w; | |||||
1315 | operations[op][3] = row%w; | |||||
1316 | op++; | |||||
1317 | b1 = bitmatrix + from[row]*k*w; | |||||
1318 | for (j = 0; j < k*w; j++) { | |||||
1319 | if (ptr[j] ^ b1[j]) { | |||||
1320 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||||
1321 | operations[op][4] = 1; | |||||
1322 | operations[op][0] = j/w; | |||||
1323 | operations[op][1] = j%w; | |||||
1324 | operations[op][2] = k+row/w; | |||||
1325 | operations[op][3] = row%w; | |||||
1326 | optodo = 1; | |||||
1327 | op++; | |||||
1328 | } | |||||
1329 | } | |||||
1330 | } | |||||
1331 | bestdiff = k*w+1; | |||||
1332 | for (i = top; i != -1; i = flink[i]) { | |||||
1333 | no = 1; | |||||
1334 | b1 = bitmatrix + i*k*w; | |||||
1335 | for (j = 0; j < k*w; j++) no += (ptr[j] ^ b1[j]); | |||||
1336 | if (no < diff[i]) { | |||||
1337 | from[i] = row; | |||||
1338 | diff[i] = no; | |||||
1339 | } | |||||
1340 | if (diff[i] < bestdiff) { | |||||
1341 | bestdiff = diff[i]; | |||||
1342 | bestrow = i; | |||||
1343 | } | |||||
1344 | } | |||||
1345 | } | |||||
1346 | ||||||
1347 | operations[op] = talloc(int, 5)(int *) malloc(sizeof(int)*(5)); | |||||
1348 | operations[op][0] = -1; | |||||
1349 | free(from); | |||||
1350 | free(diff); | |||||
1351 | free(blink); | |||||
1352 | free(flink); | |||||
1353 | ||||||
1354 | return operations; | |||||
1355 | } | |||||
1356 | ||||||
1357 | void jerasure_bitmatrix_encode(int k, int m, int w, int *bitmatrix, | |||||
1358 | char **data_ptrs, char **coding_ptrs, int size, int packetsize) | |||||
1359 | { | |||||
1360 | int i; | |||||
1361 | ||||||
1362 | if (packetsize%sizeof(long) != 0) { | |||||
1363 | fprintf(stderr, "jerasure_bitmatrix_encode - packetsize(%d) %c sizeof(long) != 0\n", packetsize, '%')__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_encode - packetsize(%d) %c sizeof(long) != 0\n" , packetsize, '%'); | |||||
1364 | assert(0)((void) (0)); | |||||
1365 | } | |||||
1366 | if (size%(packetsize*w) != 0) { | |||||
1367 | fprintf(stderr, "jerasure_bitmatrix_encode - size(%d) %c (packetsize(%d)*w(%d))) != 0\n",__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_encode - size(%d) %c (packetsize(%d)*w(%d))) != 0\n" , size, '%', packetsize, w) | |||||
1368 | size, '%', packetsize, w)__fprintf_chk (stderr, 2 - 1, "jerasure_bitmatrix_encode - size(%d) %c (packetsize(%d)*w(%d))) != 0\n" , size, '%', packetsize, w); | |||||
1369 | assert(0)((void) (0)); | |||||
1370 | } | |||||
1371 | ||||||
1372 | for (i = 0; i < m; i++) { | |||||
1373 | jerasure_bitmatrix_dotprod(k, w, bitmatrix+i*k*w*w, NULL((void*)0), k+i, data_ptrs, coding_ptrs, size, packetsize); | |||||
1374 | } | |||||
1375 | } | |||||
1376 | ||||||
1377 | /* | |||||
1378 | * Exported function for use by autoconf to perform quick | |||||
1379 | * spot-check. | |||||
1380 | */ | |||||
1381 | int jerasure_autoconf_test() | |||||
1382 | { | |||||
1383 | int x = galois_single_multiply(1, 2, 8); | |||||
1384 | if (x != 2) { | |||||
1385 | return -1; | |||||
1386 | } | |||||
1387 | return 0; | |||||
1388 | } | |||||
1389 |