File: home/bhubbard/working/src/ceph/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c
Warning: line 931, column 3: Value stored to 'aligned_end' is never read
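The flagged statement is in malloc_heap_free() (source line 931 below): after aligned_end is pulled back by one page, the rest of the function reads only aligned_start, aligned_len and n_segs, so the value written to aligned_end is never used again. A short, hedged sketch of one possible rework follows the flagged block in the listing.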
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2014 Intel Corporation |
3 | */ |
4 | #include <stdint.h> |
5 | #include <stddef.h> |
6 | #include <stdlib.h> |
7 | #include <stdio.h> |
8 | #include <stdarg.h> |
9 | #include <errno.h> |
10 | #include <sys/queue.h> |
11 | |
12 | #include <rte_memory.h> |
13 | #include <rte_errno.h> |
14 | #include <rte_eal.h> |
15 | #include <rte_eal_memconfig.h> |
16 | #include <rte_launch.h> |
17 | #include <rte_per_lcore.h> |
18 | #include <rte_lcore.h> |
19 | #include <rte_common.h> |
20 | #include <rte_string_fns.h> |
21 | #include <rte_spinlock.h> |
22 | #include <rte_memcpy.h> |
23 | #include <rte_atomic.h> |
24 | #include <rte_fbarray.h> |
25 | |
26 | #include "eal_internal_cfg.h" |
27 | #include "eal_memalloc.h" |
28 | #include "malloc_elem.h" |
29 | #include "malloc_heap.h" |
30 | #include "malloc_mp.h" |
31 | |
32 | /* start external socket ID's at a very high number */ |
33 | #define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */ |
34 | #define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES)) |
35 | |
36 | static unsigned |
37 | check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) |
38 | { |
39 | unsigned check_flag = 0; |
40 | |
41 | if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY)) |
42 | return 1; |
43 | |
44 | switch (hugepage_sz) { |
45 | case RTE_PGSIZE_256K: |
46 | check_flag = RTE_MEMZONE_256KB; |
47 | break; |
48 | case RTE_PGSIZE_2M: |
49 | check_flag = RTE_MEMZONE_2MB; |
50 | break; |
51 | case RTE_PGSIZE_16M: |
52 | check_flag = RTE_MEMZONE_16MB; |
53 | break; |
54 | case RTE_PGSIZE_256M: |
55 | check_flag = RTE_MEMZONE_256MB; |
56 | break; |
57 | case RTE_PGSIZE_512M: |
58 | check_flag = RTE_MEMZONE_512MB; |
59 | break; |
60 | case RTE_PGSIZE_1G: |
61 | check_flag = RTE_MEMZONE_1GB; |
62 | break; |
63 | case RTE_PGSIZE_4G: |
64 | check_flag = RTE_MEMZONE_4GB; |
65 | break; |
66 | case RTE_PGSIZE_16G: |
67 | check_flag = RTE_MEMZONE_16GB; |
68 | } |
69 | |
70 | return check_flag & flags; |
71 | } |
72 | |
73 | int |
74 | malloc_socket_to_heap_id(unsigned int socket_id) |
75 | { |
76 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
77 | int i; |
78 | |
79 | for (i = 0; i < RTE_MAX_HEAPS; i++) { |
80 | struct malloc_heap *heap = &mcfg->malloc_heaps[i]; |
81 | |
82 | if (heap->socket_id == socket_id) |
83 | return i; |
84 | } |
85 | return -1; |
86 | } |
87 | |
88 | /* |
89 | * Expand the heap with a memory area. |
90 | */ |
91 | static struct malloc_elem * |
92 | malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl, |
93 | void *start, size_t len) |
94 | { |
95 | struct malloc_elem *elem = start; |
96 | |
97 | malloc_elem_init(elem, heap, msl, len, elem, len); |
98 | |
99 | malloc_elem_insert(elem); |
100 | |
101 | elem = malloc_elem_join_adjacent_free(elem); |
102 | |
103 | malloc_elem_free_list_insert(elem); |
104 | |
105 | return elem; |
106 | } |
107 | |
108 | static int |
109 | malloc_add_seg(const struct rte_memseg_list *msl, |
110 | const struct rte_memseg *ms, size_t len, void *arg __rte_unused) |
111 | { |
112 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
113 | struct rte_memseg_list *found_msl; |
114 | struct malloc_heap *heap; |
115 | int msl_idx, heap_idx; |
116 | |
117 | if (msl->external) |
118 | return 0; |
119 | |
120 | heap_idx = malloc_socket_to_heap_id(msl->socket_id); |
121 | if (heap_idx < 0) { |
122 | RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n"); |
123 | return -1; |
124 | } |
125 | heap = &mcfg->malloc_heaps[heap_idx]; |
126 | |
127 | /* msl is const, so find it */ |
128 | msl_idx = msl - mcfg->memsegs; |
129 | |
130 | if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS) |
131 | return -1; |
132 | |
133 | found_msl = &mcfg->memsegs[msl_idx]; |
134 | |
135 | malloc_heap_add_memory(heap, found_msl, ms->addr, len); |
136 | |
137 | heap->total_size += len; |
138 | |
139 | RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20, |
140 | msl->socket_id); |
141 | return 0; |
142 | } |
143 | |
144 | /* |
145 | * Iterates through the freelist for a heap to find a free element |
146 | * which can store data of the required size and with the requested alignment. |
147 | * If size is 0, find the biggest available elem. |
148 | * Returns null on failure, or pointer to element on success. |
149 | */ |
150 | static struct malloc_elem * |
151 | find_suitable_element(struct malloc_heap *heap, size_t size, |
152 | unsigned int flags, size_t align, size_t bound, bool contig) |
153 | { |
154 | size_t idx; |
155 | struct malloc_elem *elem, *alt_elem = NULL; |
156 | |
157 | for (idx = malloc_elem_free_list_index(size); |
158 | idx < RTE_HEAP_NUM_FREELISTS; idx++) { |
159 | for (elem = LIST_FIRST(&heap->free_head[idx]); |
160 | !!elem; elem = LIST_NEXT(elem, free_list)) { |
161 | if (malloc_elem_can_hold(elem, size, align, bound, |
162 | contig)) { |
163 | if (check_hugepage_sz(flags, |
164 | elem->msl->page_sz)) |
165 | return elem; |
166 | if (alt_elem == NULL) |
167 | alt_elem = elem; |
168 | } |
169 | } |
170 | } |
171 | |
172 | if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY)) |
173 | return alt_elem; |
174 | |
175 | return NULL; |
176 | } |
177 | |
178 | /* |
179 | * Iterates through the freelist for a heap to find a free element with the |
180 | * biggest size and requested alignment. Will also set size to whatever element |
181 | * size that was found. |
182 | * Returns null on failure, or pointer to element on success. |
183 | */ |
184 | static struct malloc_elem * |
185 | find_biggest_element(struct malloc_heap *heap, size_t *size, |
186 | unsigned int flags, size_t align, bool contig) |
187 | { |
188 | struct malloc_elem *elem, *max_elem = NULL; |
189 | size_t idx, max_size = 0; |
190 | |
191 | for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { |
192 | for (elem = LIST_FIRST(&heap->free_head[idx]); |
193 | !!elem; elem = LIST_NEXT(elem, free_list)) { |
194 | size_t cur_size; |
195 | if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 && |
196 | !check_hugepage_sz(flags, |
197 | elem->msl->page_sz)) |
198 | continue; |
199 | if (contig) { |
200 | cur_size = |
201 | malloc_elem_find_max_iova_contig(elem, |
202 | align); |
203 | } else { |
204 | void *data_start = RTE_PTR_ADD(elem, |
205 | MALLOC_ELEM_HEADER_LEN); |
206 | void *data_end = RTE_PTR_ADD(elem, elem->size - |
207 | MALLOC_ELEM_TRAILER_LEN); |
208 | void *aligned = RTE_PTR_ALIGN_CEIL(data_start, |
209 | align); |
210 | /* check if aligned data start is beyond end */ |
211 | if (aligned >= data_end) |
212 | continue; |
213 | cur_size = RTE_PTR_DIFF(data_end, aligned); |
214 | } |
215 | if (cur_size > max_size) { |
216 | max_size = cur_size; |
217 | max_elem = elem; |
218 | } |
219 | } |
220 | } |
221 | |
222 | *size = max_size; |
223 | return max_elem; |
224 | } |
225 | |
226 | /* |
227 | * Main function to allocate a block of memory from the heap. |
228 | * It locks the free list, scans it, and adds a new memseg if the |
229 | * scan fails. Once the new memseg is added, it re-scans and should return |
230 | * the new element after releasing the lock. |
231 | */ |
232 | static void * |
233 | heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, |
234 | unsigned int flags, size_t align, size_t bound, bool contig) |
235 | { |
236 | struct malloc_elem *elem; |
237 | |
238 | size = RTE_CACHE_LINE_ROUNDUP(size); |
239 | align = RTE_CACHE_LINE_ROUNDUP(align); |
240 | |
241 | elem = find_suitable_element(heap, size, flags, align, bound, contig); |
242 | if (elem != NULL) { |
243 | elem = malloc_elem_alloc(elem, size, align, bound, contig); |
244 | |
245 | /* increase heap's count of allocated elements */ |
246 | heap->alloc_count++; |
247 | } |
248 | |
249 | return elem == NULL ? NULL : (void *)(&elem[1]); |
250 | } |
251 | |
252 | static void * |
253 | heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused, |
254 | unsigned int flags, size_t align, bool contig) |
255 | { |
256 | struct malloc_elem *elem; |
257 | size_t size; |
258 | |
259 | align = RTE_CACHE_LINE_ROUNDUP(align); |
260 | |
261 | elem = find_biggest_element(heap, &size, flags, align, contig); |
262 | if (elem != NULL) { |
263 | elem = malloc_elem_alloc(elem, size, align, 0, contig); |
264 | |
265 | /* increase heap's count of allocated elements */ |
266 | heap->alloc_count++; |
267 | } |
268 | |
269 | return elem == NULL ? NULL : (void *)(&elem[1]); |
270 | } |
271 | |
272 | /* this function is exposed in malloc_mp.h */ |
273 | void |
274 | rollback_expand_heap(struct rte_memseg **ms, int n_segs, |
275 | struct malloc_elem *elem, void *map_addr, size_t map_len) |
276 | { |
277 | if (elem != NULL) { |
278 | malloc_elem_free_list_remove(elem); |
279 | malloc_elem_hide_region(elem, map_addr, map_len); |
280 | } |
281 | |
282 | eal_memalloc_free_seg_bulk(ms, n_segs); |
283 | } |
284 | |
285 | /* this function is exposed in malloc_mp.h */ |
286 | struct malloc_elem * |
287 | alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, |
288 | int socket, unsigned int flags, size_t align, size_t bound, |
289 | bool contig, struct rte_memseg **ms, int n_segs) |
290 | { |
291 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
292 | struct rte_memseg_list *msl; |
293 | struct malloc_elem *elem = NULL; |
294 | size_t alloc_sz; |
295 | int allocd_pages; |
296 | void *ret, *map_addr; |
297 | |
298 | alloc_sz = (size_t)pg_sz * n_segs; |
299 | |
300 | /* first, check if we're allowed to allocate this memory */ |
301 | if (eal_memalloc_mem_alloc_validate(socket, |
302 | heap->total_size + alloc_sz) < 0) { |
303 | RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n"); |
304 | return NULL; |
305 | } |
306 | |
307 | allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz, |
308 | socket, true); |
309 | |
310 | /* make sure we've allocated our pages... */ |
311 | if (allocd_pages < 0) |
312 | return NULL; |
313 | |
314 | map_addr = ms[0]->addr; |
315 | msl = rte_mem_virt2memseg_list(map_addr); |
316 | |
317 | /* check if we wanted contiguous memory but didn't get it */ |
318 | if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) { |
319 | RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n", |
320 | __func__); |
321 | goto fail; |
322 | } |
323 | |
324 | /* |
325 | * Once we have all the memseg lists configured, if there is a dma mask |
326 | * set, check iova addresses are not out of range. Otherwise the device |
327 | * setting the dma mask could have problems with the mapped memory. |
328 | * |
329 | * There are two situations when this can happen: |
330 | * 1) memory initialization |
331 | * 2) dynamic memory allocation |
332 | * |
333 | * For 1), an error when checking dma mask implies app can not be |
334 | * executed. For 2) implies the new memory can not be added. |
335 | */ |
336 | if (mcfg->dma_maskbits && |
337 | rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { |
338 | /* |
339 | * Currently this can only happen if IOMMU is enabled |
340 | * and the address width supported by the IOMMU hw is |
341 | * not enough for using the memory mapped IOVAs. |
342 | * |
343 | * If IOVA is VA, advice to try with '--iova-mode pa' |
344 | * which could solve some situations when IOVA VA is not |
345 | * really needed. |
346 | */ |
347 | RTE_LOG(ERR, EAL, |
348 | "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask\n", |
349 | __func__); |
350 | |
351 | /* |
352 | * If IOVA is VA and it is possible to run with IOVA PA, |
353 | * because user is root, give and advice for solving the |
354 | * problem. |
355 | */ |
356 | if ((rte_eal_iova_mode() == RTE_IOVA_VA) && |
357 | rte_eal_using_phys_addrs()) |
358 | RTE_LOG(ERR, EAL, |
359 | "%s(): Please try initializing EAL with --iova-mode=pa parameter\n", |
360 | __func__); |
361 | goto fail; |
362 | } |
363 | |
364 | /* add newly minted memsegs to malloc heap */ |
365 | elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz); |
366 | |
367 | /* try once more, as now we have allocated new memory */ |
368 | ret = find_suitable_element(heap, elt_size, flags, align, bound, |
369 | contig); |
370 | |
371 | if (ret == NULL) |
372 | goto fail; |
373 | |
374 | return elem; |
375 | |
376 | fail: |
377 | rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); |
378 | return NULL; |
379 | } |
380 | |
381 | static int |
382 | try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz, |
383 | size_t elt_size, int socket, unsigned int flags, size_t align, |
384 | size_t bound, bool contig) |
385 | { |
386 | struct malloc_elem *elem; |
387 | struct rte_memseg **ms; |
388 | void *map_addr; |
389 | size_t alloc_sz; |
390 | int n_segs; |
391 | bool callback_triggered = false; |
392 | |
393 | alloc_sz = RTE_ALIGN_CEIL(align + elt_size + |
394 | MALLOC_ELEM_TRAILER_LEN, pg_sz); |
395 | n_segs = alloc_sz / pg_sz; |
396 | |
397 | /* we can't know in advance how many pages we'll need, so we malloc */ |
398 | ms = malloc(sizeof(*ms) * n_segs); |
399 | if (ms == NULL) |
400 | return -1; |
401 | memset(ms, 0, sizeof(*ms) * n_segs); |
402 | |
403 | elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align, |
404 | bound, contig, ms, n_segs); |
405 | |
406 | if (elem == NULL) |
407 | goto free_ms; |
408 | |
409 | map_addr = ms[0]->addr; |
410 | |
411 | /* notify user about changes in memory map */ |
412 | eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz); |
413 | |
414 | /* notify other processes that this has happened */ |
415 | if (request_sync()) { |
416 | /* we couldn't ensure all processes have mapped memory, |
417 | * so free it back and notify everyone that it's been |
418 | * freed back. |
419 | * |
420 | * technically, we could've avoided adding memory addresses to |
421 | * the map, but that would've led to inconsistent behavior |
422 | * between primary and secondary processes, as those get |
423 | * callbacks during sync. therefore, force primary process to |
424 | * do alloc-and-rollback syncs as well. |
425 | */ |
426 | callback_triggered = true; |
427 | goto free_elem; |
428 | } |
429 | heap->total_size += alloc_sz; |
430 | |
431 | RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n", |
432 | socket, alloc_sz >> 20ULL); |
433 | |
434 | free(ms); |
435 | |
436 | return 0; |
437 | |
438 | free_elem: |
439 | if (callback_triggered) |
440 | eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, |
441 | map_addr, alloc_sz); |
442 | |
443 | rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz); |
444 | |
445 | request_sync(); |
446 | free_ms: |
447 | free(ms); |
448 | |
449 | return -1; |
450 | } |
451 | |
452 | static int |
453 | try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz, |
454 | size_t elt_size, int socket, unsigned int flags, size_t align, |
455 | size_t bound, bool contig) |
456 | { |
457 | struct malloc_mp_req req; |
458 | int req_result; |
459 | |
460 | memset(&req, 0, sizeof(req)); |
461 | |
462 | req.t = REQ_TYPE_ALLOC; |
463 | req.alloc_req.align = align; |
464 | req.alloc_req.bound = bound; |
465 | req.alloc_req.contig = contig; |
466 | req.alloc_req.flags = flags; |
467 | req.alloc_req.elt_size = elt_size; |
468 | req.alloc_req.page_sz = pg_sz; |
469 | req.alloc_req.socket = socket; |
470 | req.alloc_req.heap = heap; /* it's in shared memory */ |
471 | |
472 | req_result = request_to_primary(&req); |
473 | |
474 | if (req_result != 0) |
475 | return -1; |
476 | |
477 | if (req.result != REQ_RESULT_SUCCESS) |
478 | return -1; |
479 | |
480 | return 0; |
481 | } |
482 | |
483 | static int |
484 | try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, |
485 | int socket, unsigned int flags, size_t align, size_t bound, |
486 | bool contig) |
487 | { |
488 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
489 | int ret; |
490 | |
491 | rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); |
492 | |
493 | if (rte_eal_process_type() == RTE_PROC_PRIMARY) { |
494 | ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket, |
495 | flags, align, bound, contig); |
496 | } else { |
497 | ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket, |
498 | flags, align, bound, contig); |
499 | } |
500 | |
501 | rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); |
502 | return ret; |
503 | } |
504 | |
505 | static int |
506 | compare_pagesz(const void *a, const void *b) |
507 | { |
508 | const struct rte_memseg_list * const*mpa = a; |
509 | const struct rte_memseg_list * const*mpb = b; |
510 | const struct rte_memseg_list *msla = *mpa; |
511 | const struct rte_memseg_list *mslb = *mpb; |
512 | uint64_t pg_sz_a = msla->page_sz; |
513 | uint64_t pg_sz_b = mslb->page_sz; |
514 | |
515 | if (pg_sz_a < pg_sz_b) |
516 | return -1; |
517 | if (pg_sz_a > pg_sz_b) |
518 | return 1; |
519 | return 0; |
520 | } |
521 | |
522 | static int |
523 | alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket, |
524 | unsigned int flags, size_t align, size_t bound, bool contig) |
525 | { |
526 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
527 | struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS]; |
528 | struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS]; |
529 | uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS]; |
530 | uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS]; |
531 | uint64_t prev_pg_sz; |
532 | int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz; |
533 | bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0; |
534 | unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; |
535 | void *ret; |
536 | |
537 | memset(requested_msls, 0, sizeof(requested_msls)); |
538 | memset(other_msls, 0, sizeof(other_msls)); |
539 | memset(requested_pg_sz, 0, sizeof(requested_pg_sz)); |
540 | memset(other_pg_sz, 0, sizeof(other_pg_sz)); |
541 | |
542 | /* |
543 | * go through memseg list and take note of all the page sizes available, |
544 | * and if any of them were specifically requested by the user. |
545 | */ |
546 | n_requested_msls = 0; |
547 | n_other_msls = 0; |
548 | for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { |
549 | struct rte_memseg_list *msl = &mcfg->memsegs[i]; |
550 | |
551 | if (msl->socket_id != socket) |
552 | continue; |
553 | |
554 | if (msl->base_va == NULL) |
555 | continue; |
556 | |
557 | /* if pages of specific size were requested */ |
558 | if (size_flags != 0 && check_hugepage_sz(size_flags, |
559 | msl->page_sz)) |
560 | requested_msls[n_requested_msls++] = msl; |
561 | else if (size_flags == 0 || size_hint) |
562 | other_msls[n_other_msls++] = msl; |
563 | } |
564 | |
565 | /* sort the lists, smallest first */ |
566 | qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]), |
567 | compare_pagesz); |
568 | qsort(other_msls, n_other_msls, sizeof(other_msls[0]), |
569 | compare_pagesz); |
570 | |
571 | /* now, extract page sizes we are supposed to try */ |
572 | prev_pg_sz = 0; |
573 | n_requested_pg_sz = 0; |
574 | for (i = 0; i < n_requested_msls; i++) { |
575 | uint64_t pg_sz = requested_msls[i]->page_sz; |
576 | |
577 | if (prev_pg_sz != pg_sz) { |
578 | requested_pg_sz[n_requested_pg_sz++] = pg_sz; |
579 | prev_pg_sz = pg_sz; |
580 | } |
581 | } |
582 | prev_pg_sz = 0; |
583 | n_other_pg_sz = 0; |
584 | for (i = 0; i < n_other_msls; i++) { |
585 | uint64_t pg_sz = other_msls[i]->page_sz; |
586 | |
587 | if (prev_pg_sz != pg_sz) { |
588 | other_pg_sz[n_other_pg_sz++] = pg_sz; |
589 | prev_pg_sz = pg_sz; |
590 | } |
591 | } |
592 | |
593 | /* finally, try allocating memory of specified page sizes, starting from |
594 | * the smallest sizes |
595 | */ |
596 | for (i = 0; i < n_requested_pg_sz; i++) { |
597 | uint64_t pg_sz = requested_pg_sz[i]; |
598 | |
599 | /* |
600 | * do not pass the size hint here, as user expects other page |
601 | * sizes first, before resorting to best effort allocation. |
602 | */ |
603 | if (!try_expand_heap(heap, pg_sz, size, socket, size_flags, |
604 | align, bound, contig)) |
605 | return 0; |
606 | } |
607 | if (n_other_pg_sz == 0) |
608 | return -1; |
609 | |
610 | /* now, check if we can reserve anything with size hint */ |
611 | ret = find_suitable_element(heap, size, flags, align, bound, contig); |
612 | if (ret != NULL) |
613 | return 0; |
614 | |
615 | /* |
616 | * we still couldn't reserve memory, so try expanding heap with other |
617 | * page sizes, if there are any |
618 | */ |
619 | for (i = 0; i < n_other_pg_sz; i++) { |
620 | uint64_t pg_sz = other_pg_sz[i]; |
621 | |
622 | if (!try_expand_heap(heap, pg_sz, size, socket, flags, |
623 | align, bound, contig)) |
624 | return 0; |
625 | } |
626 | return -1; |
627 | } |
628 | |
629 | /* this will try lower page sizes first */ |
630 | static void * |
631 | malloc_heap_alloc_on_heap_id(const char *type, size_t size, |
632 | unsigned int heap_id, unsigned int flags, size_t align, |
633 | size_t bound, bool contig) |
634 | { |
635 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
636 | struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; |
637 | unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; |
638 | int socket_id; |
639 | void *ret; |
640 | |
641 | rte_spinlock_lock(&(heap->lock)); |
642 | |
643 | align = align == 0 ? 1 : align; |
644 | |
645 | /* for legacy mode, try once and with all flags */ |
646 | if (internal_config.legacy_mem) { |
647 | ret = heap_alloc(heap, type, size, flags, align, bound, contig); |
648 | goto alloc_unlock; |
649 | } |
650 | |
651 | /* |
652 | * we do not pass the size hint here, because even if allocation fails, |
653 | * we may still be able to allocate memory from appropriate page sizes, |
654 | * we just need to request more memory first. |
655 | */ |
656 | |
657 | socket_id = rte_socket_id_by_idx(heap_id); |
658 | /* |
659 | * if socket ID is negative, we cannot find a socket ID for this heap - |
660 | * which means it's an external heap. those can have unexpected page |
661 | * sizes, so if the user asked to allocate from there - assume user |
662 | * knows what they're doing, and allow allocating from there with any |
663 | * page size flags. |
664 | */ |
665 | if (socket_id < 0) |
666 | size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY; |
667 | |
668 | ret = heap_alloc(heap, type, size, size_flags, align, bound, contig); |
669 | if (ret != NULL) |
670 | goto alloc_unlock; |
671 | |
672 | /* if socket ID is invalid, this is an external heap */ |
673 | if (socket_id < 0) |
674 | goto alloc_unlock; |
675 | |
676 | if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align, |
677 | bound, contig)) { |
678 | ret = heap_alloc(heap, type, size, flags, align, bound, contig); |
679 | |
680 | /* this should have succeeded */ |
681 | if (ret == NULL) |
682 | RTE_LOG(ERR, EAL, "Error allocating from heap\n"); |
683 | } |
684 | alloc_unlock: |
685 | rte_spinlock_unlock(&(heap->lock)); |
686 | return ret; |
687 | } |
688 | |
689 | void * |
690 | malloc_heap_alloc(const char *type, size_t size, int socket_arg, |
691 | unsigned int flags, size_t align, size_t bound, bool contig) |
692 | { |
693 | int socket, heap_id, i; |
694 | void *ret; |
695 | |
696 | /* return NULL if size is 0 or alignment is not power-of-2 */ |
697 | if (size == 0 || (align && !rte_is_power_of_2(align))) |
698 | return NULL; |
699 | |
700 | if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES) |
701 | socket_arg = SOCKET_ID_ANY; |
702 | |
703 | if (socket_arg == SOCKET_ID_ANY) |
704 | socket = malloc_get_numa_socket(); |
705 | else |
706 | socket = socket_arg; |
707 | |
708 | /* turn socket ID into heap ID */ |
709 | heap_id = malloc_socket_to_heap_id(socket); |
710 | /* if heap id is negative, socket ID was invalid */ |
711 | if (heap_id < 0) |
712 | return NULL; |
713 | |
714 | ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align, |
715 | bound, contig); |
716 | if (ret != NULL || socket_arg != SOCKET_ID_ANY) |
717 | return ret; |
718 | |
719 | /* try other heaps. we are only iterating through native DPDK sockets, |
720 | * so external heaps won't be included. |
721 | */ |
722 | for (i = 0; i < (int) rte_socket_count(); i++) { |
723 | if (i == heap_id) |
724 | continue; |
725 | ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align, |
726 | bound, contig); |
727 | if (ret != NULL) |
728 | return ret; |
729 | } |
730 | return NULL; |
731 | } |
732 | |
733 | static void * |
734 | heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id, |
735 | unsigned int flags, size_t align, bool contig) |
736 | { |
737 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
738 | struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; |
739 | void *ret; |
740 | |
741 | rte_spinlock_lock(&(heap->lock)); |
742 | |
743 | align = align == 0 ? 1 : align; |
744 | |
745 | ret = heap_alloc_biggest(heap, type, flags, align, contig); |
746 | |
747 | rte_spinlock_unlock(&(heap->lock)); |
748 | |
749 | return ret; |
750 | } |
751 | |
752 | void * |
753 | malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, |
754 | size_t align, bool contig) |
755 | { |
756 | int socket, i, cur_socket, heap_id; |
757 | void *ret; |
758 | |
759 | /* return NULL if align is not power-of-2 */ |
760 | if ((align && !rte_is_power_of_2(align))) |
761 | return NULL; |
762 | |
763 | if (!rte_eal_has_hugepages()) |
764 | socket_arg = SOCKET_ID_ANY; |
765 | |
766 | if (socket_arg == SOCKET_ID_ANY) |
767 | socket = malloc_get_numa_socket(); |
768 | else |
769 | socket = socket_arg; |
770 | |
771 | /* turn socket ID into heap ID */ |
772 | heap_id = malloc_socket_to_heap_id(socket); |
773 | /* if heap id is negative, socket ID was invalid */ |
774 | if (heap_id < 0) |
775 | return NULL; |
776 | |
777 | ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align, |
778 | contig); |
779 | if (ret != NULL || socket_arg != SOCKET_ID_ANY) |
780 | return ret; |
781 | |
782 | /* try other heaps */ |
783 | for (i = 0; i < (int) rte_socket_count(); i++) { |
784 | cur_socket = rte_socket_id_by_idx(i); |
785 | if (cur_socket == socket) |
786 | continue; |
787 | ret = heap_alloc_biggest_on_heap_id(type, i, flags, align, |
788 | contig); |
789 | if (ret != NULL) |
790 | return ret; |
791 | } |
792 | return NULL; |
793 | } |
794 | |
795 | /* this function is exposed in malloc_mp.h */ |
796 | int |
797 | malloc_heap_free_pages(void *aligned_start, size_t aligned_len) |
798 | { |
799 | int n_segs, seg_idx, max_seg_idx; |
800 | struct rte_memseg_list *msl; |
801 | size_t page_sz; |
802 | |
803 | msl = rte_mem_virt2memseg_list(aligned_start); |
804 | if (msl == NULL) |
805 | return -1; |
806 | |
807 | page_sz = (size_t)msl->page_sz; |
808 | n_segs = aligned_len / page_sz; |
809 | seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz; |
810 | max_seg_idx = seg_idx + n_segs; |
811 | |
812 | for (; seg_idx < max_seg_idx; seg_idx++) { |
813 | struct rte_memseg *ms; |
814 | |
815 | ms = rte_fbarray_get(&msl->memseg_arr, seg_idx); |
816 | eal_memalloc_free_seg(ms); |
817 | } |
818 | return 0; |
819 | } |
820 | |
821 | int |
822 | malloc_heap_free(struct malloc_elem *elem) |
823 | { |
824 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
825 | struct malloc_heap *heap; |
826 | void *start, *aligned_start, *end, *aligned_end; |
827 | size_t len, aligned_len, page_sz; |
828 | struct rte_memseg_list *msl; |
829 | unsigned int i, n_segs, before_space, after_space; |
830 | int ret; |
831 | |
832 | if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) |
833 | return -1; |
834 | |
835 | /* elem may be merged with previous element, so keep heap address */ |
836 | heap = elem->heap; |
837 | msl = elem->msl; |
838 | page_sz = (size_t)msl->page_sz; |
839 | |
840 | rte_spinlock_lock(&(heap->lock)); |
841 | |
842 | /* mark element as free */ |
843 | elem->state = ELEM_FREE; |
844 | |
845 | elem = malloc_elem_free(elem); |
846 | |
847 | /* anything after this is a bonus */ |
848 | ret = 0; |
849 | |
850 | /* ...of which we can't avail if we are in legacy mode, or if this is an |
851 | * externally allocated segment. |
852 | */ |
853 | if (internal_config.legacy_mem || (msl->external > 0)) |
854 | goto free_unlock; |
855 | |
856 | /* check if we can free any memory back to the system */ |
857 | if (elem->size < page_sz) |
858 | goto free_unlock; |
859 | |
860 | /* if user requested to match allocations, the sizes must match - if not, |
861 | * we will defer freeing these hugepages until the entire original allocation |
862 | * can be freed |
863 | */ |
864 | if (internal_config.match_allocations && elem->size != elem->orig_size) |
865 | goto free_unlock; |
866 | |
867 | /* probably, but let's make sure, as we may not be using up full page */ |
868 | start = elem; |
869 | len = elem->size; |
870 | aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz); |
871 | end = RTE_PTR_ADD(elem, len); |
872 | aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz); |
873 | |
874 | aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); |
875 | |
876 | /* can't free anything */ |
877 | if (aligned_len < page_sz) |
878 | goto free_unlock; |
879 | |
880 | /* we can free something. however, some of these pages may be marked as |
881 | * unfreeable, so also check that as well |
882 | */ |
883 | n_segs = aligned_len / page_sz; |
884 | for (i = 0; i < n_segs; i++) { |
885 | const struct rte_memseg *tmp = |
886 | rte_mem_virt2memseg(aligned_start, msl); |
887 | |
888 | if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) { |
889 | /* this is an unfreeable segment, so move start */ |
890 | aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len); |
891 | } |
892 | } |
893 | |
894 | /* recalculate length and number of segments */ |
895 | aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start); |
896 | n_segs = aligned_len / page_sz; |
897 | |
898 | /* check if we can still free some pages */ |
899 | if (n_segs == 0) |
900 | goto free_unlock; |
901 | |
902 | /* We're not done yet. We also have to check if by freeing space we will |
903 | * be leaving free elements that are too small to store new elements. |
904 | * Check if we have enough space in the beginning and at the end, or if |
905 | * start/end are exactly page aligned. |
906 | */ |
907 | before_space = RTE_PTR_DIFF(aligned_start, elem); |
908 | after_space = RTE_PTR_DIFF(end, aligned_end); |
909 | if (before_space != 0 && |
910 | before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { |
911 | /* There is not enough space before start, but we may be able to |
912 | * move the start forward by one page. |
913 | */ |
914 | if (n_segs == 1) |
915 | goto free_unlock; |
916 | |
917 | /* move start */ |
918 | aligned_start = RTE_PTR_ADD(aligned_start, page_sz); |
919 | aligned_len -= page_sz; |
920 | n_segs--; |
921 | } |
922 | if (after_space != 0 && after_space < |
923 | MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { |
924 | /* There is not enough space after end, but we may be able to |
925 | * move the end backwards by one page. |
926 | */ |
927 | if (n_segs == 1) |
928 | goto free_unlock; |
929 | |
930 | /* move end */ |
931 | aligned_end = RTE_PTR_SUB(aligned_end, page_sz); |
Value stored to 'aligned_end' is never read | |
932 | aligned_len -= page_sz; |
933 | n_segs--; |
934 | } |
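The store at line 931 above is the dead store the analyzer reports: everything after that point works from aligned_start, aligned_len and n_segs, so the updated aligned_end value is never consumed. A minimal sketch of one way to address it (an illustrative rework, not necessarily how upstream DPDK resolved the warning) is either to drop the assignment and keep the direct length decrement, or to keep it and derive the new length from the updated pointers so the stored value is actually read:

    /* move end back by one page and recompute the length from the
     * updated pointers, so the new aligned_end value is used
     * (equivalent to aligned_len -= page_sz in the code above) */
    aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
    aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
    n_segs--;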
935 | |
936 | /* now we can finally free us some pages */ |
937 | |
938 | rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); |
939 | |
940 | /* |
941 | * we allow secondary processes to clear the heap of this allocated |
942 | * memory because it is safe to do so, as even if notifications about |
943 | * unmapped pages don't make it to other processes, heap is shared |
944 | * across all processes, and will become empty of this memory anyway, |
945 | * and nothing can allocate it back unless primary process will be able |
946 | * to deliver allocation message to every single running process. |
947 | */ |
948 | |
949 | malloc_elem_free_list_remove(elem); |
950 | |
951 | malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len); |
952 | |
953 | heap->total_size -= aligned_len; |
954 | |
955 | if (rte_eal_process_type() == RTE_PROC_PRIMARY) { |
956 | /* notify user about changes in memory map */ |
957 | eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, |
958 | aligned_start, aligned_len); |
959 | |
960 | /* don't care if any of this fails */ |
961 | malloc_heap_free_pages(aligned_start, aligned_len); |
962 | |
963 | request_sync(); |
964 | } else { |
965 | struct malloc_mp_req req; |
966 | |
967 | memset(&req, 0, sizeof(req)); |
968 | |
969 | req.t = REQ_TYPE_FREE; |
970 | req.free_req.addr = aligned_start; |
971 | req.free_req.len = aligned_len; |
972 | |
973 | /* |
974 | * we request primary to deallocate pages, but we don't do it |
975 | * in this thread. instead, we notify primary that we would like |
976 | * to deallocate pages, and this process will receive another |
977 | * request (in parallel) that will do it for us on another |
978 | * thread. |
979 | * |
980 | * we also don't really care if this succeeds - the data is |
981 | * already removed from the heap, so it is, for all intents and |
982 | * purposes, hidden from the rest of DPDK even if some other |
983 | * process (including this one) may have these pages mapped. |
984 | * |
985 | * notifications about deallocated memory happen during sync. |
986 | */ |
987 | request_to_primary(&req); |
988 | } |
989 | |
990 | RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n", |
991 | msl->socket_id, aligned_len >> 20ULL); |
992 | |
993 | rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); |
994 | free_unlock: |
995 | rte_spinlock_unlock(&(heap->lock)); |
996 | return ret; |
997 | } |
998 | |
999 | int |
1000 | malloc_heap_resize(struct malloc_elem *elem, size_t size) |
1001 | { |
1002 | int ret; |
1003 | |
1004 | if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) |
1005 | return -1; |
1006 | |
1007 | rte_spinlock_lock(&(elem->heap->lock)); |
1008 | |
1009 | ret = malloc_elem_resize(elem, size); |
1010 | |
1011 | rte_spinlock_unlock(&(elem->heap->lock)); |
1012 | |
1013 | return ret; |
1014 | } |
1015 | |
1016 | /* |
1017 | * Function to retrieve data for a given heap |
1018 | */ |
1019 | int |
1020 | malloc_heap_get_stats(struct malloc_heap *heap, |
1021 | struct rte_malloc_socket_stats *socket_stats) |
1022 | { |
1023 | size_t idx; |
1024 | struct malloc_elem *elem; |
1025 | |
1026 | rte_spinlock_lock(&heap->lock); |
1027 | |
1028 | /* Initialise variables for heap */ |
1029 | socket_stats->free_count = 0; |
1030 | socket_stats->heap_freesz_bytes = 0; |
1031 | socket_stats->greatest_free_size = 0; |
1032 | |
1033 | /* Iterate through free list */ |
1034 | for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { |
1035 | for (elem = LIST_FIRST(&heap->free_head[idx]); |
1036 | !!elem; elem = LIST_NEXT(elem, free_list)) |
1037 | { |
1038 | socket_stats->free_count++; |
1039 | socket_stats->heap_freesz_bytes += elem->size; |
1040 | if (elem->size > socket_stats->greatest_free_size) |
1041 | socket_stats->greatest_free_size = elem->size; |
1042 | } |
1043 | } |
1044 | /* Get stats on overall heap and allocated memory on this heap */ |
1045 | socket_stats->heap_totalsz_bytes = heap->total_size; |
1046 | socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes - |
1047 | socket_stats->heap_freesz_bytes); |
1048 | socket_stats->alloc_count = heap->alloc_count; |
1049 | |
1050 | rte_spinlock_unlock(&heap->lock); |
1051 | return 0; |
1052 | } |
1053 | |
1054 | /* |
1055 | * Function to retrieve data for a given heap |
1056 | */ |
1057 | void |
1058 | malloc_heap_dump(struct malloc_heap *heap, FILE *f) |
1059 | { |
1060 | struct malloc_elem *elem; |
1061 | |
1062 | rte_spinlock_lock(&heap->lock); |
1063 | |
1064 | fprintf(f, "Heap size: 0x%zx\n", heap->total_size); |
1065 | fprintf(f, "Heap alloc count: %u\n", heap->alloc_count); |
1066 | |
1067 | elem = heap->first; |
1068 | while (elem) { |
1069 | malloc_elem_dump(elem, f); |
1070 | elem = elem->next; |
1071 | } |
1072 | |
1073 | rte_spinlock_unlock(&heap->lock); |
1074 | } |
1075 | |
1076 | static int |
1077 | destroy_elem(struct malloc_elem *elem, size_t len) |
1078 | { |
1079 | struct malloc_heap *heap = elem->heap; |
1080 | |
1081 | /* notify all subscribers that a memory area is going to be removed */ |
1082 | eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len); |
1083 | |
1084 | /* this element can be removed */ |
1085 | malloc_elem_free_list_remove(elem); |
1086 | malloc_elem_hide_region(elem, elem, len); |
1087 | |
1088 | heap->total_size -= len; |
1089 | |
1090 | memset(elem, 0, sizeof(*elem)); |
1091 | |
1092 | return 0; |
1093 | } |
1094 | |
1095 | struct rte_memseg_list * |
1096 | malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], |
1097 | unsigned int n_pages, size_t page_sz, const char *seg_name, |
1098 | unsigned int socket_id) |
1099 | { |
1100 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
1101 | char fbarray_name[RTE_FBARRAY_NAME_LEN]; |
1102 | struct rte_memseg_list *msl = NULL; |
1103 | struct rte_fbarray *arr; |
1104 | size_t seg_len = n_pages * page_sz; |
1105 | unsigned int i; |
1106 | |
1107 | /* first, find a free memseg list */ |
1108 | for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { |
1109 | struct rte_memseg_list *tmp = &mcfg->memsegs[i]; |
1110 | if (tmp->base_va == NULL) { |
1111 | msl = tmp; |
1112 | break; |
1113 | } |
1114 | } |
1115 | if (msl == NULL) { |
1116 | RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n"); |
1117 | rte_errno = ENOSPC; |
1118 | return NULL; |
1119 | } |
1120 | |
1121 | snprintf(fbarray_name, sizeof(fbarray_name) - 1, "%s_%p", |
1122 | seg_name, va_addr); |
1123 | |
1124 | /* create the backing fbarray */ |
1125 | if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages, |
1126 | sizeof(struct rte_memseg)) < 0) { |
1127 | RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n"); |
1128 | return NULL; |
1129 | } |
1130 | arr = &msl->memseg_arr; |
1131 | |
1132 | /* fbarray created, fill it up */ |
1133 | for (i = 0; i < n_pages; i++) { |
1134 | struct rte_memseg *ms; |
1135 | |
1136 | rte_fbarray_set_used(arr, i); |
1137 | ms = rte_fbarray_get(arr, i); |
1138 | ms->addr = RTE_PTR_ADD(va_addr, i * page_sz); |
1139 | ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i]; |
1140 | ms->hugepage_sz = page_sz; |
1141 | ms->len = page_sz; |
1142 | ms->nchannel = rte_memory_get_nchannel(); |
1143 | ms->nrank = rte_memory_get_nrank(); |
1144 | ms->socket_id = socket_id; |
1145 | } |
1146 | |
1147 | /* set up the memseg list */ |
1148 | msl->base_va = va_addr; |
1149 | msl->page_sz = page_sz; |
1150 | msl->socket_id = socket_id; |
1151 | msl->len = seg_len; |
1152 | msl->version = 0; |
1153 | msl->external = 1; |
1154 | |
1155 | return msl; |
1156 | } |
1157 | |
1158 | struct extseg_walk_arg { |
1159 | void *va_addr; |
1160 | size_t len; |
1161 | struct rte_memseg_list *msl; |
1162 | }; |
1163 | |
1164 | static int |
1165 | extseg_walk(const struct rte_memseg_list *msl, void *arg) |
1166 | { |
1167 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
1168 | struct extseg_walk_arg *wa = arg; |
1169 | |
1170 | if (msl->base_va == wa->va_addr && msl->len == wa->len) { |
1171 | unsigned int found_idx; |
1172 | |
1173 | /* msl is const */ |
1174 | found_idx = msl - mcfg->memsegs; |
1175 | wa->msl = &mcfg->memsegs[found_idx]; |
1176 | return 1; |
1177 | } |
1178 | return 0; |
1179 | } |
1180 | |
1181 | struct rte_memseg_list * |
1182 | malloc_heap_find_external_seg(void *va_addr, size_t len) |
1183 | { |
1184 | struct extseg_walk_arg wa; |
1185 | int res; |
1186 | |
1187 | wa.va_addr = va_addr; |
1188 | wa.len = len; |
1189 | |
1190 | res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa); |
1191 | |
1192 | if (res != 1) { |
1193 | /* 0 means nothing was found, -1 shouldn't happen */ |
1194 | if (res == 0) |
1195 | rte_errno = ENOENT; |
1196 | return NULL; |
1197 | } |
1198 | return wa.msl; |
1199 | } |
1200 | |
1201 | int |
1202 | malloc_heap_destroy_external_seg(struct rte_memseg_list *msl) |
1203 | { |
1204 | /* destroy the fbarray backing this memory */ |
1205 | if (rte_fbarray_destroy(&msl->memseg_arr) < 0) |
1206 | return -1; |
1207 | |
1208 | /* reset the memseg list */ |
1209 | memset(msl, 0, sizeof(*msl)); |
1210 | |
1211 | return 0; |
1212 | } |
1213 | |
1214 | int |
1215 | malloc_heap_add_external_memory(struct malloc_heap *heap, |
1216 | struct rte_memseg_list *msl) |
1217 | { |
1218 | /* erase contents of new memory */ |
1219 | memset(msl->base_va, 0, msl->len); |
1220 | |
1221 | /* now, add newly minted memory to the malloc heap */ |
1222 | malloc_heap_add_memory(heap, msl, msl->base_va, msl->len); |
1223 | |
1224 | heap->total_size += msl->len; |
1225 | |
1226 | /* all done! */ |
1227 | RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n", |
1228 | heap->name, msl->base_va); |
1229 | |
1230 | /* notify all subscribers that a new memory area has been added */ |
1231 | eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, |
1232 | msl->base_va, msl->len); |
1233 | |
1234 | return 0; |
1235 | } |
1236 | |
1237 | int |
1238 | malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, |
1239 | size_t len) |
1240 | { |
1241 | struct malloc_elem *elem = heap->first; |
1242 | |
1243 | /* find element with specified va address */ |
1244 | while (elem != NULL && elem != va_addr) { |
1245 | elem = elem->next; |
1246 | /* stop if we've blown past our VA */ |
1247 | if (elem > (struct malloc_elem *)va_addr) { |
1248 | rte_errno = ENOENT; |
1249 | return -1; |
1250 | } |
1251 | } |
1252 | /* check if element was found */ |
1253 | if (elem == NULL || elem->msl->len != len) { |
1254 | rte_errno = ENOENT; |
1255 | return -1; |
1256 | } |
1257 | /* if element's size is not equal to segment len, segment is busy */ |
1258 | if (elem->state == ELEM_BUSY || elem->size != len) { |
1259 | rte_errno = EBUSY; |
1260 | return -1; |
1261 | } |
1262 | return destroy_elem(elem, len); |
1263 | } |
1264 | |
1265 | int |
1266 | malloc_heap_create(struct malloc_heap *heap, const char *heap_name) |
1267 | { |
1268 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
1269 | uint32_t next_socket_id = mcfg->next_socket_id; |
1270 | |
1271 | /* prevent overflow. did you really create 2 billion heaps??? */ |
1272 | if (next_socket_id > INT32_MAX) { |
1273 | RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n"); |
1274 | rte_errno = ENOSPC; |
1275 | return -1; |
1276 | } |
1277 | |
1278 | /* initialize empty heap */ |
1279 | heap->alloc_count = 0; |
1280 | heap->first = NULL; |
1281 | heap->last = NULL; |
1282 | LIST_INIT(heap->free_head); |
1283 | rte_spinlock_init(&heap->lock); |
1284 | heap->total_size = 0; |
1285 | heap->socket_id = next_socket_id; |
1286 | |
1287 | /* we hold a global mem hotplug writelock, so it's safe to increment */ |
1288 | mcfg->next_socket_id++; |
1289 | |
1290 | /* set up name */ |
1291 | strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); |
1292 | return 0; |
1293 | } |
1294 | |
1295 | int |
1296 | malloc_heap_destroy(struct malloc_heap *heap) |
1297 | { |
1298 | if (heap->alloc_count != 0) { |
1299 | RTE_LOG(ERR, EAL, "Heap is still in use\n"); |
1300 | rte_errno = EBUSY; |
1301 | return -1; |
1302 | } |
1303 | if (heap->first != NULL || heap->last != NULL) { |
1304 | RTE_LOG(ERR, EAL, "Heap still contains memory segments\n"); |
1305 | rte_errno = EBUSY; |
1306 | return -1; |
1307 | } |
1308 | if (heap->total_size != 0) |
1309 | RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n"); |
1310 | |
1311 | /* after this, the lock will be dropped */ |
1312 | memset(heap, 0, sizeof(*heap)); |
1313 | |
1314 | return 0; |
1315 | } |
1316 | |
1317 | int |
1318 | rte_eal_malloc_heap_init(void) |
1319 | { |
1320 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
1321 | unsigned int i; |
1322 | |
1323 | if (internal_config.match_allocations) { |
1324 | RTE_LOG(DEBUG, EAL, "Hugepages will be freed exactly as allocated.\n"); |
1325 | } |
1326 | |
1327 | if (rte_eal_process_type() == RTE_PROC_PRIMARY) { |
1328 | /* assign min socket ID to external heaps */ |
1329 | mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID; |
1330 | |
1331 | /* assign names to default DPDK heaps */ |
1332 | for (i = 0; i < rte_socket_count(); i++) { |
1333 | struct malloc_heap *heap = &mcfg->malloc_heaps[i]; |
1334 | char heap_name[RTE_HEAP_NAME_MAX_LEN]; |
1335 | int socket_id = rte_socket_id_by_idx(i); |
1336 | |
1337 | snprintf(heap_name, sizeof(heap_name) - 1, |
1338 | "socket_%i", socket_id); |
1339 | strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); |
1340 | heap->socket_id = socket_id; |
1341 | } |
1342 | } |
1343 | |
1344 | |
1345 | if (register_mp_requests()) { |
1346 | RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n"); |
1347 | rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); |
1348 | return -1; |
1349 | } |
1350 | |
1351 | /* unlock mem hotplug here. it's safe for primary as no requests can |
1352 | * even come before primary itself is fully initialized, and secondaries |
1353 | * do not need to initialize the heap. |
1354 | */ |
1355 | rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); |
1356 | |
1357 | /* secondary process does not need to initialize anything */ |
1358 | if (rte_eal_process_type() != RTE_PROC_PRIMARY) |
1359 | return 0; |
1360 | |
1361 | /* add all IOVA-contiguous areas to the heap */ |
1362 | return rte_memseg_contig_walk(malloc_add_seg, NULL); |
1363 | } |