File: home/bhubbard/working/src/ceph/src/spdk/dpdk/lib/librte_eal/linux/eal/eal_vfio.c
Warning: line 1177, column 2: Value stored to 'vfio_container_fd' is never read
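Analysis note: the flagged store is the `vfio_container_fd = -1;` at line 1177 in rte_vfio_get_container_fd() below. On the success path the variable is immediately overwritten with the fd received over the mp channel, and every failure path returns the literal -1 rather than the variable, so the stored value is indeed never read. A minimal sketch of one possible fix (an editorial suggestion based on the code below, not an upstream patch) is simply to drop the dead initialization:

	/* sketch: initialization removed; vfio_container_fd is only
	 * assigned (from mp_rep->fds[0]) right before it is returned */
	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
			mp_reply.nb_received == 1) {
		mp_rep = &mp_reply.msgs[0];
		p = (struct vfio_mp_param *)mp_rep->param;
		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
			vfio_container_fd = mp_rep->fds[0];
			free(mp_reply.msgs);
			return vfio_container_fd;
		}
		free(mp_reply.msgs);
	}
	RTE_LOG(ERR, EAL, " cannot request container fd\n");
	return -1;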
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2018 Intel Corporation |
3 | */ |
4 | |
5 | #include <inttypes.h> |
6 | #include <string.h> |
7 | #include <fcntl.h> |
8 | #include <unistd.h> |
9 | #include <sys/ioctl.h> |
10 | |
11 | #include <rte_errno.h> |
12 | #include <rte_log.h> |
13 | #include <rte_memory.h> |
14 | #include <rte_eal_memconfig.h> |
15 | #include <rte_vfio.h> |
16 | |
17 | #include "eal_filesystem.h" |
18 | #include "eal_vfio.h" |
19 | #include "eal_private.h" |
20 | |
21 | #ifdef VFIO_PRESENT |
22 | |
23 | #define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb" |
24 | |
25 | /* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can |
26 | * recreate the mappings for DPDK segments, but we cannot do so for memory that |
27 | * was registered by the user themselves, so we need to store the user mappings |
28 | * somewhere, to recreate them later. |
29 | */ |
30 | #define VFIO_MAX_USER_MEM_MAPS 256 |
31 | struct user_mem_map { |
32 | uint64_t addr; |
33 | uint64_t iova; |
34 | uint64_t len; |
35 | }; |
36 | |
37 | struct user_mem_maps { |
38 | rte_spinlock_recursive_t lock; |
39 | int n_maps; |
40 | struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS]; |
41 | }; |
42 | |
43 | struct vfio_config { |
44 | int vfio_enabled; |
45 | int vfio_container_fd; |
46 | int vfio_active_groups; |
47 | const struct vfio_iommu_type *vfio_iommu_type; |
48 | struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; |
49 | struct user_mem_maps mem_maps; |
50 | }; |
51 | |
52 | /* per-process VFIO config */ |
53 | static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS]; |
54 | static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0]; |
55 | |
56 | static int vfio_type1_dma_map(int); |
57 | static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); |
58 | static int vfio_spapr_dma_map(int); |
59 | static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); |
60 | static int vfio_noiommu_dma_map(int); |
61 | static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); |
62 | static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, |
63 | uint64_t iova, uint64_t len, int do_map); |
64 | |
65 | /* IOMMU types we support */ |
66 | static const struct vfio_iommu_type iommu_types[] = { |
67 | /* x86 IOMMU, otherwise known as type 1 */ |
68 | { |
69 | .type_id = RTE_VFIO_TYPE1, |
70 | .name = "Type 1", |
71 | .dma_map_func = &vfio_type1_dma_map, |
72 | .dma_user_map_func = &vfio_type1_dma_mem_map |
73 | }, |
74 | /* ppc64 IOMMU, otherwise known as spapr */ |
75 | { |
76 | .type_id = RTE_VFIO_SPAPR, |
77 | .name = "sPAPR", |
78 | .dma_map_func = &vfio_spapr_dma_map, |
79 | .dma_user_map_func = &vfio_spapr_dma_mem_map |
80 | }, |
81 | /* IOMMU-less mode */ |
82 | { |
83 | .type_id = RTE_VFIO_NOIOMMU, |
84 | .name = "No-IOMMU", |
85 | .dma_map_func = &vfio_noiommu_dma_map, |
86 | .dma_user_map_func = &vfio_noiommu_dma_mem_map |
87 | }, |
88 | }; |
89 | |
90 | static int |
91 | is_null_map(const struct user_mem_map *map) |
92 | { |
93 | return map->addr == 0 && map->iova == 0 && map->len == 0; |
94 | } |
95 | |
96 | /* we may need to merge user mem maps together in case of user mapping/unmapping |
97 | * chunks of memory, so we'll need a comparator function to sort segments. |
98 | */ |
99 | static int |
100 | user_mem_map_cmp(const void *a, const void *b) |
101 | { |
102 | const struct user_mem_map *umm_a = a; |
103 | const struct user_mem_map *umm_b = b; |
104 | |
105 | /* move null entries to end */ |
106 | if (is_null_map(umm_a)) |
107 | return 1; |
108 | if (is_null_map(umm_b)) |
109 | return -1; |
110 | |
111 | /* sort by iova first */ |
112 | if (umm_a->iova < umm_b->iova) |
113 | return -1; |
114 | if (umm_a->iova > umm_b->iova) |
115 | return 1; |
116 | |
117 | if (umm_a->addr < umm_b->addr) |
118 | return -1; |
119 | if (umm_a->addr > umm_b->addr) |
120 | return 1; |
121 | |
122 | if (umm_a->len < umm_b->len) |
123 | return -1; |
124 | if (umm_a->len > umm_b->len) |
125 | return 1; |
126 | |
127 | return 0; |
128 | } |
129 | |
130 | /* adjust user map entry. this may result in shortening of existing map, or in |
131 | * splitting existing map in two pieces. |
132 | */ |
133 | static void |
134 | adjust_map(struct user_mem_map *src, struct user_mem_map *end, |
135 | uint64_t remove_va_start, uint64_t remove_len) |
136 | { |
137 | /* if va start is same as start address, we're simply moving start */ |
138 | if (remove_va_start == src->addr) { |
139 | src->addr += remove_len; |
140 | src->iova += remove_len; |
141 | src->len -= remove_len; |
142 | } else if (remove_va_start + remove_len == src->addr + src->len) { |
143 | /* we're shrinking mapping from the end */ |
144 | src->len -= remove_len; |
145 | } else { |
146 | /* we're blowing a hole in the middle */ |
147 | struct user_mem_map tmp; |
148 | uint64_t total_len = src->len; |
149 | |
150 | /* adjust source segment length */ |
151 | src->len = remove_va_start - src->addr; |
152 | |
153 | /* create temporary segment in the middle */ |
154 | tmp.addr = src->addr + src->len; |
155 | tmp.iova = src->iova + src->len; |
156 | tmp.len = remove_len; |
157 | |
158 | /* populate end segment - this one we will be keeping */ |
159 | end->addr = tmp.addr + tmp.len; |
160 | end->iova = tmp.iova + tmp.len; |
161 | end->len = total_len - src->len - tmp.len; |
162 | } |
163 | } |
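A worked example of the "hole in the middle" case, with illustrative values (not taken from this file): removing [0x2000, 0x3000) from a map starting at 0x1000 with length 0x3000 shrinks src and populates end so the two surviving pieces exactly bracket the removed range:

	struct user_mem_map src = { .addr = 0x1000, .iova = 0x1000, .len = 0x3000 };
	struct user_mem_map end;
	adjust_map(&src, &end, 0x2000, 0x1000);
	/* now: src = {0x1000, 0x1000, 0x1000} and end = {0x3000, 0x3000, 0x1000} */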
164 | |
165 | /* try merging two maps into one, return 1 if succeeded */ |
166 | static int |
167 | merge_map(struct user_mem_map *left, struct user_mem_map *right) |
168 | { |
169 | if (left->addr + left->len != right->addr) |
170 | return 0; |
171 | if (left->iova + left->len != right->iova) |
172 | return 0; |
173 | |
174 | left->len += right->len; |
175 | |
176 | memset(right, 0, sizeof(*right)); |
177 | |
178 | return 1; |
179 | } |
180 | |
181 | static struct user_mem_map * |
182 | find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr, |
183 | uint64_t iova, uint64_t len) |
184 | { |
185 | uint64_t va_end = addr + len; |
186 | uint64_t iova_end = iova + len; |
187 | int i; |
188 | |
189 | for (i = 0; i < user_mem_maps->n_maps; i++) { |
190 | struct user_mem_map *map = &user_mem_maps->maps[i]; |
191 | uint64_t map_va_end = map->addr + map->len; |
192 | uint64_t map_iova_end = map->iova + map->len; |
193 | |
194 | /* check start VA */ |
195 | if (addr < map->addr || addr >= map_va_end) |
196 | continue; |
197 | /* check if VA end is within boundaries */ |
198 | if (va_end <= map->addr || va_end > map_va_end) |
199 | continue; |
200 | |
201 | /* check start IOVA */ |
202 | if (iova < map->iova || iova >= map_iova_end) |
203 | continue; |
204 | /* check if IOVA end is within boundaries */ |
205 | if (iova_end <= map->iova || iova_end > map_iova_end) |
206 | continue; |
207 | |
208 | /* we've found our map */ |
209 | return map; |
210 | } |
211 | return NULL; |
212 | } |
213 | |
214 | /* this will sort all user maps, and merge/compact any adjacent maps */ |
215 | static void |
216 | compact_user_maps(struct user_mem_maps *user_mem_maps) |
217 | { |
218 | int i, n_merged, cur_idx; |
219 | |
220 | qsort(user_mem_maps->maps, user_mem_maps->n_maps, |
221 | sizeof(user_mem_maps->maps[0]), user_mem_map_cmp); |
222 | |
223 | /* we'll go over the list backwards when merging */ |
224 | n_merged = 0; |
225 | for (i = user_mem_maps->n_maps - 2; i >= 0; i--) { |
226 | struct user_mem_map *l, *r; |
227 | |
228 | l = &user_mem_maps->maps[i]; |
229 | r = &user_mem_maps->maps[i + 1]; |
230 | |
231 | if (is_null_map(l) || is_null_map(r)) |
232 | continue; |
233 | |
234 | if (merge_map(l, r)) |
235 | n_merged++; |
236 | } |
237 | |
238 | /* the entries are still sorted, but now they have holes in them, so |
239 | * walk through the list and remove the holes |
240 | */ |
241 | if (n_merged > 0) { |
242 | cur_idx = 0; |
243 | for (i = 0; i < user_mem_maps->n_maps; i++) { |
244 | if (!is_null_map(&user_mem_maps->maps[i])) { |
245 | struct user_mem_map *src, *dst; |
246 | |
247 | src = &user_mem_maps->maps[i]; |
248 | dst = &user_mem_maps->maps[cur_idx++]; |
249 | |
250 | if (src != dst) { |
251 | memcpy(dst, src, sizeof(*src)); |
252 | memset(src, 0, sizeof(*src)); |
253 | } |
254 | } |
255 | } |
256 | user_mem_maps->n_maps = cur_idx; |
257 | } |
258 | } |
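To make the merge/compact behavior concrete, a small illustrative scenario (values invented for the example): two maps contiguous in both VA and IOVA collapse into one entry, and the zeroed-out right entry is then squeezed out of the array:

	/* before: maps[0] = {0x1000, 0x1000, 0x1000}, maps[1] = {0x2000, 0x2000, 0x1000} */
	merge_map(&maps[0], &maps[1]);  /* maps[0].len becomes 0x2000, maps[1] is zeroed */
	/* compact_user_maps() then sorts null entries to the end and drops them, n_maps = 1 */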
259 | |
260 | static int |
261 | vfio_open_group_fd(int iommu_group_num) |
262 | { |
263 | int vfio_group_fd; |
264 | char filename[PATH_MAX]; |
265 | struct rte_mp_msg mp_req, *mp_rep; |
266 | struct rte_mp_reply mp_reply; |
267 | struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; |
268 | struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; |
269 | |
270 | /* if primary, try to open the group */ |
271 | if (internal_config.process_type == RTE_PROC_PRIMARY) { |
272 | /* try regular group format */ |
273 | snprintf(filename, sizeof(filename), |
274 | VFIO_GROUP_FMT, iommu_group_num); |
275 | vfio_group_fd = open(filename, O_RDWR); |
276 | if (vfio_group_fd < 0) { |
277 | /* if file not found, it's not an error */ |
278 | if (errno != ENOENT) { |
279 | RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, |
280 | strerror(errno)); |
281 | return -1; |
282 | } |
283 | |
284 | /* special case: try no-IOMMU path as well */ |
285 | snprintf(filename, sizeof(filename), |
286 | VFIO_NOIOMMU_GROUP_FMT, |
287 | iommu_group_num); |
288 | vfio_group_fd = open(filename, O_RDWR); |
289 | if (vfio_group_fd < 0) { |
290 | if (errno != ENOENT) { |
291 | RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, |
292 | strerror(errno)); |
293 | return -1; |
294 | } |
295 | return 0; |
296 | } |
297 | /* noiommu group found */ |
298 | } |
299 | |
300 | return vfio_group_fd; |
301 | } |
302 | /* if we're in a secondary process, request group fd from the primary |
303 | * process via mp channel. |
304 | */ |
305 | p->req = SOCKET_REQ_GROUP; |
306 | p->group_num = iommu_group_num; |
307 | strcpy(mp_req.name, EAL_VFIO_MP); |
308 | mp_req.len_param = sizeof(*p); |
309 | mp_req.num_fds = 0; |
310 | |
311 | vfio_group_fd = -1; |
312 | if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && |
313 | mp_reply.nb_received == 1) { |
314 | mp_rep = &mp_reply.msgs[0]; |
315 | p = (struct vfio_mp_param *)mp_rep->param; |
316 | if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { |
317 | vfio_group_fd = mp_rep->fds[0]; |
318 | } else if (p->result == SOCKET_NO_FD) { |
319 | RTE_LOG(ERR, EAL, " bad VFIO group fd\n"); |
320 | vfio_group_fd = 0; |
321 | } |
322 | free(mp_reply.msgs); |
323 | } |
324 | |
325 | if (vfio_group_fd < 0) |
326 | RTE_LOG(ERR, EAL, " cannot request group fd\n"); |
327 | return vfio_group_fd; |
328 | } |
329 | |
330 | static struct vfio_config * |
331 | get_vfio_cfg_by_group_num(int iommu_group_num) |
332 | { |
333 | struct vfio_config *vfio_cfg; |
334 | int i, j; |
335 | |
336 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { |
337 | vfio_cfg = &vfio_cfgs[i]; |
338 | for (j = 0; j < VFIO_MAX_GROUPS; j++) { |
339 | if (vfio_cfg->vfio_groups[j].group_num == |
340 | iommu_group_num) |
341 | return vfio_cfg; |
342 | } |
343 | } |
344 | |
345 | return NULL; |
346 | } |
347 | |
348 | static int |
349 | vfio_get_group_fd(struct vfio_config *vfio_cfg, |
350 | int iommu_group_num) |
351 | { |
352 | int i; |
353 | int vfio_group_fd; |
354 | struct vfio_group *cur_grp; |
355 | |
356 | /* check if we already have the group descriptor open */ |
357 | for (i = 0; i < VFIO_MAX_GROUPS; i++) |
358 | if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) |
359 | return vfio_cfg->vfio_groups[i].fd; |
360 | |
361 | /* Let's first see if there is room for a new group */ |
362 | if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { |
363 | RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); |
364 | return -1; |
365 | } |
366 | |
367 | /* Now let's get an index for the new group */ |
368 | for (i = 0; i < VFIO_MAX_GROUPS; i++) |
369 | if (vfio_cfg->vfio_groups[i].group_num == -1) { |
370 | cur_grp = &vfio_cfg->vfio_groups[i]; |
371 | break; |
372 | } |
373 | |
374 | /* This should not happen */ |
375 | if (i == VFIO_MAX_GROUPS) { |
376 | RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); |
377 | return -1; |
378 | } |
379 | |
380 | vfio_group_fd = vfio_open_group_fd(iommu_group_num); |
381 | if (vfio_group_fd < 0) { |
382 | RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); |
383 | return -1; |
384 | } |
385 | |
386 | cur_grp->group_num = iommu_group_num; |
387 | cur_grp->fd = vfio_group_fd; |
388 | vfio_cfg->vfio_active_groups++; |
389 | |
390 | return vfio_group_fd; |
391 | } |
392 | |
393 | static struct vfio_config * |
394 | get_vfio_cfg_by_group_fd(int vfio_group_fd) |
395 | { |
396 | struct vfio_config *vfio_cfg; |
397 | int i, j; |
398 | |
399 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { |
400 | vfio_cfg = &vfio_cfgs[i]; |
401 | for (j = 0; j < VFIO_MAX_GROUPS; j++) |
402 | if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) |
403 | return vfio_cfg; |
404 | } |
405 | |
406 | return NULL; |
407 | } |
408 | |
409 | static struct vfio_config * |
410 | get_vfio_cfg_by_container_fd(int container_fd) |
411 | { |
412 | int i; |
413 | |
414 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { |
415 | if (vfio_cfgs[i].vfio_container_fd == container_fd) |
416 | return &vfio_cfgs[i]; |
417 | } |
418 | |
419 | return NULL; |
420 | } |
421 | |
422 | int |
423 | rte_vfio_get_group_fd(int iommu_group_num) |
424 | { |
425 | struct vfio_config *vfio_cfg; |
426 | |
427 | /* get the vfio_config it belongs to */ |
428 | vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); |
429 | vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; |
430 | |
431 | return vfio_get_group_fd(vfio_cfg, iommu_group_num); |
432 | } |
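A usage sketch of the two public lookups above (the sysfs base and PCI address are illustrative values, not taken from this file):

	int group_num, group_fd;
	int ret = rte_vfio_get_group_num("/sys/bus/pci/devices",
			"0000:81:00.0", &group_num);
	if (ret == 1) {                 /* 1: device is in an IOMMU group */
		group_fd = rte_vfio_get_group_fd(group_num);
		if (group_fd <= 0) {
			/* 0: not managed by VFIO; < 0: open/request failed */
		}
	}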
433 | |
434 | static int |
435 | get_vfio_group_idx(int vfio_group_fd) |
436 | { |
437 | struct vfio_config *vfio_cfg; |
438 | int i, j; |
439 | |
440 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { |
441 | vfio_cfg = &vfio_cfgs[i]; |
442 | for (j = 0; j < VFIO_MAX_GROUPS; j++) |
443 | if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) |
444 | return j; |
445 | } |
446 | |
447 | return -1; |
448 | } |
449 | |
450 | static void |
451 | vfio_group_device_get(int vfio_group_fd) |
452 | { |
453 | struct vfio_config *vfio_cfg; |
454 | int i; |
455 | |
456 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); |
457 | if (vfio_cfg == NULL) { |
458 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); |
459 | return; |
460 | } |
461 | |
462 | i = get_vfio_group_idx(vfio_group_fd); |
463 | if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) |
464 | RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); |
465 | else |
466 | vfio_cfg->vfio_groups[i].devices++; |
467 | } |
468 | |
469 | static void |
470 | vfio_group_device_put(int vfio_group_fd) |
471 | { |
472 | struct vfio_config *vfio_cfg; |
473 | int i; |
474 | |
475 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); |
476 | if (vfio_cfg == NULL) { |
477 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); |
478 | return; |
479 | } |
480 | |
481 | i = get_vfio_group_idx(vfio_group_fd); |
482 | if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) |
483 | RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); |
484 | else |
485 | vfio_cfg->vfio_groups[i].devices--; |
486 | } |
487 | |
488 | static int |
489 | vfio_group_device_count(int vfio_group_fd) |
490 | { |
491 | struct vfio_config *vfio_cfg; |
492 | int i; |
493 | |
494 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); |
495 | if (vfio_cfg == NULL) { |
496 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); |
497 | return -1; |
498 | } |
499 | |
500 | i = get_vfio_group_idx(vfio_group_fd); |
501 | if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) { |
502 | RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); |
503 | return -1; |
504 | } |
505 | |
506 | return vfio_cfg->vfio_groups[i].devices; |
507 | } |
508 | |
509 | static void |
510 | vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, |
511 | void *arg __rte_unused) |
512 | { |
513 | struct rte_memseg_list *msl; |
514 | struct rte_memseg *ms; |
515 | size_t cur_len = 0; |
516 | |
517 | msl = rte_mem_virt2memseg_list(addr); |
518 | |
519 | /* for IOVA as VA mode, no need to care for IOVA addresses */ |
520 | if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) { |
521 | uint64_t vfio_va = (uint64_t)(uintptr_t)addr; |
522 | if (type == RTE_MEM_EVENT_ALLOC) |
523 | vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, |
524 | len, 1); |
525 | else |
526 | vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, |
527 | len, 0); |
528 | return; |
529 | } |
530 | |
531 | /* memsegs are contiguous in memory */ |
532 | ms = rte_mem_virt2memseg(addr, msl); |
533 | while (cur_len < len) { |
534 | /* some memory segments may have invalid IOVA */ |
535 | if (ms->iova == RTE_BAD_IOVA) { |
536 | RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n", |
537 | ms->addr); |
538 | goto next; |
539 | } |
540 | if (type == RTE_MEM_EVENT_ALLOC) |
541 | vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, |
542 | ms->iova, ms->len, 1); |
543 | else |
544 | vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, |
545 | ms->iova, ms->len, 0); |
546 | next: |
547 | cur_len += ms->len; |
548 | ++ms; |
549 | } |
550 | } |
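For context, this callback is registered further down in rte_vfio_setup_device() with

	rte_mem_event_callback_register(VFIO_MEM_EVENT_CLB_NAME,
			vfio_mem_event_callback, NULL);

so every segment the allocator adds or frees after device setup is DMA-mapped or unmapped through the default container automatically.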
551 | |
552 | static int |
553 | vfio_sync_default_container(void) |
554 | { |
555 | struct rte_mp_msg mp_req, *mp_rep; |
556 | struct rte_mp_reply mp_reply; |
557 | struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; |
558 | struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; |
559 | int iommu_type_id; |
560 | unsigned int i; |
561 | |
562 | /* cannot be called from primary */ |
563 | if (rte_eal_process_type() != RTE_PROC_SECONDARY) |
564 | return -1; |
565 | |
566 | /* default container fd should have been opened in rte_vfio_enable() */ |
567 | if (!default_vfio_cfg->vfio_enabled || |
568 | default_vfio_cfg->vfio_container_fd < 0) { |
569 | RTE_LOG(ERR, EAL, "VFIO support is not initialized\n"); |
570 | return -1; |
571 | } |
572 | |
573 | /* find default container's IOMMU type */ |
574 | p->req = SOCKET_REQ_IOMMU_TYPE; |
575 | strcpy(mp_req.name, EAL_VFIO_MP); |
576 | mp_req.len_param = sizeof(*p); |
577 | mp_req.num_fds = 0; |
578 | |
579 | iommu_type_id = -1; |
580 | if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && |
581 | mp_reply.nb_received == 1) { |
582 | mp_rep = &mp_reply.msgs[0]; |
583 | p = (struct vfio_mp_param *)mp_rep->param; |
584 | if (p->result == SOCKET_OK) |
585 | iommu_type_id = p->iommu_type_id; |
586 | free(mp_reply.msgs); |
587 | } |
588 | if (iommu_type_id < 0) { |
589 | RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n"); |
590 | return -1; |
591 | } |
592 | |
593 | /* we now have an fd for default container, as well as its IOMMU type. |
594 | * now, set up default VFIO container config to match. |
595 | */ |
596 | for (i = 0; i < RTE_DIM(iommu_types); i++) { |
597 | const struct vfio_iommu_type *t = &iommu_types[i]; |
598 | if (t->type_id != iommu_type_id) |
599 | continue; |
600 | |
601 | /* we found our IOMMU type */ |
602 | default_vfio_cfg->vfio_iommu_type = t; |
603 | |
604 | return 0; |
605 | } |
606 | RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n", |
607 | iommu_type_id); |
608 | return -1; |
609 | } |
610 | |
611 | int |
612 | rte_vfio_clear_group(int vfio_group_fd) |
613 | { |
614 | int i; |
615 | struct vfio_config *vfio_cfg; |
616 | |
617 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); |
618 | if (vfio_cfg == NULL) { |
619 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); |
620 | return -1; |
621 | } |
622 | |
623 | i = get_vfio_group_idx(vfio_group_fd); |
624 | if (i < 0) |
625 | return -1; |
626 | vfio_cfg->vfio_groups[i].group_num = -1; |
627 | vfio_cfg->vfio_groups[i].fd = -1; |
628 | vfio_cfg->vfio_groups[i].devices = 0; |
629 | vfio_cfg->vfio_active_groups--; |
630 | |
631 | return 0; |
632 | } |
633 | |
634 | int |
635 | rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, |
636 | int *vfio_dev_fd, struct vfio_device_info *device_info) |
637 | { |
638 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
639 | rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock; |
640 | struct vfio_group_status group_status = { |
641 | .argsz = sizeof(group_status) |
642 | }; |
643 | struct vfio_config *vfio_cfg; |
644 | struct user_mem_maps *user_mem_maps; |
645 | int vfio_container_fd; |
646 | int vfio_group_fd; |
647 | int iommu_group_num; |
648 | int i, ret; |
649 | |
650 | /* get group number */ |
651 | ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num); |
652 | if (ret == 0) { |
653 | RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", |
654 | dev_addr); |
655 | return 1; |
656 | } |
657 | |
658 | /* if negative, something failed */ |
659 | if (ret < 0) |
660 | return -1; |
661 | |
662 | /* get the actual group fd */ |
663 | vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num); |
664 | if (vfio_group_fd < 0) |
665 | return -1; |
666 | |
667 | /* if group_fd == 0, that means the device isn't managed by VFIO */ |
668 | if (vfio_group_fd == 0) { |
669 | RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", |
670 | dev_addr); |
671 | return 1; |
672 | } |
673 | |
674 | /* |
675 | * at this point, we know that this group is viable (meaning, all devices |
676 | * are either bound to VFIO or not bound to anything) |
677 | */ |
678 | |
679 | /* check if the group is viable */ |
680 | ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status); |
681 | if (ret) { |
682 | RTE_LOG(ERR, EAL, " %s cannot get group status, " |
683 | "error %i (%s)\n", dev_addr, errno, strerror(errno)); |
684 | close(vfio_group_fd); |
685 | rte_vfio_clear_group(vfio_group_fd); |
686 | return -1; |
687 | } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { |
688 | RTE_LOG(ERR, EAL, " %s VFIO group is not viable! " |
689 | "Not all devices in IOMMU group bound to VFIO or unbound\n", |
690 | dev_addr); |
691 | close(vfio_group_fd); |
692 | rte_vfio_clear_group(vfio_group_fd); |
693 | return -1; |
694 | } |
695 | |
696 | /* get the vfio_config it belongs to */ |
697 | vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); |
698 | vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; |
699 | vfio_container_fd = vfio_cfg->vfio_container_fd; |
700 | user_mem_maps = &vfio_cfg->mem_maps; |
701 | |
702 | /* check if group does not have a container yet */ |
703 | if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { |
704 | |
705 | /* add group to a container */ |
706 | ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, |
707 | &vfio_container_fd); |
708 | if (ret) { |
709 | RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " |
710 | "error %i (%s)\n", dev_addr, errno, strerror(errno)); |
711 | close(vfio_group_fd); |
712 | rte_vfio_clear_group(vfio_group_fd); |
713 | return -1; |
714 | } |
715 | |
716 | /* |
717 | * pick an IOMMU type and set up DMA mappings for container |
718 | * |
719 | * needs to be done only once, only when first group is |
720 | * assigned to a container and only in primary process. |
721 | * Note this can happen several times with the hotplug |
722 | * functionality. |
723 | */ |
724 | if (internal_config.process_type == RTE_PROC_PRIMARY && |
725 | vfio_cfg->vfio_active_groups == 1 && |
726 | vfio_group_device_count(vfio_group_fd) == 0) { |
727 | const struct vfio_iommu_type *t; |
728 | |
729 | /* select an IOMMU type which we will be using */ |
730 | t = vfio_set_iommu_type(vfio_container_fd); |
731 | if (!t) { |
732 | RTE_LOG(ERR, EAL, |
733 | " %s failed to select IOMMU type\n", |
734 | dev_addr); |
735 | close(vfio_group_fd); |
736 | rte_vfio_clear_group(vfio_group_fd); |
737 | return -1; |
738 | } |
739 | /* lock memory hotplug before mapping and release it |
740 | * after registering callback, to prevent races |
741 | */ |
742 | rte_rwlock_read_lock(mem_lock); |
743 | if (vfio_cfg == default_vfio_cfg) |
744 | ret = t->dma_map_func(vfio_container_fd); |
745 | else |
746 | ret = 0; |
747 | if (ret) { |
748 | RTE_LOG(ERR, EAL, |
749 | " %s DMA remapping failed, error %i (%s)\n", |
750 | dev_addr, errno, strerror(errno)); |
751 | close(vfio_group_fd); |
752 | rte_vfio_clear_group(vfio_group_fd); |
753 | rte_rwlock_read_unlock(mem_lock); |
754 | return -1; |
755 | } |
756 | |
757 | vfio_cfg->vfio_iommu_type = t; |
758 | |
759 | /* re-map all user-mapped segments */ |
760 | rte_spinlock_recursive_lock(&user_mem_maps->lock); |
761 | |
762 | /* this IOMMU type may not support DMA mapping, but |
763 | * if we have mappings in the list - that means we have |
764 | * previously mapped something successfully, so we can |
765 | * be sure that DMA mapping is supported. |
766 | */ |
767 | for (i = 0; i < user_mem_maps->n_maps; i++) { |
768 | struct user_mem_map *map; |
769 | map = &user_mem_maps->maps[i]; |
770 | |
771 | ret = t->dma_user_map_func( |
772 | vfio_container_fd, |
773 | map->addr, map->iova, map->len, |
774 | 1); |
775 | if (ret) { |
776 | RTE_LOG(ERR, EAL, "Couldn't map user memory for DMA: " |
777 | "va: 0x%" PRIx64 " " |
778 | "iova: 0x%" PRIx64 " " |
779 | "len: 0x%" PRIu64 "\n", |
780 | map->addr, map->iova, |
781 | map->len); |
782 | rte_spinlock_recursive_unlock( |
783 | &user_mem_maps->lock); |
784 | rte_rwlock_read_unlock(mem_lock); |
785 | return -1; |
786 | } |
787 | } |
788 | rte_spinlock_recursive_unlock(&user_mem_maps->lock); |
789 | |
790 | /* register callback for mem events */ |
791 | if (vfio_cfg == default_vfio_cfg) |
792 | ret = rte_mem_event_callback_register( |
793 | VFIO_MEM_EVENT_CLB_NAME, |
794 | vfio_mem_event_callback, NULL); |
795 | else |
796 | ret = 0; |
797 | /* unlock memory hotplug */ |
798 | rte_rwlock_read_unlock(mem_lock); |
799 | |
800 | if (ret && rte_errno != ENOTSUP) { |
801 | RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n"); |
802 | return -1; |
803 | } |
804 | if (ret) |
805 | RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n"); |
806 | else |
807 | RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n"); |
808 | } |
809 | } else if (rte_eal_process_type() != RTE_PROC_PRIMARY && |
810 | vfio_cfg == default_vfio_cfg && |
811 | vfio_cfg->vfio_iommu_type == NULL) { |
812 | /* if we're not a primary process, we do not set up the VFIO |
813 | * container because it's already been set up by the primary |
814 | * process. instead, we simply ask the primary about VFIO type |
815 | * we are using, and set the VFIO config up appropriately. |
816 | */ |
817 | ret = vfio_sync_default_container(); |
818 | if (ret < 0) { |
819 | RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n"); |
820 | close(vfio_group_fd); |
821 | rte_vfio_clear_group(vfio_group_fd); |
822 | return -1; |
823 | } |
824 | /* we have successfully initialized VFIO, notify user */ |
825 | const struct vfio_iommu_type *t = |
826 | default_vfio_cfg->vfio_iommu_type; |
827 | RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n", |
828 | t->type_id, t->name); |
829 | } |
830 | |
831 | /* get a file descriptor for the device */ |
832 | *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr); |
833 | if (*vfio_dev_fd < 0) { |
834 | /* if we cannot get a device fd, this implies a problem with |
835 | * the VFIO group or the container not having IOMMU configured. |
836 | */ |
837 | |
838 | RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n", |
839 | dev_addr); |
840 | close(vfio_group_fd); |
841 | rte_vfio_clear_group(vfio_group_fd); |
842 | return -1; |
843 | } |
844 | |
845 | /* test and setup the device */ |
846 | ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info); |
847 | if (ret) { |
848 | RTE_LOG(ERR, EAL, " %s cannot get device info, " |
849 | "error %i (%s)\n", dev_addr, errno, |
850 | strerror(errno)); |
851 | close(*vfio_dev_fd); |
852 | close(vfio_group_fd); |
853 | rte_vfio_clear_group(vfio_group_fd); |
854 | return -1; |
855 | } |
856 | vfio_group_device_get(vfio_group_fd); |
857 | |
858 | return 0; |
859 | } |
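A usage sketch for rte_vfio_setup_device() as a bus driver might call it (the PCI sysfs base and address are illustrative; setting argsz before a VFIO ioctl is standard practice):

	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
	int dev_fd;
	int ret = rte_vfio_setup_device("/sys/bus/pci/devices", "0000:81:00.0",
			&dev_fd, &device_info);
	/* ret == 0: dev_fd is ready for VFIO_DEVICE_* ioctls;
	 * ret == 1: device not bound to VFIO (skip it);
	 * ret <  0: setup failed */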
860 | |
861 | int |
862 | rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, |
863 | int vfio_dev_fd) |
864 | { |
865 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; |
866 | rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock; |
867 | struct vfio_group_status group_status = { |
868 | .argsz = sizeof(group_status) |
869 | }; |
870 | struct vfio_config *vfio_cfg; |
871 | int vfio_group_fd; |
872 | int iommu_group_num; |
873 | int ret; |
874 | |
875 | /* we don't want any DMA mapping messages to come while we're detaching |
876 | * VFIO device, because this might be the last device and we might need |
877 | * to unregister the callback. |
878 | */ |
879 | rte_rwlock_read_lock(mem_lock); |
880 | |
881 | /* get group number */ |
882 | ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num); |
883 | if (ret <= 0) { |
884 | RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n", |
885 | dev_addr); |
886 | /* This is an error at this point. */ |
887 | ret = -1; |
888 | goto out; |
889 | } |
890 | |
891 | /* get the actual group fd */ |
892 | vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num); |
893 | if (vfio_group_fd <= 0) { |
894 | RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n", |
895 | dev_addr); |
896 | ret = -1; |
897 | goto out; |
898 | } |
899 | |
900 | /* get the vfio_config it belongs to */ |
901 | vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); |
902 | vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; |
903 | |
904 | /* At this point we have an active group. Closing it will trigger the |
905 | * container detachment. If this is the last active group, the VFIO kernel |
906 | * code will unset the container and the IOMMU mappings. |
907 | */ |
908 | |
909 | /* Closing a device */ |
910 | if (close(vfio_dev_fd) < 0) { |
911 | RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n", |
912 | dev_addr); |
913 | ret = -1; |
914 | goto out; |
915 | } |
916 | |
917 | /* A VFIO group can have several devices attached. Only when no |
918 | * devices remain should the group be closed. |
919 | */ |
920 | vfio_group_device_put(vfio_group_fd); |
921 | if (!vfio_group_device_count(vfio_group_fd)) { |
922 | |
923 | if (close(vfio_group_fd) < 0) { |
924 | RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", |
925 | dev_addr); |
926 | ret = -1; |
927 | goto out; |
928 | } |
929 | |
930 | if (rte_vfio_clear_group(vfio_group_fd) < 0) { |
931 | RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", |
932 | dev_addr); |
933 | ret = -1; |
934 | goto out; |
935 | } |
936 | } |
937 | |
938 | /* if there are no active device groups, unregister the callback to |
939 | * avoid spurious attempts to map/unmap memory from VFIO. |
940 | */ |
941 | if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 && |
942 | rte_eal_process_type() != RTE_PROC_SECONDARY) |
943 | rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME, |
944 | NULL); |
945 | |
946 | /* success */ |
947 | ret = 0; |
948 | |
949 | out: |
950 | rte_rwlock_read_unlock(mem_lock); |
951 | return ret; |
952 | } |
953 | |
954 | int |
955 | rte_vfio_enable(const char *modname) |
956 | { |
957 | /* initialize group list */ |
958 | int i, j; |
959 | int vfio_available; |
960 | |
961 | rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER; |
962 | |
963 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { |
964 | vfio_cfgs[i].vfio_container_fd = -1; |
965 | vfio_cfgs[i].vfio_active_groups = 0; |
966 | vfio_cfgs[i].vfio_iommu_type = NULL; |
967 | vfio_cfgs[i].mem_maps.lock = lock; |
968 | |
969 | for (j = 0; j < VFIO_MAX_GROUPS; j++) { |
970 | vfio_cfgs[i].vfio_groups[j].fd = -1; |
971 | vfio_cfgs[i].vfio_groups[j].group_num = -1; |
972 | vfio_cfgs[i].vfio_groups[j].devices = 0; |
973 | } |
974 | } |
975 | |
976 | /* inform the user that we are probing for VFIO */ |
977 | RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); |
978 | |
979 | /* check if vfio module is loaded */ |
980 | vfio_available = rte_eal_check_module(modname); |
981 | |
982 | /* return error directly */ |
983 | if (vfio_available == -1) { |
984 | RTE_LOG(INFO, EAL, "Could not get loaded module details!\n"); |
985 | return -1; |
986 | } |
987 | |
988 | /* return 0 if VFIO modules not loaded */ |
989 | if (vfio_available == 0) { |
990 | RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, " |
991 | "skipping VFIO support...\n"); |
992 | return 0; |
993 | } |
994 | |
995 | if (internal_config.process_type == RTE_PROC_PRIMARY) { |
996 | /* open a new container */ |
997 | default_vfio_cfg->vfio_container_fd = |
998 | rte_vfio_get_container_fd(); |
999 | } else { |
1000 | /* get the default container from the primary process */ |
1001 | default_vfio_cfg->vfio_container_fd = |
1002 | vfio_get_default_container_fd(); |
1003 | } |
1004 | |
1005 | /* check if we have VFIO driver enabled */ |
1006 | if (default_vfio_cfg->vfio_container_fd != -1) { |
1007 | RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); |
1008 | default_vfio_cfg->vfio_enabled = 1; |
1009 | } else { |
1010 | RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); |
1011 | } |
1012 | |
1013 | return 0; |
1014 | } |
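A usage sketch: EAL probes VFIO during init with the kernel module name, e.g.

	if (rte_vfio_enable("vfio") < 0)
		; /* the module check itself failed */
	if (!rte_vfio_is_enabled("vfio"))
		; /* VFIO unusable; fall back to another IO method */

Note that a 0 return from rte_vfio_enable() only means probing completed; whether a container was actually opened is reflected by rte_vfio_is_enabled().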
1015 | |
1016 | int |
1017 | rte_vfio_is_enabled(const char *modname) |
1018 | { |
1019 | const int mod_available = rte_eal_check_module(modname) > 0; |
1020 | return default_vfio_cfg->vfio_enabled && mod_available; |
1021 | } |
1022 | |
1023 | int |
1024 | vfio_get_default_container_fd(void) |
1025 | { |
1026 | struct rte_mp_msg mp_req, *mp_rep; |
1027 | struct rte_mp_reply mp_reply; |
1028 | struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; |
1029 | struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; |
1030 | |
1031 | if (default_vfio_cfg->vfio_enabled) |
1032 | return default_vfio_cfg->vfio_container_fd; |
1033 | |
1034 | if (internal_config.process_type == RTE_PROC_PRIMARY) { |
1035 | /* if we were a secondary process, we would try requesting the |
1036 | * container fd from the primary, but we're the primary |
1037 | * process, so just exit here |
1038 | */ |
1039 | return -1; |
1040 | } |
1041 | |
1042 | p->req = SOCKET_REQ_DEFAULT_CONTAINER; |
1043 | strcpy(mp_req.name, EAL_VFIO_MP); |
1044 | mp_req.len_param = sizeof(*p); |
1045 | mp_req.num_fds = 0; |
1046 | |
1047 | if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && |
1048 | mp_reply.nb_received == 1) { |
1049 | mp_rep = &mp_reply.msgs[0]; |
1050 | p = (struct vfio_mp_param *)mp_rep->param; |
1051 | if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { |
1052 | free(mp_reply.msgs); |
1053 | return mp_rep->fds[0]; |
1054 | } |
1055 | free(mp_reply.msgs); |
1056 | } |
1057 | |
1058 | RTE_LOG(ERR, EAL, " cannot request default container fd\n"); |
1059 | return -1; |
1060 | } |
1061 | |
1062 | int |
1063 | vfio_get_iommu_type(void) |
1064 | { |
1065 | if (default_vfio_cfg->vfio_iommu_type == NULL) |
1066 | return -1; |
1067 | |
1068 | return default_vfio_cfg->vfio_iommu_type->type_id; |
1069 | } |
1070 | |
1071 | const struct vfio_iommu_type * |
1072 | vfio_set_iommu_type(int vfio_container_fd) |
1073 | { |
1074 | unsigned idx; |
1075 | for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { |
1076 | const struct vfio_iommu_type *t = &iommu_types[idx]; |
1077 | |
1078 | int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, |
1079 | t->type_id); |
1080 | if (!ret) { |
1081 | RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n", |
1082 | t->type_id, t->name); |
1083 | return t; |
1084 | } |
1085 | /* not an error, there may be more supported IOMMU types */ |
1086 | RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, " |
1087 | "error %i (%s)\n", t->type_id, t->name, errno, |
1088 | strerror(errno)); |
1089 | } |
1090 | /* if we didn't find a suitable IOMMU type, fail */ |
1091 | return NULL; |
1092 | } |
1093 | |
1094 | int |
1095 | vfio_has_supported_extensions(int vfio_container_fd) |
1096 | { |
1097 | int ret; |
1098 | unsigned idx, n_extensions = 0; |
1099 | for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { |
1100 | const struct vfio_iommu_type *t = &iommu_types[idx]; |
1101 | |
1102 | ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, |
1103 | t->type_id); |
1104 | if (ret < 0) { |
1105 | RTE_LOG(ERR, EAL, " could not get IOMMU type, " |
1106 | "error %i (%s)\n", errno, |
1107 | strerror(errno)); |
1108 | close(vfio_container_fd); |
1109 | return -1; |
1110 | } else if (ret == 1) { |
1111 | /* we found a supported extension */ |
1112 | n_extensions++; |
1113 | } |
1114 | RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n", |
1115 | t->type_id, t->name, |
1116 | ret ? "supported" : "not supported"); |
1117 | } |
1118 | |
1119 | /* if we didn't find any supported IOMMU types, fail */ |
1120 | if (!n_extensions) { |
1121 | close(vfio_container_fd); |
1122 | return -1; |
1123 | } |
1124 | |
1125 | return 0; |
1126 | } |
1127 | |
1128 | int |
1129 | rte_vfio_get_container_fd(void) |
1130 | { |
1131 | int ret, vfio_container_fd; |
1132 | struct rte_mp_msg mp_req, *mp_rep; |
1133 | struct rte_mp_reply mp_reply; |
1134 | struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; |
1135 | struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; |
1136 | |
1137 | |
1138 | /* if we're in a primary process, try to open the container */ |
1139 | if (internal_config.process_type == RTE_PROC_PRIMARY) { |
1140 | vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); |
1141 | if (vfio_container_fd < 0) { |
1142 | RTE_LOG(ERR, EAL, " cannot open VFIO container, " |
1143 | "error %i (%s)\n", errno, strerror(errno)); |
1144 | return -1; |
1145 | } |
1146 | |
1147 | /* check VFIO API version */ |
1148 | ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION); |
1149 | if (ret != VFIO_API_VERSION) { |
1150 | if (ret < 0) |
1151 | RTE_LOG(ERR, EAL, " could not get VFIO API version, " |
1152 | "error %i (%s)\n", errno, strerror(errno)); |
1153 | else |
1154 | RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n"); |
1155 | close(vfio_container_fd); |
1156 | return -1; |
1157 | } |
1158 | |
1159 | ret = vfio_has_supported_extensions(vfio_container_fd); |
1160 | if (ret) { |
1161 | RTE_LOG(ERR, EAL, " no supported IOMMU " |
1162 | "extensions found!\n"); |
1163 | return -1; |
1164 | } |
1165 | |
1166 | return vfio_container_fd; |
1167 | } |
1168 | /* |
1169 | * if we're in a secondary process, request container fd from the |
1170 | * primary process via mp channel |
1171 | */ |
1172 | p->req = SOCKET_REQ_CONTAINER; |
1173 | strcpy(mp_req.name, EAL_VFIO_MP); |
1174 | mp_req.len_param = sizeof(*p); |
1175 | mp_req.num_fds = 0; |
1176 | |
1177 | vfio_container_fd = -1; |
Value stored to 'vfio_container_fd' is never read | |
1178 | if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && |
1179 | mp_reply.nb_received == 1) { |
1180 | mp_rep = &mp_reply.msgs[0]; |
1181 | p = (struct vfio_mp_param *)mp_rep->param; |
1182 | if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { |
1183 | vfio_container_fd = mp_rep->fds[0]; |
1184 | free(mp_reply.msgs); |
1185 | return vfio_container_fd; |
1186 | } |
1187 | free(mp_reply.msgs); |
1188 | } |
1189 | |
1190 | RTE_LOG(ERR, EAL, " cannot request container fd\n"); |
1191 | return -1; |
1192 | } |
1193 | |
1194 | int |
1195 | rte_vfio_get_group_num(const char *sysfs_base, |
1196 | const char *dev_addr, int *iommu_group_num) |
1197 | { |
1198 | char linkname[PATH_MAX]; |
1199 | char filename[PATH_MAX]; |
1200 | char *tok[16], *group_tok, *end; |
1201 | int ret; |
1202 | |
1203 | memset(linkname, 0, sizeof(linkname)); |
1204 | memset(filename, 0, sizeof(filename)); |
1205 | |
1206 | /* try to find out IOMMU group for this device */ |
1207 | snprintf(linkname, sizeof(linkname), |
1208 | "%s/%s/iommu_group", sysfs_base, dev_addr); |
1209 | |
1210 | ret = readlink(linkname, filename, sizeof(filename)); |
1211 | |
1212 | /* if the link doesn't exist, no VFIO for us */ |
1213 | if (ret < 0) |
1214 | return 0; |
1215 | |
1216 | ret = rte_strsplit(filename, sizeof(filename), |
1217 | tok, RTE_DIM(tok), '/'); |
1218 | |
1219 | if (ret <= 0) { |
1220 | RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", dev_addr); |
1221 | return -1; |
1222 | } |
1223 | |
1224 | /* IOMMU group is always the last token */ |
1225 | errno = 0; |
1226 | group_tok = tok[ret - 1]; |
1227 | end = group_tok; |
1228 | *iommu_group_num = strtol(group_tok, &end, 10); |
1229 | if ((end != group_tok && *end != '\0') || errno != 0) { |
1230 | RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr); |
1231 | return -1; |
1232 | } |
1233 | |
1234 | return 1; |
1235 | } |
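A worked example (illustrative paths, not taken from this file): for dev_addr "0000:81:00.0" under sysfs_base "/sys/bus/pci/devices", the iommu_group symlink typically resolves to something like "../../../kernel/iommu_groups/15". rte_strsplit() on '/' makes "15" the last token, so *iommu_group_num is parsed as 15 and the function returns 1.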
1236 | |
1237 | static int |
1238 | type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms, |
1239 | void *arg) |
1240 | { |
1241 | int *vfio_container_fd = arg; |
1242 | |
1243 | if (msl->external) |
1244 | return 0; |
1245 | |
1246 | return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, |
1247 | ms->len, 1); |
1248 | } |
1249 | |
1250 | static int |
1251 | vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, |
1252 | uint64_t len, int do_map) |
1253 | { |
1254 | struct vfio_iommu_type1_dma_map dma_map; |
1255 | struct vfio_iommu_type1_dma_unmap dma_unmap; |
1256 | int ret; |
1257 | |
1258 | if (do_map != 0) { |
1259 | memset(&dma_map, 0, sizeof(dma_map)); |
1260 | dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); |
1261 | dma_map.vaddr = vaddr; |
1262 | dma_map.size = len; |
1263 | dma_map.iova = iova; |
1264 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | |
1265 | VFIO_DMA_MAP_FLAG_WRITE; |
1266 | |
1267 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); |
1268 | if (ret) { |
1269 | /** |
1270 | * In case the mapping was already done EEXIST will be |
1271 | * returned from kernel. |
1272 | */ |
1273 | if (errno == EEXIST) { |
1274 | RTE_LOG(DEBUG, EAL, |
1275 | " Memory segment is already mapped," |
1276 | " skipping"); |
1277 | } else { |
1278 | RTE_LOG(ERR, EAL, |
1279 | " cannot set up DMA remapping," |
1280 | " error %i (%s)\n", |
1281 | errno, strerror(errno)); |
1282 | return -1; |
1283 | } |
1284 | } |
1285 | } else { |
1286 | memset(&dma_unmap, 0, sizeof(dma_unmap)); |
1287 | dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); |
1288 | dma_unmap.size = len; |
1289 | dma_unmap.iova = iova; |
1290 | |
1291 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, |
1292 | &dma_unmap); |
1293 | if (ret) { |
1294 | RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n", |
1295 | errno, strerror(errno)); |
1296 | return -1; |
1297 | } |
1298 | } |
1299 | |
1300 | return 0; |
1301 | } |
1302 | |
1303 | static int |
1304 | vfio_type1_dma_map(int vfio_container_fd) |
1305 | { |
1306 | return rte_memseg_walk(type1_map, &vfio_container_fd); |
1307 | } |
1308 | |
1309 | static int |
1310 | vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, |
1311 | uint64_t len, int do_map) |
1312 | { |
1313 | struct vfio_iommu_type1_dma_map dma_map; |
1314 | struct vfio_iommu_type1_dma_unmap dma_unmap; |
1315 | int ret; |
1316 | struct vfio_iommu_spapr_register_memory reg = { |
1317 | .argsz = sizeof(reg), |
1318 | .flags = 0 |
1319 | }; |
1320 | reg.vaddr = (uintptr_t) vaddr; |
1321 | reg.size = len; |
1322 | |
1323 | if (do_map != 0) { |
1324 | ret = ioctl(vfio_container_fd, |
1325 | VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg); |
1326 | if (ret) { |
1327 | RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, " |
1328 | "error %i (%s)\n", errno, strerror(errno)); |
1329 | return -1; |
1330 | } |
1331 | |
1332 | memset(&dma_map, 0, sizeof(dma_map)); |
1333 | dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); |
1334 | dma_map.vaddr = vaddr; |
1335 | dma_map.size = len; |
1336 | dma_map.iova = iova; |
1337 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | |
1338 | VFIO_DMA_MAP_FLAG_WRITE; |
1339 | |
1340 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); |
1341 | if (ret) { |
1342 | /** |
1343 | * In case the mapping was already done EBUSY will be |
1344 | * returned from kernel. |
1345 | */ |
1346 | if (errno == EBUSY) { |
1347 | RTE_LOG(DEBUG, EAL, |
1348 | " Memory segment is already mapped," |
1349 | " skipping"); |
1350 | } else { |
1351 | RTE_LOG(ERR, EAL, |
1352 | " cannot set up DMA remapping," |
1353 | " error %i (%s)\n", errno, |
1354 | strerror(errno)); |
1355 | return -1; |
1356 | } |
1357 | } |
1358 | |
1359 | } else { |
1360 | ret = ioctl(vfio_container_fd, |
1361 | VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 18)) << 0) | ((0) << ((0 +8)+8))), ®); |
1362 | if (ret) { |
1363 | RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",rte_log(4U, 0, "EAL" ": " " cannot unregister vaddr for IOMMU, error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) |
1364 | errno, strerror(errno))rte_log(4U, 0, "EAL" ": " " cannot unregister vaddr for IOMMU, error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); |
1365 | return -1; |
1366 | } |
1367 | |
1368 | memset(&dma_unmap, 0, sizeof(dma_unmap)); |
1369 | dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); |
1370 | dma_unmap.size = len; |
1371 | dma_unmap.iova = iova; |
1372 | |
1373 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 14)) << 0) | ((0) << ((0 +8)+8))), |
1374 | &dma_unmap); |
1375 | if (ret) { |
1376 | RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",rte_log(4U, 0, "EAL" ": " " cannot clear DMA remapping, error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) |
1377 | errno, strerror(errno))rte_log(4U, 0, "EAL" ": " " cannot clear DMA remapping, error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); |
1378 | return -1; |
1379 | } |
1380 | } |
1381 | |
1382 | return 0; |
1383 | } |

static int
vfio_spapr_map_walk(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg)
{
	int *vfio_container_fd = arg;

	if (msl->external)
		return 0;

	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
			ms->len, 1);
}

struct spapr_walk_param {
	uint64_t window_size;
	uint64_t hugepage_sz;
};

static int
vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg)
{
	struct spapr_walk_param *param = arg;
	uint64_t max = ms->iova + ms->len;

	if (msl->external)
		return 0;

	if (max > param->window_size) {
		param->hugepage_sz = ms->hugepage_sz;
		param->window_size = max;
	}

	return 0;
}

static int
vfio_spapr_create_new_dma_window(int vfio_container_fd,
		struct vfio_iommu_spapr_tce_create *create) {
	struct vfio_iommu_spapr_tce_remove remove = {
		.argsz = sizeof(remove),
	};
	struct vfio_iommu_spapr_tce_info info = {
		.argsz = sizeof(info),
	};
	int ret;

	/* query spapr iommu info */
	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
	if (ret) {
		RTE_LOG(ERR, EAL, "  cannot get iommu info, "
			"error %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	/* remove the default 32-bit DMA window */
	remove.start_addr = info.dma32_window_start;
	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
	if (ret) {
		RTE_LOG(ERR, EAL, "  cannot remove default DMA window, "
			"error %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	/* create new DMA window */
	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create);
	if (ret) {
		RTE_LOG(ERR, EAL, "  cannot create new DMA window, "
			"error %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	if (create->start_addr != 0) {
		RTE_LOG(ERR, EAL, "  DMA window start address != 0\n");
		return -1;
	}

	return 0;
}

static int
vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len, int do_map)
{
	struct spapr_walk_param param;
	struct vfio_iommu_spapr_tce_create create = {
		.argsz = sizeof(create),
	};
	struct vfio_config *vfio_cfg;
	struct user_mem_maps *user_mem_maps;
	int i, ret = 0;

	vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "  invalid container fd!\n");
		return -1;
	}

	user_mem_maps = &vfio_cfg->mem_maps;
	rte_spinlock_recursive_lock(&user_mem_maps->lock);

	/* check if window size needs to be adjusted */
	memset(&param, 0, sizeof(param));

	/* we're inside a callback so use thread-unsafe version */
	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
			&param) < 0) {
		RTE_LOG(ERR, EAL, "Could not get window size\n");
		ret = -1;
		goto out;
	}

	/* also take user-mapped regions into account */
	for (i = 0; i < user_mem_maps->n_maps; i++) {
		uint64_t max = user_mem_maps->maps[i].iova +
				user_mem_maps->maps[i].len;
		/* accumulate into param.window_size: create.window_size is
		 * recomputed from param.window_size below, so storing the
		 * maximum there would be silently overwritten.
		 */
		param.window_size = RTE_MAX(param.window_size, max);
	}

	/* sPAPR requires window size to be a power of 2 */
	create.window_size = rte_align64pow2(param.window_size);
	create.page_shift = __builtin_ctzll(param.hugepage_sz);
	create.levels = 1;

	if (do_map) {
		void *addr;
		/* re-create window and remap the entire memory */
		if (iova > create.window_size) {
			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
					&create) < 0) {
				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
				ret = -1;
				goto out;
			}
			/* we're inside a callback, so use thread-unsafe version */
			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
					&vfio_container_fd) < 0) {
				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
				ret = -1;
				goto out;
			}
			/* remap all user maps */
			for (i = 0; i < user_mem_maps->n_maps; i++) {
				struct user_mem_map *map =
						&user_mem_maps->maps[i];
				if (vfio_spapr_dma_do_map(vfio_container_fd,
						map->addr, map->iova, map->len,
						1)) {
					RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n");
					ret = -1;
					goto out;
				}
			}
		}

		/* now that we've remapped all of the memory that was present
		 * before, map the segment that we were requested to map.
		 *
		 * however, if we were called from within a callback, the
		 * memory we were called with is already in the memseg list,
		 * so the remapping above will have covered that segment.
		 *
		 * virt2memseg_list is a relatively cheap check, so use that:
		 * if the memory is within any memseg list, it is a memseg and
		 * is therefore already mapped.
		 */
		addr = (void *)(uintptr_t)vaddr;
		if (rte_mem_virt2memseg_list(addr) == NULL &&
				vfio_spapr_dma_do_map(vfio_container_fd,
					vaddr, iova, len, 1) < 0) {
			RTE_LOG(ERR, EAL, "Could not map segment\n");
			ret = -1;
			goto out;
		}
	} else {
		/* for unmap, check whether iova is within the DMA window */
		if (iova > create.window_size) {
			RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap\n");
			ret = -1;
			goto out;
		}

		vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
	}
out:
	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
	return ret;
}

static int
vfio_spapr_dma_map(int vfio_container_fd)
{
	struct vfio_iommu_spapr_tce_create create = {
		.argsz = sizeof(create),
	};
	struct spapr_walk_param param;

	memset(&param, 0, sizeof(param));

	/* create DMA window from 0 to max(phys_addr + len) */
	rte_memseg_walk(vfio_spapr_window_size_walk, &param);

	/* sPAPR requires window size to be a power of 2 */
	create.window_size = rte_align64pow2(param.window_size);
	create.page_shift = __builtin_ctzll(param.hugepage_sz);
	create.levels = 1;

	if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) {
		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
		return -1;
	}

	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
		return -1;

	return 0;
}

static int
vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
{
	/* No-IOMMU mode does not need DMA mapping */
	return 0;
}

static int
vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
			 uint64_t __rte_unused vaddr,
			 uint64_t __rte_unused iova, uint64_t __rte_unused len,
			 int __rte_unused do_map)
{
	/* No-IOMMU mode does not need DMA mapping */
	return 0;
}

static int
vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
		uint64_t len, int do_map)
{
	const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type;

	if (!t) {
		RTE_LOG(ERR, EAL, "  VFIO support not initialized\n");
		rte_errno = ENODEV;
		return -1;
	}

	if (!t->dma_user_map_func) {
		RTE_LOG(ERR, EAL,
			"  VFIO custom DMA region mapping not supported by IOMMU %s\n",
			t->name);
		rte_errno = ENOTSUP;
		return -1;
	}

	return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova,
			len, do_map);
}

static int
container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct user_mem_map *new_map;
	struct user_mem_maps *user_mem_maps;
	int ret = 0;

	user_mem_maps = &vfio_cfg->mem_maps;
	rte_spinlock_recursive_lock(&user_mem_maps->lock);
	if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
		RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
		rte_errno = ENOMEM;
		ret = -1;
		goto out;
	}
	/* map the entry */
	if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
		/* technically, this will fail if there are currently no
		 * devices plugged in, even though the mapping might have
		 * succeeded if a device were added later. however, since we
		 * cannot verify whether a mapping is valid without a device
		 * attached, consider it unsupported: we can't just store any
		 * old mapping and pollute the list of active mappings
		 * willy-nilly.
		 */
		RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
		ret = -1;
		goto out;
	}
	/* create new user mem map entry */
	new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
	new_map->addr = vaddr;
	new_map->iova = iova;
	new_map->len = len;

	compact_user_maps(user_mem_maps);
out:
	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
	return ret;
}
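
/*
 * Editor's note: a minimal caller-side sketch (illustration only; `cfg` is a
 * hypothetical, already-initialized struct vfio_config pointer) showing how
 * the rte_errno convention above is consumed. ENOMEM means the fixed-size
 * user map table is full; ENODEV/ENOTSUP are set by vfio_dma_mem_map() when
 * the mapping itself is rejected:
 *
 *	if (container_dma_map(cfg, vaddr, iova, len) < 0) {
 *		if (rte_errno == ENOMEM) {
 *			// too many user mappings; unmap something first
 *		} else {
 *			// mapping rejected, e.g. VFIO not initialized or
 *			// custom mappings unsupported by this IOMMU type
 *		}
 *	}
 */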

static int
container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct user_mem_map *map, *new_map = NULL;
	struct user_mem_maps *user_mem_maps;
	int ret = 0;

	user_mem_maps = &vfio_cfg->mem_maps;
	rte_spinlock_recursive_lock(&user_mem_maps->lock);

	/* find our mapping */
	map = find_user_mem_map(user_mem_maps, vaddr, iova, len);
	if (!map) {
		RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n");
		rte_errno = EINVAL;
		ret = -1;
		goto out;
	}
	if (map->addr != vaddr || map->iova != iova || map->len != len) {
		/* we're partially unmapping a previously mapped region, so we
		 * need to split entry into two.
		 */
		if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
			RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
			rte_errno = ENOMEM;
			ret = -1;
			goto out;
		}
		new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
	}

	/* unmap the entry */
	if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) {
		/* there may not be any devices plugged in, so unmapping will
		 * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
		 * stop us from removing the mapping, as the assumption is we
		 * won't be needing this memory any more and thus will want to
		 * prevent it from being remapped again on hotplug. so, only
		 * fail if we indeed failed to unmap (e.g. if the mapping was
		 * within our mapped range but had invalid alignment).
		 */
		if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
			RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n");
			ret = -1;
			goto out;
		} else {
			RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n");
		}
	}
	/* remove map from the list of active mappings */
	if (new_map != NULL) {
		adjust_map(map, new_map, vaddr, len);

		/* if we've created a new map by splitting, sort everything */
		if (!is_null_map(new_map)) {
			compact_user_maps(user_mem_maps);
		} else {
			/* we've created a new mapping, but it was unused */
			user_mem_maps->n_maps--;
		}
	} else {
		memset(map, 0, sizeof(*map));
		compact_user_maps(user_mem_maps);
		user_mem_maps->n_maps--;
	}

out:
	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
	return ret;
}
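
/*
 * Editor's note: an illustrative sketch (hypothetical values; `cfg`, `va`,
 * `iova` and the page-size constant `PG` are assumptions) of the splitting
 * behaviour above. Unmapping the middle of a previously mapped region leaves
 * two entries behind, one for each remaining end:
 *
 *	// previously: container_dma_map(cfg, va, iova, 6 * PG) succeeded
 *	container_dma_unmap(cfg, va + 2 * PG, iova + 2 * PG, 2 * PG);
 *	// user_mem_maps now holds [va, va + 2*PG) and [va + 4*PG, va + 6*PG)
 */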

int
rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
{
	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	return container_dma_map(default_vfio_cfg, vaddr, iova, len);
}

int
rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
{
	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	return container_dma_unmap(default_vfio_cfg, vaddr, iova, len);
}
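
/*
 * Editor's note: a minimal public-API usage sketch for the two wrappers
 * above (illustration only; `buf` and `len` are assumed to be a page-aligned
 * buffer owned by the caller, and the 1:1 VA:IOVA scheme is chosen purely
 * for brevity). Note that len == 0 fails with rte_errno == EINVAL:
 *
 *	uint64_t va = (uint64_t)(uintptr_t)buf;
 *
 *	if (rte_vfio_dma_map(va, va, len) < 0)
 *		rte_panic("cannot map buffer for DMA\n");
 *	// ... device DMA to/from buf ...
 *	if (rte_vfio_dma_unmap(va, va, len) < 0)
 *		rte_panic("cannot unmap buffer\n");
 */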

int
rte_vfio_noiommu_is_enabled(void)
{
	int fd;
	ssize_t cnt;
	char c;

	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
	if (fd < 0) {
		if (errno != ENOENT) {
			RTE_LOG(ERR, EAL, "  cannot open vfio noiommu file %i (%s)\n",
					errno, strerror(errno));
			return -1;
		}
		/*
		 * else the file does not exist,
		 * i.e. noiommu is not enabled
		 */
		return 0;
	}

	cnt = read(fd, &c, 1);
	close(fd);
	if (cnt != 1) {
		RTE_LOG(ERR, EAL, "  unable to read from vfio noiommu "
				"file %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	return c == 'Y';
}
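
/*
 * Editor's note: a short sketch of how the check above is typically
 * consumed (illustration only). In unsafe no-IOMMU mode there is no address
 * translation, so DMA must use physical addresses rather than remapped IOVAs:
 *
 *	switch (rte_vfio_noiommu_is_enabled()) {
 *	case 1:
 *		// no-IOMMU mode: use physical addresses for DMA
 *		break;
 *	case 0:
 *		// IOMMU present: IOVA remapping is available
 *		break;
 *	default:
 *		// could not read the sysfs knob
 *		break;
 *	}
 */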

int
rte_vfio_container_create(void)
{
	int i;

	/* Find an empty slot to store new vfio config */
	for (i = 1; i < VFIO_MAX_CONTAINERS; i++) {
		if (vfio_cfgs[i].vfio_container_fd == -1)
			break;
	}

	if (i == VFIO_MAX_CONTAINERS) {
		RTE_LOG(ERR, EAL, "exceeded max vfio container limit\n");
		return -1;
	}

	vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd();
	if (vfio_cfgs[i].vfio_container_fd < 0) {
		RTE_LOG(NOTICE, EAL, "failed to create a new container\n");
		return -1;
	}

	return vfio_cfgs[i].vfio_container_fd;
}

int __rte_experimental
rte_vfio_container_destroy(int container_fd)
{
	struct vfio_config *vfio_cfg;
	int i;

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	for (i = 0; i < VFIO_MAX_GROUPS; i++)
		if (vfio_cfg->vfio_groups[i].group_num != -1)
			rte_vfio_container_group_unbind(container_fd,
				vfio_cfg->vfio_groups[i].group_num);

	close(container_fd);
	vfio_cfg->vfio_container_fd = -1;
	vfio_cfg->vfio_active_groups = 0;
	vfio_cfg->vfio_iommu_type = NULL;

	return 0;
}

int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
{
	struct vfio_config *vfio_cfg;

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	return vfio_get_group_fd(vfio_cfg, iommu_group_num);
}

int
rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
{
	struct vfio_config *vfio_cfg;
	struct vfio_group *cur_grp = NULL;
	int i;

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	for (i = 0; i < VFIO_MAX_GROUPS; i++) {
		if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) {
			cur_grp = &vfio_cfg->vfio_groups[i];
			break;
		}
	}

	/* This should not happen */
	if (i == VFIO_MAX_GROUPS || cur_grp == NULL) {
		RTE_LOG(ERR, EAL, "Specified group number not found\n");
		return -1;
	}

	if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) {
		RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for"
			" iommu_group_num %d\n", iommu_group_num);
		return -1;
	}
	cur_grp->group_num = -1;
	cur_grp->fd = -1;
	cur_grp->devices = 0;
	vfio_cfg->vfio_active_groups--;

	return 0;
}

int
rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct vfio_config *vfio_cfg;

	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	return container_dma_map(vfio_cfg, vaddr, iova, len);
}

int
rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct vfio_config *vfio_cfg;

	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	return container_dma_unmap(vfio_cfg, vaddr, iova, len);
}
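
/*
 * Editor's note: an end-to-end sketch of the multi-container API above
 * (illustration only; the group number 42 and the `va`/`iova`/`len` buffer
 * parameters are hypothetical, and error handling is elided for brevity):
 *
 *	int cfd = rte_vfio_container_create();
 *	rte_vfio_container_group_bind(cfd, 42);	// bind IOMMU group 42
 *	rte_vfio_container_dma_map(cfd, va, iova, len);
 *	// ... assign device, run DMA ...
 *	rte_vfio_container_dma_unmap(cfd, va, iova, len);
 *	rte_vfio_container_destroy(cfd);	// also unbinds remaining groups
 */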

#else

int
rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
		  __rte_unused uint64_t len)
{
	return -1;
}

int
rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
		__rte_unused uint64_t len)
{
	return -1;
}

int
rte_vfio_setup_device(__rte_unused const char *sysfs_base,
		__rte_unused const char *dev_addr,
		__rte_unused int *vfio_dev_fd,
		__rte_unused struct vfio_device_info *device_info)
{
	return -1;
}

int
rte_vfio_release_device(__rte_unused const char *sysfs_base,
		__rte_unused const char *dev_addr, __rte_unused int fd)
{
	return -1;
}

int
rte_vfio_enable(__rte_unused const char *modname)
{
	return -1;
}

int
rte_vfio_is_enabled(__rte_unused const char *modname)
{
	return -1;
}

int
rte_vfio_noiommu_is_enabled(void)
{
	return -1;
}

int
rte_vfio_clear_group(__rte_unused int vfio_group_fd)
{
	return -1;
}

int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
		__rte_unused const char *dev_addr,
		__rte_unused int *iommu_group_num)
{
	return -1;
}

int
rte_vfio_get_container_fd(void)
{
	return -1;
}

int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_create(void)
{
	return -1;
}

int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
	return -1;
}

int
rte_vfio_container_group_bind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_dma_map(__rte_unused int container_fd,
		__rte_unused uint64_t vaddr,
		__rte_unused uint64_t iova,
		__rte_unused uint64_t len)
{
	return -1;
}

int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
		__rte_unused uint64_t vaddr,
		__rte_unused uint64_t iova,
		__rte_unused uint64_t len)
{
	return -1;
}

#endif /* VFIO_PRESENT */