File: home/bhubbard/working/src/ceph/src/spdk/dpdk/lib/librte_eal/linux/eal/eal_vfio.c
Warning: line 1053, column 11: Use of memory after it is freed
1 | /* SPDX-License-Identifier: BSD-3-Clause
2 |  * Copyright(c) 2010-2018 Intel Corporation
3 |  */
4 |
5 | #include <inttypes.h>
6 | #include <string.h>
7 | #include <fcntl.h>
8 | #include <unistd.h>
9 | #include <sys/ioctl.h>
10 |
11 | #include <rte_errno.h>
12 | #include <rte_log.h>
13 | #include <rte_memory.h>
14 | #include <rte_eal_memconfig.h>
15 | #include <rte_vfio.h>
16 |
17 | #include "eal_filesystem.h"
18 | #include "eal_vfio.h"
19 | #include "eal_private.h"
20 |
21 | #ifdef VFIO_PRESENT
22 |
23 | #define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb"
24 |
25 | /* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can
26 |  * recreate the mappings for DPDK segments, but we cannot do so for memory that
27 |  * was registered by the user themselves, so we need to store the user mappings
28 |  * somewhere, to recreate them later.
29 |  */
30 | #define VFIO_MAX_USER_MEM_MAPS 256
31 | struct user_mem_map {
32 |     uint64_t addr;
33 |     uint64_t iova;
34 |     uint64_t len;
35 | };
36 |
37 | struct user_mem_maps {
38 |     rte_spinlock_recursive_t lock;
39 |     int n_maps;
40 |     struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
41 | };
42 |
43 | struct vfio_config {
44 |     int vfio_enabled;
45 |     int vfio_container_fd;
46 |     int vfio_active_groups;
47 |     const struct vfio_iommu_type *vfio_iommu_type;
48 |     struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
49 |     struct user_mem_maps mem_maps;
50 | };
51 |
52 | /* per-process VFIO config */
53 | static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS];
54 | static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0];
55 |
56 | static int vfio_type1_dma_map(int);
57 | static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
58 | static int vfio_spapr_dma_map(int);
59 | static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
60 | static int vfio_noiommu_dma_map(int);
61 | static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
62 | static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr,
63 |         uint64_t iova, uint64_t len, int do_map);
64 |
65 | /* IOMMU types we support */
66 | static const struct vfio_iommu_type iommu_types[] = {
67 |     /* x86 IOMMU, otherwise known as type 1 */
68 |     {
69 |         .type_id = RTE_VFIO_TYPE1,
70 |         .name = "Type 1",
71 |         .dma_map_func = &vfio_type1_dma_map,
72 |         .dma_user_map_func = &vfio_type1_dma_mem_map
73 |     },
74 |     /* ppc64 IOMMU, otherwise known as spapr */
75 |     {
76 |         .type_id = RTE_VFIO_SPAPR,
77 |         .name = "sPAPR",
78 |         .dma_map_func = &vfio_spapr_dma_map,
79 |         .dma_user_map_func = &vfio_spapr_dma_mem_map
80 |     },
81 |     /* IOMMU-less mode */
82 |     {
83 |         .type_id = RTE_VFIO_NOIOMMU,
84 |         .name = "No-IOMMU",
85 |         .dma_map_func = &vfio_noiommu_dma_map,
86 |         .dma_user_map_func = &vfio_noiommu_dma_mem_map
87 |     },
88 | };
89 |
90 | static int
91 | is_null_map(const struct user_mem_map *map)
92 | {
93 |     return map->addr == 0 && map->iova == 0 && map->len == 0;
94 | }
95 |
96 | /* we may need to merge user mem maps together in case of user mapping/unmapping
97 |  * chunks of memory, so we'll need a comparator function to sort segments.
98 |  */
99 | static int
100 | user_mem_map_cmp(const void *a, const void *b)
101 | {
102 |     const struct user_mem_map *umm_a = a;
103 |     const struct user_mem_map *umm_b = b;
104 |
105 |     /* move null entries to end */
106 |     if (is_null_map(umm_a))
107 |         return 1;
108 |     if (is_null_map(umm_b))
109 |         return -1;
110 |
111 |     /* sort by iova first */
112 |     if (umm_a->iova < umm_b->iova)
113 |         return -1;
114 |     if (umm_a->iova > umm_b->iova)
115 |         return 1;
116 |
117 |     if (umm_a->addr < umm_b->addr)
118 |         return -1;
119 |     if (umm_a->addr > umm_b->addr)
120 |         return 1;
121 |
122 |     if (umm_a->len < umm_b->len)
123 |         return -1;
124 |     if (umm_a->len > umm_b->len)
125 |         return 1;
126 |
127 |     return 0;
128 | }
129 |
130 | /* adjust user map entry. this may result in shortening of existing map, or in
131 |  * splitting existing map in two pieces.
132 |  */
133 | static void
134 | adjust_map(struct user_mem_map *src, struct user_mem_map *end,
135 |         uint64_t remove_va_start, uint64_t remove_len)
136 | {
137 |     /* if va start is same as start address, we're simply moving start */
138 |     if (remove_va_start == src->addr) {
139 |         src->addr += remove_len;
140 |         src->iova += remove_len;
141 |         src->len -= remove_len;
142 |     } else if (remove_va_start + remove_len == src->addr + src->len) {
143 |         /* we're shrinking mapping from the end */
144 |         src->len -= remove_len;
145 |     } else {
146 |         /* we're blowing a hole in the middle */
147 |         struct user_mem_map tmp;
148 |         uint64_t total_len = src->len;
149 |
150 |         /* adjust source segment length */
151 |         src->len = remove_va_start - src->addr;
152 |
153 |         /* create temporary segment in the middle */
154 |         tmp.addr = src->addr + src->len;
155 |         tmp.iova = src->iova + src->len;
156 |         tmp.len = remove_len;
157 |
158 |         /* populate end segment - this one we will be keeping */
159 |         end->addr = tmp.addr + tmp.len;
160 |         end->iova = tmp.iova + tmp.len;
161 |         end->len = total_len - src->len - tmp.len;
162 |     }
163 | }
164 |
165 | /* try merging two maps into one, return 1 if succeeded */
166 | static int
167 | merge_map(struct user_mem_map *left, struct user_mem_map *right)
168 | {
169 |     if (left->addr + left->len != right->addr)
170 |         return 0;
171 |     if (left->iova + left->len != right->iova)
172 |         return 0;
173 |
174 |     left->len += right->len;
175 |
176 |     memset(right, 0, sizeof(*right));
177 |
178 |     return 1;
179 | }
180 |
181 | static struct user_mem_map *
182 | find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
183 |         uint64_t iova, uint64_t len)
184 | {
185 |     uint64_t va_end = addr + len;
186 |     uint64_t iova_end = iova + len;
187 |     int i;
188 |
189 |     for (i = 0; i < user_mem_maps->n_maps; i++) {
190 |         struct user_mem_map *map = &user_mem_maps->maps[i];
191 |         uint64_t map_va_end = map->addr + map->len;
192 |         uint64_t map_iova_end = map->iova + map->len;
193 |
194 |         /* check start VA */
195 |         if (addr < map->addr || addr >= map_va_end)
196 |             continue;
197 |         /* check if VA end is within boundaries */
198 |         if (va_end <= map->addr || va_end > map_va_end)
199 |             continue;
200 |
201 |         /* check start IOVA */
202 |         if (iova < map->iova || iova >= map_iova_end)
203 |             continue;
204 |         /* check if IOVA end is within boundaries */
205 |         if (iova_end <= map->iova || iova_end > map_iova_end)
206 |             continue;
207 |
208 |         /* we've found our map */
209 |         return map;
210 |     }
211 |     return NULL;
212 | }
213 |
214 | /* this will sort all user maps, and merge/compact any adjacent maps */
215 | static void
216 | compact_user_maps(struct user_mem_maps *user_mem_maps)
217 | {
218 |     int i, n_merged, cur_idx;
219 |
220 |     qsort(user_mem_maps->maps, user_mem_maps->n_maps,
221 |             sizeof(user_mem_maps->maps[0]), user_mem_map_cmp);
222 |
223 |     /* we'll go over the list backwards when merging */
224 |     n_merged = 0;
225 |     for (i = user_mem_maps->n_maps - 2; i >= 0; i--) {
226 |         struct user_mem_map *l, *r;
227 |
228 |         l = &user_mem_maps->maps[i];
229 |         r = &user_mem_maps->maps[i + 1];
230 |
231 |         if (is_null_map(l) || is_null_map(r))
232 |             continue;
233 |
234 |         if (merge_map(l, r))
235 |             n_merged++;
236 |     }
237 |
238 |     /* the entries are still sorted, but now they have holes in them, so
239 |      * walk through the list and remove the holes
240 |      */
241 |     if (n_merged > 0) {
242 |         cur_idx = 0;
243 |         for (i = 0; i < user_mem_maps->n_maps; i++) {
244 |             if (!is_null_map(&user_mem_maps->maps[i])) {
245 |                 struct user_mem_map *src, *dst;
246 |
247 |                 src = &user_mem_maps->maps[i];
248 |                 dst = &user_mem_maps->maps[cur_idx++];
249 |
250 |                 if (src != dst) {
251 |                     memcpy(dst, src, sizeof(*src));
252 |                     memset(src, 0, sizeof(*src));
253 |                 }
254 |             }
255 |         }
256 |         user_mem_maps->n_maps = cur_idx;
257 |     }
258 | }
259 |
260 | static int
261 | vfio_open_group_fd(int iommu_group_num)
262 | {
263 |     int vfio_group_fd;
264 |     char filename[PATH_MAX];
265 |     struct rte_mp_msg mp_req, *mp_rep;
266 |     struct rte_mp_reply mp_reply;
267 |     struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
268 |     struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
269 |
270 |     /* if primary, try to open the group */
271 |     if (internal_config.process_type == RTE_PROC_PRIMARY) {
272 |         /* try regular group format */
273 |         snprintf(filename, sizeof(filename),
274 |                 VFIO_GROUP_FMT, iommu_group_num);
275 |         vfio_group_fd = open(filename, O_RDWR);
276 |         if (vfio_group_fd < 0) {
277 |             /* if file not found, it's not an error */
278 |             if (errno != ENOENT) {
279 |                 RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
280 |                         strerror(errno));
281 |                 return -1;
282 |             }
283 |
284 |             /* special case: try no-IOMMU path as well */
285 |             snprintf(filename, sizeof(filename),
286 |                     VFIO_NOIOMMU_GROUP_FMT,
287 |                     iommu_group_num);
288 |             vfio_group_fd = open(filename, O_RDWR);
289 |             if (vfio_group_fd < 0) {
290 |                 if (errno != ENOENT) {
291 |                     RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
292 |                             strerror(errno));
293 |                     return -1;
294 |                 }
295 |                 return 0;
296 |             }
297 |             /* noiommu group found */
298 |         }
299 |
300 |         return vfio_group_fd;
301 |     }
302 |     /* if we're in a secondary process, request group fd from the primary
303 |      * process via mp channel.
304 |      */
305 |     p->req = SOCKET_REQ_GROUP;
306 |     p->group_num = iommu_group_num;
307 |     strcpy(mp_req.name, EAL_VFIO_MP);
308 |     mp_req.len_param = sizeof(*p);
309 |     mp_req.num_fds = 0;
310 |
311 |     vfio_group_fd = -1;
312 |     if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
313 |             mp_reply.nb_received == 1) {
314 |         mp_rep = &mp_reply.msgs[0];
315 |         p = (struct vfio_mp_param *)mp_rep->param;
316 |         if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
317 |             vfio_group_fd = mp_rep->fds[0];
318 |         } else if (p->result == SOCKET_NO_FD) {
319 |             RTE_LOG(ERR, EAL, " bad VFIO group fd\n");
320 |             vfio_group_fd = 0;
321 |         }
322 |         free(mp_reply.msgs);
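    |         /* review note (added; not in the original source): here the fd
    |          * was copied out of mp_rep->fds[0] before mp_reply.msgs was
    |          * freed, so this path is safe -- contrast with the use-after-free
    |          * flagged at line 1053 below. */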
323 |     }
324 |
325 |     if (vfio_group_fd < 0)
326 |         RTE_LOG(ERR, EAL, " cannot request group fd\n");
327 |     return vfio_group_fd;
328 | }
329 |
330 | static struct vfio_config *
331 | get_vfio_cfg_by_group_num(int iommu_group_num)
332 | {
333 |     struct vfio_config *vfio_cfg;
334 |     int i, j;
335 |
336 |     for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
337 |         vfio_cfg = &vfio_cfgs[i];
338 |         for (j = 0; j < VFIO_MAX_GROUPS; j++) {
339 |             if (vfio_cfg->vfio_groups[j].group_num ==
340 |                     iommu_group_num)
341 |                 return vfio_cfg;
342 |         }
343 |     }
344 |
345 |     return NULL;
346 | }
347 |
348 | static int
349 | vfio_get_group_fd(struct vfio_config *vfio_cfg,
350 |         int iommu_group_num)
351 | {
352 |     int i;
353 |     int vfio_group_fd;
354 |     struct vfio_group *cur_grp;
355 |
356 |     /* check if we already have the group descriptor open */
357 |     for (i = 0; i < VFIO_MAX_GROUPS; i++)
358 |         if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num)
359 |             return vfio_cfg->vfio_groups[i].fd;
360 |
361 |     /* Let's see first if there is room for a new group */
362 |     if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
363 |         RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
364 |         return -1;
365 |     }
366 |
367 |     /* Now let's get an index for the new group */
368 |     for (i = 0; i < VFIO_MAX_GROUPS; i++)
369 |         if (vfio_cfg->vfio_groups[i].group_num == -1) {
370 |             cur_grp = &vfio_cfg->vfio_groups[i];
371 |             break;
372 |         }
373 |
374 |     /* This should not happen */
375 |     if (i == VFIO_MAX_GROUPS) {
376 |         RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
377 |         return -1;
378 |     }
379 |
380 |     vfio_group_fd = vfio_open_group_fd(iommu_group_num);
381 |     if (vfio_group_fd < 0) {
382 |         RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num);
383 |         return -1;
384 |     }
385 |
386 |     cur_grp->group_num = iommu_group_num;
387 |     cur_grp->fd = vfio_group_fd;
388 |     vfio_cfg->vfio_active_groups++;
389 |
390 |     return vfio_group_fd;
391 | }
392 |
393 | static struct vfio_config *
394 | get_vfio_cfg_by_group_fd(int vfio_group_fd)
395 | {
396 |     struct vfio_config *vfio_cfg;
397 |     int i, j;
398 |
399 |     for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
400 |         vfio_cfg = &vfio_cfgs[i];
401 |         for (j = 0; j < VFIO_MAX_GROUPS; j++)
402 |             if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
403 |                 return vfio_cfg;
404 |     }
405 |
406 |     return NULL;
407 | }
408 |
409 | static struct vfio_config *
410 | get_vfio_cfg_by_container_fd(int container_fd)
411 | {
412 |     int i;
413 |
414 |     for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
415 |         if (vfio_cfgs[i].vfio_container_fd == container_fd)
416 |             return &vfio_cfgs[i];
417 |     }
418 |
419 |     return NULL;
420 | }
421 |
422 | int
423 | rte_vfio_get_group_fd(int iommu_group_num)
424 | {
425 |     struct vfio_config *vfio_cfg;
426 |
427 |     /* get the vfio_config it belongs to */
428 |     vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
429 |     vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
430 |
431 |     return vfio_get_group_fd(vfio_cfg, iommu_group_num);
432 | }
433 |
434 | static int
435 | get_vfio_group_idx(int vfio_group_fd)
436 | {
437 |     struct vfio_config *vfio_cfg;
438 |     int i, j;
439 |
440 |     for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
441 |         vfio_cfg = &vfio_cfgs[i];
442 |         for (j = 0; j < VFIO_MAX_GROUPS; j++)
443 |             if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
444 |                 return j;
445 |     }
446 |
447 |     return -1;
448 | }
449 |
450 | static void
451 | vfio_group_device_get(int vfio_group_fd)
452 | {
453 |     struct vfio_config *vfio_cfg;
454 |     int i;
455 |
456 |     vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
457 |     if (vfio_cfg == NULL) {
458 |         RTE_LOG(ERR, EAL, " invalid group fd!\n");
459 |         return;
460 |     }
461 |
462 |     i = get_vfio_group_idx(vfio_group_fd);
463 |     if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
464 |         RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
465 |     else
466 |         vfio_cfg->vfio_groups[i].devices++;
467 | }
468 |
469 | static void
470 | vfio_group_device_put(int vfio_group_fd)
471 | {
472 |     struct vfio_config *vfio_cfg;
473 |     int i;
474 |
475 |     vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
476 |     if (vfio_cfg == NULL) {
477 |         RTE_LOG(ERR, EAL, " invalid group fd!\n");
478 |         return;
479 |     }
480 |
481 |     i = get_vfio_group_idx(vfio_group_fd);
482 |     if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
483 |         RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
484 |     else
485 |         vfio_cfg->vfio_groups[i].devices--;
486 | }
487 |
488 | static int
489 | vfio_group_device_count(int vfio_group_fd)
490 | {
491 |     struct vfio_config *vfio_cfg;
492 |     int i;
493 |
494 |     vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
495 |     if (vfio_cfg == NULL) {
496 |         RTE_LOG(ERR, EAL, " invalid group fd!\n");
497 |         return -1;
498 |     }
499 |
500 |     i = get_vfio_group_idx(vfio_group_fd);
501 |     if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) {
502 |         RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
503 |         return -1;
504 |     }
505 |
506 |     return vfio_cfg->vfio_groups[i].devices;
507 | }
508 |
509 | static void
510 | vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
511 |         void *arg __rte_unused)
512 | {
513 |     struct rte_memseg_list *msl;
514 |     struct rte_memseg *ms;
515 |     size_t cur_len = 0;
516 |
517 |     msl = rte_mem_virt2memseg_list(addr);
518 |
519 |     /* for IOVA as VA mode, no need to care for IOVA addresses */
520 |     if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) {
521 |         uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
522 |         if (type == RTE_MEM_EVENT_ALLOC)
523 |             vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
524 |                     len, 1);
525 |         else
526 |             vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
527 |                     len, 0);
528 |         return;
529 |     }
530 |
531 |     /* memsegs are contiguous in memory */
532 |     ms = rte_mem_virt2memseg(addr, msl);
533 |     while (cur_len < len) {
534 |         /* some memory segments may have invalid IOVA */
535 |         if (ms->iova == RTE_BAD_IOVA) {
536 |             RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n",
537 |                     ms->addr);
538 |             goto next;
539 |         }
540 |         if (type == RTE_MEM_EVENT_ALLOC)
541 |             vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
542 |                     ms->iova, ms->len, 1);
543 |         else
544 |             vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
545 |                     ms->iova, ms->len, 0);
546 | next:
547 |         cur_len += ms->len;
548 |         ++ms;
549 |     }
550 | }
551 |
552 | static int
553 | vfio_sync_default_container(void)
554 | {
555 |     struct rte_mp_msg mp_req, *mp_rep;
556 |     struct rte_mp_reply mp_reply;
557 |     struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
558 |     struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
559 |     int iommu_type_id;
560 |     unsigned int i;
561 |
562 |     /* cannot be called from primary */
563 |     if (rte_eal_process_type() != RTE_PROC_SECONDARY)
564 |         return -1;
565 |
566 |     /* default container fd should have been opened in rte_vfio_enable() */
567 |     if (!default_vfio_cfg->vfio_enabled ||
568 |             default_vfio_cfg->vfio_container_fd < 0) {
569 |         RTE_LOG(ERR, EAL, "VFIO support is not initialized\n");
570 |         return -1;
571 |     }
572 |
573 |     /* find default container's IOMMU type */
574 |     p->req = SOCKET_REQ_IOMMU_TYPE;
575 |     strcpy(mp_req.name, EAL_VFIO_MP);
576 |     mp_req.len_param = sizeof(*p);
577 |     mp_req.num_fds = 0;
578 |
579 |     iommu_type_id = -1;
580 |     if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
581 |             mp_reply.nb_received == 1) {
582 |         mp_rep = &mp_reply.msgs[0];
583 |         p = (struct vfio_mp_param *)mp_rep->param;
584 |         if (p->result == SOCKET_OK)
585 |             iommu_type_id = p->iommu_type_id;
586 |         free(mp_reply.msgs);
587 |     }
588 |     if (iommu_type_id < 0) {
589 |         RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n");
590 |         return -1;
591 |     }
592 |
593 |     /* we now have an fd for default container, as well as its IOMMU type.
594 |      * now, set up default VFIO container config to match.
595 |      */
596 |     for (i = 0; i < RTE_DIM(iommu_types); i++) {
597 |         const struct vfio_iommu_type *t = &iommu_types[i];
598 |         if (t->type_id != iommu_type_id)
599 |             continue;
600 |
601 |         /* we found our IOMMU type */
602 |         default_vfio_cfg->vfio_iommu_type = t;
603 |
604 |         return 0;
605 |     }
606 |     RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n",
607 |             iommu_type_id);
608 |     return -1;
609 | }
610 |
611 | int
612 | rte_vfio_clear_group(int vfio_group_fd)
613 | {
614 |     int i;
615 |     struct vfio_config *vfio_cfg;
616 |
617 |     vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
618 |     if (vfio_cfg == NULL) {
619 |         RTE_LOG(ERR, EAL, " invalid group fd!\n");
620 |         return -1;
621 |     }
622 |
623 |     i = get_vfio_group_idx(vfio_group_fd);
624 |     if (i < 0)
625 |         return -1;
626 |     vfio_cfg->vfio_groups[i].group_num = -1;
627 |     vfio_cfg->vfio_groups[i].fd = -1;
628 |     vfio_cfg->vfio_groups[i].devices = 0;
629 |     vfio_cfg->vfio_active_groups--;
630 |
631 |     return 0;
632 | }
633 |
634 | int
635 | rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
636 |         int *vfio_dev_fd, struct vfio_device_info *device_info)
637 | {
638 |     struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
639 |     rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
640 |     struct vfio_group_status group_status = {
641 |             .argsz = sizeof(group_status)
642 |     };
643 |     struct vfio_config *vfio_cfg;
644 |     struct user_mem_maps *user_mem_maps;
645 |     int vfio_container_fd;
646 |     int vfio_group_fd;
647 |     int iommu_group_num;
648 |     int i, ret;
649 |
650 |     /* get group number */
651 |     ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
652 |     if (ret == 0) {
653 |         RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
654 |             dev_addr);
655 |         return 1;
656 |     }
657 |
658 |     /* if negative, something failed */
659 |     if (ret < 0)
660 |         return -1;
661 |
662 |     /* get the actual group fd */
663 |     vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
664 |     if (vfio_group_fd < 0)
665 |         return -1;
666 |
667 |     /* if group_fd == 0, that means the device isn't managed by VFIO */
668 |     if (vfio_group_fd == 0) {
669 |         RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
670 |                 dev_addr);
671 |         return 1;
672 |     }
673 |
674 |     /*
675 |      * at this point, we know that this group is viable (meaning, all devices
676 |      * are either bound to VFIO or not bound to anything)
677 |      */
678 |
679 |     /* check if the group is viable */
680 |     ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
681 |     if (ret) {
682 |         RTE_LOG(ERR, EAL, " %s cannot get group status, "
683 |                 "error %i (%s)\n", dev_addr, errno, strerror(errno));
684 |         close(vfio_group_fd);
685 |         rte_vfio_clear_group(vfio_group_fd);
686 |         return -1;
687 |     } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
688 |         RTE_LOG(ERR, EAL, " %s VFIO group is not viable! "
689 |                 "Not all devices in IOMMU group bound to VFIO or unbound\n",
690 |                 dev_addr);
691 |         close(vfio_group_fd);
692 |         rte_vfio_clear_group(vfio_group_fd);
693 |         return -1;
694 |     }
695 |
696 |     /* get the vfio_config it belongs to */
697 |     vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
698 |     vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
699 |     vfio_container_fd = vfio_cfg->vfio_container_fd;
700 |     user_mem_maps = &vfio_cfg->mem_maps;
701 |
702 |     /* check if group does not have a container yet */
703 |     if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
704 |
705 |         /* add group to a container */
706 |         ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
707 |                 &vfio_container_fd);
708 |         if (ret) {
709 |             RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
710 |                     "error %i (%s)\n", dev_addr, errno, strerror(errno));
711 |             close(vfio_group_fd);
712 |             rte_vfio_clear_group(vfio_group_fd);
713 |             return -1;
714 |         }
715 |
716 |         /*
717 |          * pick an IOMMU type and set up DMA mappings for container
718 |          *
719 |          * needs to be done only once, only when first group is
720 |          * assigned to a container and only in primary process.
721 |          * Note this can happen several times with the hotplug
722 |          * functionality.
723 |          */
724 |         if (internal_config.process_type == RTE_PROC_PRIMARY &&
725 |                 vfio_cfg->vfio_active_groups == 1 &&
726 |                 vfio_group_device_count(vfio_group_fd) == 0) {
727 |             const struct vfio_iommu_type *t;
728 |
729 |             /* select an IOMMU type which we will be using */
730 |             t = vfio_set_iommu_type(vfio_container_fd);
731 |             if (!t) {
732 |                 RTE_LOG(ERR, EAL,
733 |                     " %s failed to select IOMMU type\n",
734 |                     dev_addr);
735 |                 close(vfio_group_fd);
736 |                 rte_vfio_clear_group(vfio_group_fd);
737 |                 return -1;
738 |             }
739 |             /* lock memory hotplug before mapping and release it
740 |              * after registering callback, to prevent races
741 |              */
742 |             rte_rwlock_read_lock(mem_lock);
743 |             if (vfio_cfg == default_vfio_cfg)
744 |                 ret = t->dma_map_func(vfio_container_fd);
745 |             else
746 |                 ret = 0;
747 |             if (ret) {
748 |                 RTE_LOG(ERR, EAL,
749 |                     " %s DMA remapping failed, error %i (%s)\n",
750 |                     dev_addr, errno, strerror(errno));
751 |                 close(vfio_group_fd);
752 |                 rte_vfio_clear_group(vfio_group_fd);
753 |                 rte_rwlock_read_unlock(mem_lock);
754 |                 return -1;
755 |             }
756 |
757 |             vfio_cfg->vfio_iommu_type = t;
758 |
759 |             /* re-map all user-mapped segments */
760 |             rte_spinlock_recursive_lock(&user_mem_maps->lock);
761 |
762 |             /* this IOMMU type may not support DMA mapping, but
763 |              * if we have mappings in the list - that means we have
764 |              * previously mapped something successfully, so we can
765 |              * be sure that DMA mapping is supported.
766 |              */
767 |             for (i = 0; i < user_mem_maps->n_maps; i++) {
768 |                 struct user_mem_map *map;
769 |                 map = &user_mem_maps->maps[i];
770 |
771 |                 ret = t->dma_user_map_func(
772 |                         vfio_container_fd,
773 |                         map->addr, map->iova, map->len,
774 |                         1);
775 |                 if (ret) {
776 |                     RTE_LOG(ERR, EAL, "Couldn't map user memory for DMA: "
777 |                             "va: 0x%" PRIx64 " "
778 |                             "iova: 0x%" PRIx64 " "
779 |                             "len: 0x%" PRIu64 "\n",
780 |                             map->addr, map->iova,
781 |                             map->len);
782 |                     rte_spinlock_recursive_unlock(
783 |                             &user_mem_maps->lock);
784 |                     rte_rwlock_read_unlock(mem_lock);
785 |                     return -1;
786 |                 }
787 |             }
788 |             rte_spinlock_recursive_unlock(&user_mem_maps->lock);
789 |
790 |             /* register callback for mem events */
791 |             if (vfio_cfg == default_vfio_cfg)
792 |                 ret = rte_mem_event_callback_register(
793 |                     VFIO_MEM_EVENT_CLB_NAME,
794 |                     vfio_mem_event_callback, NULL);
795 |             else
796 |                 ret = 0;
797 |             /* unlock memory hotplug */
798 |             rte_rwlock_read_unlock(mem_lock);
799 |
800 |             if (ret && rte_errno != ENOTSUP) {
801 |                 RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n");
802 |                 return -1;
803 |             }
804 |             if (ret)
805 |                 RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n");
806 |             else
807 |                 RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
808 |         }
809 |     } else if (rte_eal_process_type() != RTE_PROC_PRIMARY &&
810 |             vfio_cfg == default_vfio_cfg &&
811 |             vfio_cfg->vfio_iommu_type == NULL) {
812 |         /* if we're not a primary process, we do not set up the VFIO
813 |          * container because it's already been set up by the primary
814 |          * process. instead, we simply ask the primary about VFIO type
815 |          * we are using, and set the VFIO config up appropriately.
816 |          */
817 |         ret = vfio_sync_default_container();
818 |         if (ret < 0) {
819 |             RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n");
820 |             close(vfio_group_fd);
821 |             rte_vfio_clear_group(vfio_group_fd);
822 |             return -1;
823 |         }
824 |         /* we have successfully initialized VFIO, notify user */
825 |         const struct vfio_iommu_type *t =
826 |                 default_vfio_cfg->vfio_iommu_type;
827 |         RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
828 |                 t->type_id, t->name);
829 |     }
830 |
831 |     /* get a file descriptor for the device */
832 |     *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
833 |     if (*vfio_dev_fd < 0) {
834 |         /* if we cannot get a device fd, this implies a problem with
835 |          * the VFIO group or the container not having IOMMU configured.
836 |          */
837 |
838 |         RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n",
839 |                 dev_addr);
840 |         close(vfio_group_fd);
841 |         rte_vfio_clear_group(vfio_group_fd);
842 |         return -1;
843 |     }
844 |
845 |     /* test and setup the device */
846 |     ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
847 |     if (ret) {
848 |         RTE_LOG(ERR, EAL, " %s cannot get device info, "
849 |                 "error %i (%s)\n", dev_addr, errno,
850 |                 strerror(errno));
851 |         close(*vfio_dev_fd);
852 |         close(vfio_group_fd);
853 |         rte_vfio_clear_group(vfio_group_fd);
854 |         return -1;
855 |     }
856 |     vfio_group_device_get(vfio_group_fd);
857 |
858 |     return 0;
859 | }
860 |
861 | int
862 | rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
863 |         int vfio_dev_fd)
864 | {
865 |     struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
866 |     rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
867 |     struct vfio_group_status group_status = {
868 |             .argsz = sizeof(group_status)
869 |     };
870 |     struct vfio_config *vfio_cfg;
871 |     int vfio_group_fd;
872 |     int iommu_group_num;
873 |     int ret;
874 |
875 |     /* we don't want any DMA mapping messages to come while we're detaching
876 |      * VFIO device, because this might be the last device and we might need
877 |      * to unregister the callback.
878 |      */
879 |     rte_rwlock_read_lock(mem_lock);
880 |
881 |     /* get group number */
882 |     ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
883 |     if (ret <= 0) {
884 |         RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n",
885 |             dev_addr);
886 |         /* This is an error at this point. */
887 |         ret = -1;
888 |         goto out;
889 |     }
890 |
891 |     /* get the actual group fd */
892 |     vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
893 |     if (vfio_group_fd <= 0) {
894 |         RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n",
895 |             dev_addr);
896 |         ret = -1;
897 |         goto out;
898 |     }
899 |
900 |     /* get the vfio_config it belongs to */
901 |     vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
902 |     vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
903 |
904 |     /* At this point we got an active group. Closing it will trigger the
905 |      * container detachment. If this is the last active group, VFIO kernel
906 |      * code will unset the container and the IOMMU mappings.
907 |      */
908 |
909 |     /* Closing a device */
910 |     if (close(vfio_dev_fd) < 0) {
911 |         RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
912 |             dev_addr);
913 |         ret = -1;
914 |         goto out;
915 |     }
916 |
917 |     /* A VFIO group can have several devices attached. Only when there are
918 |      * no devices remaining should the group be closed.
919 |      */
920 |     vfio_group_device_put(vfio_group_fd);
921 |     if (!vfio_group_device_count(vfio_group_fd)) {
922 |
923 |         if (close(vfio_group_fd) < 0) {
924 |             RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
925 |                 dev_addr);
926 |             ret = -1;
927 |             goto out;
928 |         }
929 |
930 |         if (rte_vfio_clear_group(vfio_group_fd) < 0) {
931 |             RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
932 |                 dev_addr);
933 |             ret = -1;
934 |             goto out;
935 |         }
936 |     }
937 |
938 |     /* if there are no active device groups, unregister the callback to
939 |      * avoid spurious attempts to map/unmap memory from VFIO.
940 |      */
941 |     if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 &&
942 |             rte_eal_process_type() != RTE_PROC_SECONDARY)
943 |         rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME,
944 |                 NULL);
945 |
946 |     /* success */
947 |     ret = 0;
948 |
949 | out:
950 |     rte_rwlock_read_unlock(mem_lock);
951 |     return ret;
952 | }
953 |
954 | int
955 | rte_vfio_enable(const char *modname)
956 | {
957 |     /* initialize group list */
958 |     int i, j;
959 |     int vfio_available;
960 |
961 |     rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER;
962 |
963 |     for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
964 |         vfio_cfgs[i].vfio_container_fd = -1;
965 |         vfio_cfgs[i].vfio_active_groups = 0;
966 |         vfio_cfgs[i].vfio_iommu_type = NULL;
967 |         vfio_cfgs[i].mem_maps.lock = lock;
968 |
969 |         for (j = 0; j < VFIO_MAX_GROUPS; j++) {
970 |             vfio_cfgs[i].vfio_groups[j].fd = -1;
971 |             vfio_cfgs[i].vfio_groups[j].group_num = -1;
972 |             vfio_cfgs[i].vfio_groups[j].devices = 0;
973 |         }
974 |     }
975 |
976 |     /* inform the user that we are probing for VFIO */
977 |     RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
978 |
979 |     /* check if vfio module is loaded */
980 |     vfio_available = rte_eal_check_module(modname);
981 |
982 |     /* return error directly */
983 |     if (vfio_available == -1) {
984 |         RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
985 |         return -1;
986 |     }
987 |
988 |     /* return 0 if VFIO modules not loaded */
989 |     if (vfio_available == 0) {
990 |         RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, "
991 |             "skipping VFIO support...\n");
992 |         return 0;
993 |     }
994 |
995 |     if (internal_config.process_type == RTE_PROC_PRIMARY) {
996 |         /* open a new container */
997 |         default_vfio_cfg->vfio_container_fd =
998 |                 rte_vfio_get_container_fd();
999 |     } else {
1000 |         /* get the default container from the primary process */
1001 |         default_vfio_cfg->vfio_container_fd =
1002 |                 vfio_get_default_container_fd();
1003 |     }
1004 |
1005 |     /* check if we have VFIO driver enabled */
1006 |     if (default_vfio_cfg->vfio_container_fd != -1) {
1007 |         RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
1008 |         default_vfio_cfg->vfio_enabled = 1;
1009 |     } else {
1010 |         RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
1011 |     }
1012 |
1013 |     return 0;
1014 | }
1015 |
1016 | int
1017 | rte_vfio_is_enabled(const char *modname)
1018 | {
1019 |     const int mod_available = rte_eal_check_module(modname) > 0;
1020 |     return default_vfio_cfg->vfio_enabled && mod_available;
1021 | }
1022 |
1023 | int
1024 | vfio_get_default_container_fd(void)
1025 | {
1026 |     struct rte_mp_msg mp_req, *mp_rep;
1027 |     struct rte_mp_reply mp_reply;
1028 |     struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
1029 |     struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
1030 |
1031 |     if (default_vfio_cfg->vfio_enabled)
| ||||
1032 |         return default_vfio_cfg->vfio_container_fd;
1033 |
1034 |     if (internal_config.process_type == RTE_PROC_PRIMARY) {
1035 |         /* if we were secondary process we would try requesting
1036 |          * container fd from the primary, but we're the primary
1037 |          * process so just exit here
1038 |          */
1039 |         return -1;
1040 |     }
1041 |
1042 |     p->req = SOCKET_REQ_DEFAULT_CONTAINER;
1043 |     strcpy(mp_req.name, EAL_VFIO_MP);
1044 |     mp_req.len_param = sizeof(*p);
1045 |     mp_req.num_fds = 0;
1046 |
1047 |     if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
1048 |             mp_reply.nb_received == 1) {
1049 |         mp_rep = &mp_reply.msgs[0];
1050 |         p = (struct vfio_mp_param *)mp_rep->param;
1051 |         if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
1052 |             free(mp_reply.msgs);
1053 |             return mp_rep->fds[0];
| ||||
1054 |         }
1055 |         free(mp_reply.msgs);
1056 |     }
1057 |
1058 |     RTE_LOG(ERR, EAL, " cannot request default container fd\n");
1059 |     return -1;
1060 | }
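     | /* review note (added; not in the original source): a minimal sketch of
     |  * a fix for the warning above, assuming no other constraints, is to copy
     |  * the fd out of the reply before freeing it -- the same copy-then-free
     |  * pattern vfio_open_group_fd() and rte_vfio_get_container_fd() already
     |  * use:
     |  *
     |  *     if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
     |  *         int fd = mp_rep->fds[0];
     |  *
     |  *         free(mp_reply.msgs);
     |  *         return fd;
     |  *     }
     |  */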
1061 | ||||
1062 | int | |||
1063 | vfio_get_iommu_type(void) | |||
1064 | { | |||
1065 | if (default_vfio_cfg->vfio_iommu_type == NULL((void*)0)) | |||
1066 | return -1; | |||
1067 | ||||
1068 | return default_vfio_cfg->vfio_iommu_type->type_id; | |||
1069 | } | |||
1070 | ||||
1071 | const struct vfio_iommu_type * | |||
1072 | vfio_set_iommu_type(int vfio_container_fd) | |||
1073 | { | |||
1074 | unsigned idx; | |||
1075 | for (idx = 0; idx < RTE_DIM(iommu_types)(sizeof (iommu_types) / sizeof ((iommu_types)[0])); idx++) { | |||
1076 | const struct vfio_iommu_type *t = &iommu_types[idx]; | |||
1077 | ||||
1078 | int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 2)) << 0) | ((0) << ((0 +8)+8))), | |||
1079 | t->type_id); | |||
1080 | if (!ret) { | |||
1081 | RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",rte_log(6U, 0, "EAL" ": " " using IOMMU type %d (%s)\n", t-> type_id, t->name) | |||
1082 | t->type_id, t->name)rte_log(6U, 0, "EAL" ": " " using IOMMU type %d (%s)\n", t-> type_id, t->name); | |||
1083 | return t; | |||
1084 | } | |||
1085 | /* not an error, there may be more supported IOMMU types */ | |||
1086 | RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, "rte_log(8U, 0, "EAL" ": " " set IOMMU type %d (%s) failed, " "error %i (%s)\n", t->type_id, t->name, (*__errno_location ()), strerror((*__errno_location ()))) | |||
1087 | "error %i (%s)\n", t->type_id, t->name, errno,rte_log(8U, 0, "EAL" ": " " set IOMMU type %d (%s) failed, " "error %i (%s)\n", t->type_id, t->name, (*__errno_location ()), strerror((*__errno_location ()))) | |||
1088 | strerror(errno))rte_log(8U, 0, "EAL" ": " " set IOMMU type %d (%s) failed, " "error %i (%s)\n", t->type_id, t->name, (*__errno_location ()), strerror((*__errno_location ()))); | |||
1089 | } | |||
1090 | /* if we didn't find a suitable IOMMU type, fail */ | |||
1091 | return NULL((void*)0); | |||
1092 | } | |||
1093 | ||||
1094 | int | |||
1095 | vfio_has_supported_extensions(int vfio_container_fd) | |||
1096 | { | |||
1097 | int ret; | |||
1098 | unsigned idx, n_extensions = 0; | |||
1099 | for (idx = 0; idx < RTE_DIM(iommu_types)(sizeof (iommu_types) / sizeof ((iommu_types)[0])); idx++) { | |||
1100 | const struct vfio_iommu_type *t = &iommu_types[idx]; | |||
1101 | ||||
1102 | ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 1)) << 0) | ((0) << ((0 +8)+8))), | |||
1103 | t->type_id); | |||
1104 | if (ret < 0) { | |||
1105 | RTE_LOG(ERR, EAL, " could not get IOMMU type, "rte_log(4U, 0, "EAL" ": " " could not get IOMMU type, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) | |||
1106 | "error %i (%s)\n", errno,rte_log(4U, 0, "EAL" ": " " could not get IOMMU type, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) | |||
1107 | strerror(errno))rte_log(4U, 0, "EAL" ": " " could not get IOMMU type, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); | |||
1108 | close(vfio_container_fd); | |||
1109 | return -1; | |||
1110 | } else if (ret == 1) { | |||
1111 | /* we found a supported extension */ | |||
1112 | n_extensions++; | |||
1113 | } | |||
1114 | RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n",rte_log(8U, 0, "EAL" ": " " IOMMU type %d (%s) is %s\n", t-> type_id, t->name, ret ? "supported" : "not supported") | |||
1115 | t->type_id, t->name,rte_log(8U, 0, "EAL" ": " " IOMMU type %d (%s) is %s\n", t-> type_id, t->name, ret ? "supported" : "not supported") | |||
1116 | ret ? "supported" : "not supported")rte_log(8U, 0, "EAL" ": " " IOMMU type %d (%s) is %s\n", t-> type_id, t->name, ret ? "supported" : "not supported"); | |||
1117 | } | |||
1118 | ||||
1119 | /* if we didn't find any supported IOMMU types, fail */ | |||
1120 | if (!n_extensions) { | |||
1121 | close(vfio_container_fd); | |||
1122 | return -1; | |||
1123 | } | |||
1124 | ||||
1125 | return 0; | |||
1126 | } | |||
1127 | ||||
1128 | int | |||
1129 | rte_vfio_get_container_fd(void) | |||
1130 | { | |||
1131 | int ret, vfio_container_fd; | |||
1132 | struct rte_mp_msg mp_req, *mp_rep; | |||
1133 | struct rte_mp_reply mp_reply; | |||
1134 | struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; | |||
1135 | struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; | |||
1136 | ||||
1137 | ||||
1138 | /* if we're in a primary process, try to open the container */ | |||
1139 | if (internal_config.process_type == RTE_PROC_PRIMARY) { | |||
1140 | vfio_container_fd = open(VFIO_CONTAINER_PATH"/dev/vfio/vfio", O_RDWR02); | |||
1141 | if (vfio_container_fd < 0) { | |||
1142 | RTE_LOG(ERR, EAL, " cannot open VFIO container, "rte_log(4U, 0, "EAL" ": " " cannot open VFIO container, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) | |||
1143 | "error %i (%s)\n", errno, strerror(errno))rte_log(4U, 0, "EAL" ": " " cannot open VFIO container, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); | |||
1144 | return -1; | |||
1145 | } | |||
1146 | ||||
1147 | /* check VFIO API version */ | |||
1148 | ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 0)) << 0) | ((0) << ((0 +8)+8)))); | |||
1149 | if (ret != VFIO_API_VERSION0) { | |||
1150 | if (ret < 0) | |||
1151 | RTE_LOG(ERR, EAL, " could not get VFIO API version, "rte_log(4U, 0, "EAL" ": " " could not get VFIO API version, " "error %i (%s)\n", (*__errno_location ()), strerror((*__errno_location ()))) | |||
1152 | "error %i (%s)\n", errno, strerror(errno))rte_log(4U, 0, "EAL" ": " " could not get VFIO API version, " "error %i (%s)\n", (*__errno_location ()), strerror((*__errno_location ()))); | |||
1153 | else | |||
1154 | RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n")rte_log(4U, 0, "EAL" ": " " unsupported VFIO API version!\n" ); | |||
1155 | close(vfio_container_fd); | |||
1156 | return -1; | |||
1157 | } | |||
1158 | ||||
1159 | ret = vfio_has_supported_extensions(vfio_container_fd); | |||
1160 | if (ret) { | |||
1161 | RTE_LOG(ERR, EAL, " no supported IOMMU "rte_log(4U, 0, "EAL" ": " " no supported IOMMU " "extensions found!\n" ) | |||
1162 | "extensions found!\n")rte_log(4U, 0, "EAL" ": " " no supported IOMMU " "extensions found!\n" ); | |||
1163 | return -1; | |||
1164 | } | |||
1165 | ||||
1166 | return vfio_container_fd; | |||
1167 | } | |||
1168 | /* | |||
1169 | * if we're in a secondary process, request container fd from the | |||
1170 | * primary process via mp channel | |||
1171 | */ | |||
1172 | p->req = SOCKET_REQ_CONTAINER0x100; | |||
1173 | strcpy(mp_req.name, EAL_VFIO_MP"eal_vfio_mp_sync"); | |||
1174 | mp_req.len_param = sizeof(*p); | |||
1175 | mp_req.num_fds = 0; | |||
1176 | ||||
1177 | vfio_container_fd = -1; | |||
1178 | if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && | |||
1179 | mp_reply.nb_received == 1) { | |||
1180 | mp_rep = &mp_reply.msgs[0]; | |||
1181 | p = (struct vfio_mp_param *)mp_rep->param; | |||
1182 | if (p->result == SOCKET_OK0x0 && mp_rep->num_fds == 1) { | |||
1183 | vfio_container_fd = mp_rep->fds[0]; | |||
1184 | free(mp_reply.msgs); | |||
1185 | return vfio_container_fd; | |||
1186 | } | |||
1187 | free(mp_reply.msgs); | |||
1188 | } | |||
1189 | ||||
1190 | RTE_LOG(ERR, EAL, " cannot request container fd\n")rte_log(4U, 0, "EAL" ": " " cannot request container fd\n"); | |||
1191 | return -1; | |||
1192 | } | |||
1193 | ||||
1194 | int | |||
1195 | rte_vfio_get_group_num(const char *sysfs_base, | |||
1196 | const char *dev_addr, int *iommu_group_num) | |||
1197 | { | |||
1198 | char linkname[PATH_MAX4096]; | |||
1199 | char filename[PATH_MAX4096]; | |||
1200 | char *tok[16], *group_tok, *end; | |||
1201 | int ret; | |||
1202 | ||||
1203 | memset(linkname, 0, sizeof(linkname)); | |||
1204 | memset(filename, 0, sizeof(filename)); | |||
1205 | ||||
1206 | /* try to find out IOMMU group for this device */ | |||
1207 | snprintf(linkname, sizeof(linkname), | |||
1208 | "%s/%s/iommu_group", sysfs_base, dev_addr); | |||
1209 | ||||
1210 | ret = readlink(linkname, filename, sizeof(filename)); | |||
1211 | ||||
1212 | /* if the link doesn't exist, no VFIO for us */ | |||
1213 | if (ret < 0) | |||
1214 | return 0; | |||
1215 | ||||
1216 | ret = rte_strsplit(filename, sizeof(filename), | |||
1217 | tok, RTE_DIM(tok)(sizeof (tok) / sizeof ((tok)[0])), '/'); | |||
1218 | ||||
1219 | if (ret <= 0) { | |||
1220 | RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", dev_addr)rte_log(4U, 0, "EAL" ": " " %s cannot get IOMMU group\n", dev_addr ); | |||
1221 | return -1; | |||
1222 | } | |||
1223 | ||||
1224 | /* IOMMU group is always the last token */ | |||
1225 | errno(*__errno_location ()) = 0; | |||
1226 | group_tok = tok[ret - 1]; | |||
1227 | end = group_tok; | |||
1228 | *iommu_group_num = strtol(group_tok, &end, 10); | |||
1229 | if ((end != group_tok && *end != '\0') || errno(*__errno_location ()) != 0) { | |||
1230 | RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr)rte_log(4U, 0, "EAL" ": " " %s error parsing IOMMU number!\n" , dev_addr); | |||
1231 | return -1; | |||
1232 | } | |||
1233 | ||||
1234 | return 1; | |||
1235 | } | |||
1236 | ||||
1237 | static int | |||
1238 | type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms, | |||
1239 | void *arg) | |||
1240 | { | |||
1241 | int *vfio_container_fd = arg; | |||
1242 | ||||
1243 | if (msl->external) | |||
1244 | return 0; | |||
1245 | ||||
1246 | return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, | |||
1247 | ms->len, 1); | |||
1248 | } | |||

static int
vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len, int do_map)
{
	struct vfio_iommu_type1_dma_map dma_map;
	struct vfio_iommu_type1_dma_unmap dma_unmap;
	int ret;

	if (do_map != 0) {
		memset(&dma_map, 0, sizeof(dma_map));
		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
		dma_map.vaddr = vaddr;
		dma_map.size = len;
		dma_map.iova = iova;
		dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE;

		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
		if (ret) {
			/**
			 * In case the mapping was already done, EEXIST will be
			 * returned from the kernel.
			 */
			if (errno == EEXIST) {
				RTE_LOG(DEBUG, EAL,
					" Memory segment is already mapped,"
					" skipping\n");
			} else {
				RTE_LOG(ERR, EAL,
					" cannot set up DMA remapping,"
					" error %i (%s)\n",
					errno, strerror(errno));
				return -1;
			}
		}
	} else {
		memset(&dma_unmap, 0, sizeof(dma_unmap));
		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
		dma_unmap.size = len;
		dma_unmap.iova = iova;

		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
				&dma_unmap);
		if (ret) {
			RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
					errno, strerror(errno));
			return -1;
		}
	}

	return 0;
}
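
/*
 * Illustrative sketch (not part of the original file): mapping and later
 * unmapping a region through the type1 helper. container_fd is assumed to
 * be an open VFIO container fd; the addresses and length are hypothetical.
 * Real callers pass a process virtual address and the IOVA the device
 * should use.
 *
 *	uint64_t vaddr = 0x100000000ULL;  // hypothetical process VA
 *	uint64_t iova  = 0x100000000ULL;  // hypothetical device IOVA
 *	uint64_t len   = 0x200000ULL;     // 2 MiB
 *
 *	if (vfio_type1_dma_mem_map(container_fd, vaddr, iova, len, 1) == 0) {
 *		// ... device may now DMA to/from [iova, iova + len) ...
 *		vfio_type1_dma_mem_map(container_fd, vaddr, iova, len, 0);
 *	}
 */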

static int
vfio_type1_dma_map(int vfio_container_fd)
{
	return rte_memseg_walk(type1_map, &vfio_container_fd);
}
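
/*
 * Illustrative sketch (not part of the original file): rte_memseg_walk()
 * invokes the given callback once per memseg, so the call above maps every
 * internal DPDK segment in one pass. A custom walk callback follows the
 * same shape; everything below is hypothetical:
 *
 *	static int
 *	count_segs(const struct rte_memseg_list *msl,
 *			const struct rte_memseg *ms, void *arg)
 *	{
 *		int *count = arg;
 *
 *		if (!msl->external)
 *			(*count)++;
 *		return 0;   // a nonzero return would stop the walk
 *	}
 *
 *	int n = 0;
 *	rte_memseg_walk(count_segs, &n);
 */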

static int
vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len, int do_map)
{
	struct vfio_iommu_type1_dma_map dma_map;
	struct vfio_iommu_type1_dma_unmap dma_unmap;
	int ret;
	struct vfio_iommu_spapr_register_memory reg = {
		.argsz = sizeof(reg),
		.flags = 0
	};
	reg.vaddr = (uintptr_t) vaddr;
	reg.size = len;

	if (do_map != 0) {
		ret = ioctl(vfio_container_fd,
				VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
		if (ret) {
			RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, "
					"error %i (%s)\n", errno, strerror(errno));
			return -1;
		}

		memset(&dma_map, 0, sizeof(dma_map));
		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
		dma_map.vaddr = vaddr;
		dma_map.size = len;
		dma_map.iova = iova;
		dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE;

		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
		if (ret) {
			/**
			 * In case the mapping was already done, EBUSY will be
			 * returned from the kernel.
			 */
			if (errno == EBUSY) {
				RTE_LOG(DEBUG, EAL,
					" Memory segment is already mapped,"
					" skipping\n");
			} else {
				RTE_LOG(ERR, EAL,
					" cannot set up DMA remapping,"
					" error %i (%s)\n", errno,
					strerror(errno));
				return -1;
			}
		}

	} else {
		ret = ioctl(vfio_container_fd,
				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
		if (ret) {
			RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
					errno, strerror(errno));
			return -1;
		}

		memset(&dma_unmap, 0, sizeof(dma_unmap));
		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
		dma_unmap.size = len;
		dma_unmap.iova = iova;

		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
				&dma_unmap);
		if (ret) {
			RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
					errno, strerror(errno));
			return -1;
		}
	}

	return 0;
}

static int
vfio_spapr_map_walk(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg)
{
	int *vfio_container_fd = arg;

	if (msl->external)
		return 0;

	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
			ms->len, 1);
}

struct spapr_walk_param {
	uint64_t window_size;
	uint64_t hugepage_sz;
};

static int
vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg)
{
	struct spapr_walk_param *param = arg;
	uint64_t max = ms->iova + ms->len;

	if (msl->external)
		return 0;

	if (max > param->window_size) {
		param->hugepage_sz = ms->hugepage_sz;
		param->window_size = max;
	}

	return 0;
}
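
/*
 * Illustrative sketch (not part of the original file): how the walk above is
 * typically driven. After the walk, param.window_size holds the highest IOVA
 * end across all internal segments, i.e. the smallest DMA window that covers
 * them all:
 *
 *	struct spapr_walk_param param;
 *
 *	memset(&param, 0, sizeof(param));
 *	rte_memseg_walk(vfio_spapr_window_size_walk, &param);
 *	// sPAPR requires a power-of-two window size
 *	uint64_t win = rte_align64pow2(param.window_size);
 */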

static int
vfio_spapr_create_new_dma_window(int vfio_container_fd,
		struct vfio_iommu_spapr_tce_create *create) {
	struct vfio_iommu_spapr_tce_remove remove = {
		.argsz = sizeof(remove),
	};
	struct vfio_iommu_spapr_tce_info info = {
		.argsz = sizeof(info),
	};
	int ret;

	/* query sPAPR IOMMU info */
	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
	if (ret) {
		RTE_LOG(ERR, EAL, " cannot get IOMMU info, "
				"error %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	/* remove the default 32-bit DMA window */
	remove.start_addr = info.dma32_window_start;
	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
	if (ret) {
		RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
				"error %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	/* create new DMA window */
	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create);
	if (ret) {
		RTE_LOG(ERR, EAL, " cannot create new DMA window, "
				"error %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	if (create->start_addr != 0) {
		RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
		return -1;
	}

	return 0;
}

static int
vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len, int do_map)
{
	struct spapr_walk_param param;
	struct vfio_iommu_spapr_tce_create create = {
		.argsz = sizeof(create),
	};
	struct vfio_config *vfio_cfg;
	struct user_mem_maps *user_mem_maps;
	int i, ret = 0;

	vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, " invalid container fd!\n");
		return -1;
	}

	user_mem_maps = &vfio_cfg->mem_maps;
	rte_spinlock_recursive_lock(&user_mem_maps->lock);

	/* check if window size needs to be adjusted */
	memset(&param, 0, sizeof(param));

	/* we're inside a callback so use thread-unsafe version */
	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
			&param) < 0) {
		RTE_LOG(ERR, EAL, "Could not get window size\n");
		ret = -1;
		goto out;
	}

	/* also check user maps. note that we must extend param.window_size
	 * here, before it is aligned below; extending create.window_size
	 * instead would be lost when it is overwritten by the alignment.
	 */
	for (i = 0; i < user_mem_maps->n_maps; i++) {
		uint64_t max = user_mem_maps->maps[i].iova +
				user_mem_maps->maps[i].len;
		param.window_size = RTE_MAX(param.window_size, max);
	}

	/* sPAPR requires window size to be a power of 2 */
	create.window_size = rte_align64pow2(param.window_size);
	create.page_shift = __builtin_ctzll(param.hugepage_sz);
	create.levels = 1;

	if (do_map) {
		void *addr;
		/* re-create window and remap the entire memory */
		if (iova > create.window_size) {
			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
					&create) < 0) {
				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
				ret = -1;
				goto out;
			}
			/* we're inside a callback, so use thread-unsafe version
			 */
			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
					&vfio_container_fd) < 0) {
				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
				ret = -1;
				goto out;
			}
			/* remap all user maps */
			for (i = 0; i < user_mem_maps->n_maps; i++) {
				struct user_mem_map *map =
						&user_mem_maps->maps[i];
				if (vfio_spapr_dma_do_map(vfio_container_fd,
						map->addr, map->iova, map->len,
						1)) {
					RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n");
					ret = -1;
					goto out;
				}
			}
		}

		/* now that we've remapped all of the memory that was present
		 * before, map the segment that we were requested to map.
		 *
		 * however, if we were called by the callback, the memory we
		 * were called with was already in the memseg list, so previous
		 * mapping should've mapped that segment already.
		 *
		 * virt2memseg_list is a relatively cheap check, so use that. if
		 * memory is within any memseg list, it's a memseg, so it's
		 * already mapped.
		 */
		addr = (void *)(uintptr_t)vaddr;
		if (rte_mem_virt2memseg_list(addr) == NULL &&
				vfio_spapr_dma_do_map(vfio_container_fd,
					vaddr, iova, len, 1) < 0) {
			RTE_LOG(ERR, EAL, "Could not map segment\n");
			ret = -1;
			goto out;
		}
	} else {
		/* for unmap, check if iova is within the DMA window */
		if (iova > create.window_size) {
			RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap\n");
			ret = -1;
			goto out;
		}

		vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
	}
out:
	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
	return ret;
}

static int
vfio_spapr_dma_map(int vfio_container_fd)
{
	struct vfio_iommu_spapr_tce_create create = {
		.argsz = sizeof(create),
	};
	struct spapr_walk_param param;

	memset(&param, 0, sizeof(param));

	/* create DMA window from 0 to max(phys_addr + len) */
	rte_memseg_walk(vfio_spapr_window_size_walk, &param);

	/* sPAPR requires window size to be a power of 2 */
	create.window_size = rte_align64pow2(param.window_size);
	create.page_shift = __builtin_ctzll(param.hugepage_sz);
	create.levels = 1;

	if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) {
		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
		return -1;
	}

	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
		return -1;

	return 0;
}
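
/*
 * Worked example (illustrative, not part of the original file): page_shift
 * is derived from the hugepage size with a count-trailing-zeros, which is
 * valid because hugepage sizes are powers of two. For 2 MiB pages:
 *
 *	uint64_t hugepage_sz = 2ULL * 1024 * 1024;       // 0x200000
 *	int page_shift = __builtin_ctzll(hugepage_sz);   // 21
 */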

static int
vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
{
	/* No-IOMMU mode does not need DMA mapping */
	return 0;
}

static int
vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
		uint64_t __rte_unused vaddr,
		uint64_t __rte_unused iova, uint64_t __rte_unused len,
		int __rte_unused do_map)
{
	/* No-IOMMU mode does not need DMA mapping */
	return 0;
}

static int
vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
		uint64_t len, int do_map)
{
	const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type;

	if (!t) {
		RTE_LOG(ERR, EAL, " VFIO support not initialized\n");
		rte_errno = ENODEV;
		return -1;
	}

	if (!t->dma_user_map_func) {
		RTE_LOG(ERR, EAL,
			" VFIO custom DMA region mapping not supported by IOMMU %s\n",
			t->name);
		rte_errno = ENOTSUP;
		return -1;
	}

	return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova,
			len, do_map);
}
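
/*
 * Illustrative sketch (not part of the original file): the dispatch above
 * goes through the dma_user_map_func pointer filled in from the
 * iommu_types[] table when the container's IOMMU type was negotiated, so a
 * single call site serves Type 1, sPAPR and No-IOMMU alike:
 *
 *	// map: resolves to vfio_type1_dma_mem_map() under Type 1,
 *	// vfio_spapr_dma_mem_map() under sPAPR, a no-op under No-IOMMU
 *	vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1);
 *	// unmap goes through the same function pointer
 *	vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0);
 */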

static int
container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct user_mem_map *new_map;
	struct user_mem_maps *user_mem_maps;
	int ret = 0;

	user_mem_maps = &vfio_cfg->mem_maps;
	rte_spinlock_recursive_lock(&user_mem_maps->lock);
	if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
		RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
		rte_errno = ENOMEM;
		ret = -1;
		goto out;
	}
	/* map the entry */
	if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
		/* technically, this will fail if there are currently no
		 * devices plugged in, even though the mapping might have
		 * succeeded had a device been added later. however, since we
		 * cannot verify that the mapping is valid without a device
		 * attached, consider it unsupported: we can't just store any
		 * old mapping and pollute the list of active mappings
		 * willy-nilly.
		 */
		RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
		ret = -1;
		goto out;
	}
	/* create new user mem map entry */
	new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
	new_map->addr = vaddr;
	new_map->iova = iova;
	new_map->len = len;

	compact_user_maps(user_mem_maps);
out:
	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
	return ret;
}

static int
container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct user_mem_map *map, *new_map = NULL;
	struct user_mem_maps *user_mem_maps;
	int ret = 0;

	user_mem_maps = &vfio_cfg->mem_maps;
	rte_spinlock_recursive_lock(&user_mem_maps->lock);

	/* find our mapping */
	map = find_user_mem_map(user_mem_maps, vaddr, iova, len);
	if (!map) {
		RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n");
		rte_errno = EINVAL;
		ret = -1;
		goto out;
	}
	if (map->addr != vaddr || map->iova != iova || map->len != len) {
		/* we're partially unmapping a previously mapped region, so we
		 * need to split the entry into two.
		 */
		if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
			RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
			rte_errno = ENOMEM;
			ret = -1;
			goto out;
		}
		new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
	}

	/* unmap the entry */
	if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) {
		/* there may not be any devices plugged in, so unmapping will
		 * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
		 * stop us from removing the mapping, as the assumption is we
		 * won't be needing this memory any more and thus will want to
		 * prevent it from being remapped again on hotplug. so, only
		 * fail if we indeed failed to unmap (e.g. if the mapping was
		 * within our mapped range but had invalid alignment).
		 */
		if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
			RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n");
			ret = -1;
			goto out;
		} else {
			RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n");
		}
	}
	/* remove map from the list of active mappings */
	if (new_map != NULL) {
		adjust_map(map, new_map, vaddr, len);

		/* if we've created a new map by splitting, sort everything */
		if (!is_null_map(new_map)) {
			compact_user_maps(user_mem_maps);
		} else {
			/* we've created a new mapping, but it was unused */
			user_mem_maps->n_maps--;
		}
	} else {
		memset(map, 0, sizeof(*map));
		compact_user_maps(user_mem_maps);
		user_mem_maps->n_maps--;
	}

out:
	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
	return ret;
}
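
/*
 * Illustrative sketch (not part of the original file): why partial unmaps
 * need a second entry. If a user maps one region and then unmaps its middle
 * third, the surviving head and tail must be tracked as two separate
 * entries, which is what the split above prepares for. The addresses below
 * are hypothetical:
 *
 *	rte_vfio_dma_map(va, iova, 0x300000);      // tracked as one entry
 *	rte_vfio_dma_unmap(va + 0x100000,          // unmap the middle third
 *			iova + 0x100000, 0x100000);
 *	// user_mem_maps now holds [va, va + 0x100000) and
 *	// [va + 0x200000, va + 0x300000) as two entries
 */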

int
rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
{
	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	return container_dma_map(default_vfio_cfg, vaddr, iova, len);
}

int
rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
{
	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	return container_dma_unmap(default_vfio_cfg, vaddr, iova, len);
}
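
/*
 * Usage sketch (illustrative only, not part of the original file): mapping
 * externally allocated memory for DMA in the default container, then
 * unmapping it with the exact same (vaddr, iova, len) triplet. The helper
 * names are hypothetical:
 *
 *	void *buf = alloc_dma_buffer(len);          // hypothetical allocator
 *	uint64_t va = (uint64_t)(uintptr_t)buf;
 *
 *	if (rte_vfio_dma_map(va, va, len) < 0)
 *		handle_error(rte_errno);            // hypothetical handler
 *	// ... device DMA ...
 *	rte_vfio_dma_unmap(va, va, len);
 */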

int
rte_vfio_noiommu_is_enabled(void)
{
	int fd;
	ssize_t cnt;
	char c;

	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
	if (fd < 0) {
		if (errno != ENOENT) {
			RTE_LOG(ERR, EAL, " cannot open vfio noiommu file %i (%s)\n",
					errno, strerror(errno));
			return -1;
		}
		/*
		 * else the file does not exist,
		 * i.e. noiommu is not enabled
		 */
		return 0;
	}

	cnt = read(fd, &c, 1);
	close(fd);
	if (cnt != 1) {
		RTE_LOG(ERR, EAL, " unable to read from vfio noiommu "
				"file %i (%s)\n", errno, strerror(errno));
		return -1;
	}

	return c == 'Y';
}
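
/*
 * Illustrative sketch (not part of the original file): callers typically use
 * this check to decide how IOVAs must be chosen. In no-IOMMU mode there is
 * no address translation or isolation, so DMA must use physical addresses:
 *
 *	if (rte_vfio_noiommu_is_enabled() == 1)
 *		printf("unsafe no-IOMMU mode: IOVA must equal PA\n");
 */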

int
rte_vfio_container_create(void)
{
	int i;

	/* Find an empty slot to store new vfio config */
	for (i = 1; i < VFIO_MAX_CONTAINERS; i++) {
		if (vfio_cfgs[i].vfio_container_fd == -1)
			break;
	}

	if (i == VFIO_MAX_CONTAINERS) {
		RTE_LOG(ERR, EAL, "exceeded max VFIO container limit\n");
		return -1;
	}

	vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd();
	if (vfio_cfgs[i].vfio_container_fd < 0) {
		RTE_LOG(NOTICE, EAL, "failed to create a new container\n");
		return -1;
	}

	return vfio_cfgs[i].vfio_container_fd;
}
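
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * typical lifecycle of a custom container. The group number 42 is
 * hypothetical and would normally come from rte_vfio_get_group_num():
 *
 *	int cfd = rte_vfio_container_create();
 *	if (cfd < 0)
 *		return;
 *	rte_vfio_container_group_bind(cfd, 42);    // hypothetical group
 *	rte_vfio_container_dma_map(cfd, vaddr, iova, len);
 *	// ... device DMA ...
 *	rte_vfio_container_dma_unmap(cfd, vaddr, iova, len);
 *	rte_vfio_container_destroy(cfd);           // also unbinds groups
 */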

int __rte_experimental
rte_vfio_container_destroy(int container_fd)
{
	struct vfio_config *vfio_cfg;
	int i;

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	for (i = 0; i < VFIO_MAX_GROUPS; i++)
		if (vfio_cfg->vfio_groups[i].group_num != -1)
			rte_vfio_container_group_unbind(container_fd,
				vfio_cfg->vfio_groups[i].group_num);

	close(container_fd);
	vfio_cfg->vfio_container_fd = -1;
	vfio_cfg->vfio_active_groups = 0;
	vfio_cfg->vfio_iommu_type = NULL;

	return 0;
}

int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
{
	struct vfio_config *vfio_cfg;

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	return vfio_get_group_fd(vfio_cfg, iommu_group_num);
}

int
rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
{
	struct vfio_config *vfio_cfg;
	struct vfio_group *cur_grp = NULL;
	int i;

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	for (i = 0; i < VFIO_MAX_GROUPS; i++) {
		if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) {
			cur_grp = &vfio_cfg->vfio_groups[i];
			break;
		}
	}

	/* This should not happen */
	if (i == VFIO_MAX_GROUPS || cur_grp == NULL) {
		RTE_LOG(ERR, EAL, "Specified group number not found\n");
		return -1;
	}

	if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) {
		RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for"
			" iommu_group_num %d\n", iommu_group_num);
		return -1;
	}
	cur_grp->group_num = -1;
	cur_grp->fd = -1;
	cur_grp->devices = 0;
	vfio_cfg->vfio_active_groups--;

	return 0;
}

int
rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct vfio_config *vfio_cfg;

	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	return container_dma_map(vfio_cfg, vaddr, iova, len);
}

int
rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	struct vfio_config *vfio_cfg;

	if (len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
	if (vfio_cfg == NULL) {
		RTE_LOG(ERR, EAL, "Invalid container fd\n");
		return -1;
	}

	return container_dma_unmap(vfio_cfg, vaddr, iova, len);
}

#else

int
rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
		__rte_unused uint64_t len)
{
	return -1;
}

int
rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
		__rte_unused uint64_t len)
{
	return -1;
}

int
rte_vfio_setup_device(__rte_unused const char *sysfs_base,
		__rte_unused const char *dev_addr,
		__rte_unused int *vfio_dev_fd,
		__rte_unused struct vfio_device_info *device_info)
{
	return -1;
}

int
rte_vfio_release_device(__rte_unused const char *sysfs_base,
		__rte_unused const char *dev_addr, __rte_unused int fd)
{
	return -1;
}

int
rte_vfio_enable(__rte_unused const char *modname)
{
	return -1;
}

int
rte_vfio_is_enabled(__rte_unused const char *modname)
{
	return -1;
}

int
rte_vfio_noiommu_is_enabled(void)
{
	return -1;
}

int
rte_vfio_clear_group(__rte_unused int vfio_group_fd)
{
	return -1;
}

int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
		__rte_unused const char *dev_addr,
		__rte_unused int *iommu_group_num)
{
	return -1;
}

int
rte_vfio_get_container_fd(void)
{
	return -1;
}

int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_create(void)
{
	return -1;
}

int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
	return -1;
}

int
rte_vfio_container_group_bind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
		__rte_unused int iommu_group_num)
{
	return -1;
}

int
rte_vfio_container_dma_map(__rte_unused int container_fd,
		__rte_unused uint64_t vaddr,
		__rte_unused uint64_t iova,
		__rte_unused uint64_t len)
{
	return -1;
}

int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
		__rte_unused uint64_t vaddr,
		__rte_unused uint64_t iova,
		__rte_unused uint64_t len)
{
	return -1;
}

#endif /* VFIO_PRESENT */