File: | home/bhubbard/working/src/ceph/src/spdk/dpdk/drivers/bus/pci/linux/pci_vfio.c |
Warning: | line 266, column 4 Value stored to 'intr_mode' is never read |
[?] Use j/k keys for keyboard navigation
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2014 Intel Corporation |
3 | */ |
4 | |
#include <string.h>
#include <fcntl.h>
#include <linux/pci_regs.h>
#include <sys/eventfd.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdbool.h>

#include <rte_log.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
#include <rte_vfio.h>
#include <rte_eal.h>
#include <rte_bus.h>
#include <rte_spinlock.h>

#include "eal_filesystem.h"

#include "pci_init.h"
#include "private.h"
28 | |
29 | /** |
30 | * @file |
31 | * PCI probing under linux (VFIO version) |
32 | * |
33 | * This code tries to determine if the PCI device is bound to VFIO driver, |
34 | * and initialize it (map BARs, set up interrupts) if that's the case. |
35 | * |
36 | * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". |
37 | */ |
38 | |
39 | #ifdef VFIO_PRESENT |
40 | |
/* Runtime page size/mask: derived from sysconf() so the binary is not
 * tied to the page size of the build host. */
#ifndef PAGE_SIZE
#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
#endif
#define PAGE_MASK (~(PAGE_SIZE - 1))
45 | |
46 | static struct rte_tailq_elem rte_vfio_tailq = { |
47 | .name = "VFIO_RESOURCE_LIST", |
48 | }; |
49 | EAL_REGISTER_TAILQ(rte_vfio_tailq)static void __attribute__((constructor(65535), used)) tailqinitfn_rte_vfio_tailq (void) { if (rte_eal_tailq_register(&rte_vfio_tailq) < 0) __rte_panic(__func__, "Cannot initialize tailq: %s\n" "%.0s" , rte_vfio_tailq.name, "dummy"); } |
50 | |
51 | int |
52 | pci_vfio_read_config(const struct rte_intr_handle *intr_handle, |
53 | void *buf, size_t len, off_t offs) |
54 | { |
55 | return pread64(intr_handle->vfio_dev_fd, buf, len, |
56 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + offs); |
57 | } |
58 | |
59 | int |
60 | pci_vfio_write_config(const struct rte_intr_handle *intr_handle, |
61 | const void *buf, size_t len, off_t offs) |
62 | { |
63 | return pwrite64(intr_handle->vfio_dev_fd, buf, len, |
64 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + offs); |
65 | } |
66 | |
67 | /* get PCI BAR number where MSI-X interrupts are */ |
68 | static int |
69 | pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) |
70 | { |
71 | int ret; |
72 | uint32_t reg; |
73 | uint16_t flags; |
74 | uint8_t cap_id, cap_offset; |
75 | |
76 | /* read PCI capability pointer from config space */ |
77 | ret = pread64(fd, ®, sizeof(reg), |
78 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + |
79 | PCI_CAPABILITY_LIST0x34); |
80 | if (ret != sizeof(reg)) { |
81 | RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "rte_log(4U, 0, "EAL" ": " "Cannot read capability pointer from PCI " "config space!\n") |
82 | "config space!\n")rte_log(4U, 0, "EAL" ": " "Cannot read capability pointer from PCI " "config space!\n"); |
83 | return -1; |
84 | } |
85 | |
86 | /* we need first byte */ |
87 | cap_offset = reg & 0xFF; |
88 | |
89 | while (cap_offset) { |
90 | |
91 | /* read PCI capability ID */ |
92 | ret = pread64(fd, ®, sizeof(reg), |
93 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + |
94 | cap_offset); |
95 | if (ret != sizeof(reg)) { |
96 | RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "rte_log(4U, 0, "EAL" ": " "Cannot read capability ID from PCI " "config space!\n") |
97 | "config space!\n")rte_log(4U, 0, "EAL" ": " "Cannot read capability ID from PCI " "config space!\n"); |
98 | return -1; |
99 | } |
100 | |
101 | /* we need first byte */ |
102 | cap_id = reg & 0xFF; |
103 | |
104 | /* if we haven't reached MSI-X, check next capability */ |
105 | if (cap_id != PCI_CAP_ID_MSIX0x11) { |
106 | ret = pread64(fd, ®, sizeof(reg), |
107 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + |
108 | cap_offset); |
109 | if (ret != sizeof(reg)) { |
110 | RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "rte_log(4U, 0, "EAL" ": " "Cannot read capability pointer from PCI " "config space!\n") |
111 | "config space!\n")rte_log(4U, 0, "EAL" ": " "Cannot read capability pointer from PCI " "config space!\n"); |
112 | return -1; |
113 | } |
114 | |
115 | /* we need second byte */ |
116 | cap_offset = (reg & 0xFF00) >> 8; |
117 | |
118 | continue; |
119 | } |
120 | /* else, read table offset */ |
121 | else { |
122 | /* table offset resides in the next 4 bytes */ |
123 | ret = pread64(fd, ®, sizeof(reg), |
124 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + |
125 | cap_offset + 4); |
126 | if (ret != sizeof(reg)) { |
127 | RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "rte_log(4U, 0, "EAL" ": " "Cannot read table offset from PCI config " "space!\n") |
128 | "space!\n")rte_log(4U, 0, "EAL" ": " "Cannot read table offset from PCI config " "space!\n"); |
129 | return -1; |
130 | } |
131 | |
132 | ret = pread64(fd, &flags, sizeof(flags), |
133 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + |
134 | cap_offset + 2); |
135 | if (ret != sizeof(flags)) { |
136 | RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "rte_log(4U, 0, "EAL" ": " "Cannot read table flags from PCI config " "space!\n") |
137 | "space!\n")rte_log(4U, 0, "EAL" ": " "Cannot read table flags from PCI config " "space!\n"); |
138 | return -1; |
139 | } |
140 | |
141 | msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR0x00000007; |
142 | msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET0xfffffff8; |
143 | msix_table->size = |
144 | 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE0x07FF)); |
145 | |
146 | return 0; |
147 | } |
148 | } |
149 | return 0; |
150 | } |
151 | |
152 | /* set PCI bus mastering */ |
153 | static int |
154 | pci_vfio_set_bus_master(int dev_fd, bool_Bool op) |
155 | { |
156 | uint16_t reg; |
157 | int ret; |
158 | |
159 | ret = pread64(dev_fd, ®, sizeof(reg), |
160 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + |
161 | PCI_COMMAND0x04); |
162 | if (ret != sizeof(reg)) { |
163 | RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n")rte_log(4U, 0, "EAL" ": " "Cannot read command from PCI config space!\n" ); |
164 | return -1; |
165 | } |
166 | |
167 | if (op) |
168 | /* set the master bit */ |
169 | reg |= PCI_COMMAND_MASTER0x4; |
170 | else |
171 | reg &= ~(PCI_COMMAND_MASTER0x4); |
172 | |
173 | ret = pwrite64(dev_fd, ®, sizeof(reg), |
174 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) + |
175 | PCI_COMMAND0x04); |
176 | |
177 | if (ret != sizeof(reg)) { |
178 | RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n")rte_log(4U, 0, "EAL" ": " "Cannot write command to PCI config space!\n" ); |
179 | return -1; |
180 | } |
181 | |
182 | return 0; |
183 | } |
184 | |
185 | /* set up interrupt support (but not enable interrupts) */ |
186 | static int |
187 | pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) |
188 | { |
189 | int i, ret, intr_idx; |
190 | enum rte_intr_mode intr_mode; |
191 | |
192 | /* default to invalid index */ |
193 | intr_idx = VFIO_PCI_NUM_IRQS; |
194 | |
195 | /* Get default / configured intr_mode */ |
196 | intr_mode = rte_eal_vfio_intr_mode(); |
197 | |
198 | /* get interrupt type from internal config (MSI-X by default, can be |
199 | * overridden from the command line |
200 | */ |
201 | switch (intr_mode) { |
202 | case RTE_INTR_MODE_MSIX: |
203 | intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; |
204 | break; |
205 | case RTE_INTR_MODE_MSI: |
206 | intr_idx = VFIO_PCI_MSI_IRQ_INDEX; |
207 | break; |
208 | case RTE_INTR_MODE_LEGACY: |
209 | intr_idx = VFIO_PCI_INTX_IRQ_INDEX; |
210 | break; |
211 | /* don't do anything if we want to automatically determine interrupt type */ |
212 | case RTE_INTR_MODE_NONE: |
213 | break; |
214 | default: |
215 | RTE_LOG(ERR, EAL, " unknown default interrupt type!\n")rte_log(4U, 0, "EAL" ": " " unknown default interrupt type!\n" ); |
216 | return -1; |
217 | } |
218 | |
219 | /* start from MSI-X interrupt type */ |
220 | for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { |
221 | struct vfio_irq_info irq = { .argsz = sizeof(irq) }; |
222 | int fd = -1; |
223 | |
224 | /* skip interrupt modes we don't want */ |
225 | if (intr_mode != RTE_INTR_MODE_NONE && |
226 | i != intr_idx) |
227 | continue; |
228 | |
229 | irq.index = i; |
230 | |
231 | ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 9)) << 0) | ((0) << ((0 +8)+8))), &irq); |
232 | if (ret < 0) { |
233 | RTE_LOG(ERR, EAL, " cannot get IRQ info, "rte_log(4U, 0, "EAL" ": " " cannot get IRQ info, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) |
234 | "error %i (%s)\n", errno, strerror(errno))rte_log(4U, 0, "EAL" ": " " cannot get IRQ info, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); |
235 | return -1; |
236 | } |
237 | |
238 | /* if this vector cannot be used with eventfd, fail if we explicitly |
239 | * specified interrupt type, otherwise continue */ |
240 | if ((irq.flags & VFIO_IRQ_INFO_EVENTFD(1 << 0)) == 0) { |
241 | if (intr_mode != RTE_INTR_MODE_NONE) { |
242 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " " interrupt vector does not support eventfd!\n" ) |
243 | " interrupt vector does not support eventfd!\n")rte_log(4U, 0, "EAL" ": " " interrupt vector does not support eventfd!\n" ); |
244 | return -1; |
245 | } else |
246 | continue; |
247 | } |
248 | |
249 | /* set up an eventfd for interrupts */ |
250 | fd = eventfd(0, EFD_NONBLOCKEFD_NONBLOCK | EFD_CLOEXECEFD_CLOEXEC); |
251 | if (fd < 0) { |
252 | RTE_LOG(ERR, EAL, " cannot set up eventfd, "rte_log(4U, 0, "EAL" ": " " cannot set up eventfd, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) |
253 | "error %i (%s)\n", errno, strerror(errno))rte_log(4U, 0, "EAL" ": " " cannot set up eventfd, " "error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); |
254 | return -1; |
255 | } |
256 | |
257 | dev->intr_handle.fd = fd; |
258 | dev->intr_handle.vfio_dev_fd = vfio_dev_fd; |
259 | |
260 | switch (i) { |
261 | case VFIO_PCI_MSIX_IRQ_INDEX: |
262 | intr_mode = RTE_INTR_MODE_MSIX; |
263 | dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; |
264 | break; |
265 | case VFIO_PCI_MSI_IRQ_INDEX: |
266 | intr_mode = RTE_INTR_MODE_MSI; |
Value stored to 'intr_mode' is never read | |
267 | dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; |
268 | break; |
269 | case VFIO_PCI_INTX_IRQ_INDEX: |
270 | intr_mode = RTE_INTR_MODE_LEGACY; |
271 | dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; |
272 | break; |
273 | default: |
274 | RTE_LOG(ERR, EAL, " unknown interrupt type!\n")rte_log(4U, 0, "EAL" ": " " unknown interrupt type!\n"); |
275 | return -1; |
276 | } |
277 | |
278 | return 0; |
279 | } |
280 | |
281 | /* if we're here, we haven't found a suitable interrupt vector */ |
282 | return -1; |
283 | } |
284 | |
285 | #ifdef HAVE_VFIO_DEV_REQ_INTERFACE |
286 | /* |
287 | * Spinlock for device hot-unplug failure handling. |
288 | * If it tries to access bus or device, such as handle sigbus on bus |
289 | * or handle memory failure for device, just need to use this lock. |
290 | * It could protect the bus and the device to avoid race condition. |
291 | */ |
292 | static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER{ 0 }; |
293 | |
294 | static void |
295 | pci_vfio_req_handler(void *param) |
296 | { |
297 | struct rte_bus *bus; |
298 | int ret; |
299 | struct rte_device *device = (struct rte_device *)param; |
300 | |
301 | rte_spinlock_lock(&failure_handle_lock); |
302 | bus = rte_bus_find_by_device(device); |
303 | if (bus == NULL((void*)0)) { |
304 | RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",rte_log(4U, 0, "EAL" ": " "Cannot find bus for device (%s)\n" , device->name) |
305 | device->name)rte_log(4U, 0, "EAL" ": " "Cannot find bus for device (%s)\n" , device->name); |
306 | goto handle_end; |
307 | } |
308 | |
309 | /* |
310 | * vfio kernel module request user space to release allocated |
311 | * resources before device be deleted in kernel, so it can directly |
312 | * call the vfio bus hot-unplug handler to process it. |
313 | */ |
314 | ret = bus->hot_unplug_handler(device); |
315 | if (ret) |
316 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "Can not handle hot-unplug for device (%s)\n" , device->name) |
317 | "Can not handle hot-unplug for device (%s)\n",rte_log(4U, 0, "EAL" ": " "Can not handle hot-unplug for device (%s)\n" , device->name) |
318 | device->name)rte_log(4U, 0, "EAL" ": " "Can not handle hot-unplug for device (%s)\n" , device->name); |
319 | handle_end: |
320 | rte_spinlock_unlock(&failure_handle_lock); |
321 | } |
322 | |
323 | /* enable notifier (only enable req now) */ |
324 | static int |
325 | pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd) |
326 | { |
327 | int ret; |
328 | int fd = -1; |
329 | |
330 | /* set up an eventfd for req notifier */ |
331 | fd = eventfd(0, EFD_NONBLOCKEFD_NONBLOCK | EFD_CLOEXECEFD_CLOEXEC); |
332 | if (fd < 0) { |
333 | RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n",rte_log(4U, 0, "EAL" ": " "Cannot set up eventfd, error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) |
334 | errno, strerror(errno))rte_log(4U, 0, "EAL" ": " "Cannot set up eventfd, error %i (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); |
335 | return -1; |
336 | } |
337 | |
338 | dev->vfio_req_intr_handle.fd = fd; |
339 | dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ; |
340 | dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; |
341 | |
342 | ret = rte_intr_callback_register(&dev->vfio_req_intr_handle, |
343 | pci_vfio_req_handler, |
344 | (void *)&dev->device); |
345 | if (ret) { |
346 | RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n")rte_log(4U, 0, "EAL" ": " "Fail to register req notifier handler.\n" ); |
347 | goto error; |
348 | } |
349 | |
350 | ret = rte_intr_enable(&dev->vfio_req_intr_handle); |
351 | if (ret) { |
352 | RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n")rte_log(4U, 0, "EAL" ": " "Fail to enable req notifier.\n"); |
353 | ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, |
354 | pci_vfio_req_handler, |
355 | (void *)&dev->device); |
356 | if (ret < 0) |
357 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "Fail to unregister req notifier handler.\n" ) |
358 | "Fail to unregister req notifier handler.\n")rte_log(4U, 0, "EAL" ": " "Fail to unregister req notifier handler.\n" ); |
359 | goto error; |
360 | } |
361 | |
362 | return 0; |
363 | error: |
364 | close(fd); |
365 | |
366 | dev->vfio_req_intr_handle.fd = -1; |
367 | dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; |
368 | dev->vfio_req_intr_handle.vfio_dev_fd = -1; |
369 | |
370 | return -1; |
371 | } |
372 | |
373 | /* disable notifier (only disable req now) */ |
374 | static int |
375 | pci_vfio_disable_notifier(struct rte_pci_device *dev) |
376 | { |
377 | int ret; |
378 | |
379 | ret = rte_intr_disable(&dev->vfio_req_intr_handle); |
380 | if (ret) { |
381 | RTE_LOG(ERR, EAL, "fail to disable req notifier.\n")rte_log(4U, 0, "EAL" ": " "fail to disable req notifier.\n"); |
382 | return -1; |
383 | } |
384 | |
385 | ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, |
386 | pci_vfio_req_handler, |
387 | (void *)&dev->device); |
388 | if (ret < 0) { |
389 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "fail to unregister req notifier handler.\n" ) |
390 | "fail to unregister req notifier handler.\n")rte_log(4U, 0, "EAL" ": " "fail to unregister req notifier handler.\n" ); |
391 | return -1; |
392 | } |
393 | |
394 | close(dev->vfio_req_intr_handle.fd); |
395 | |
396 | dev->vfio_req_intr_handle.fd = -1; |
397 | dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; |
398 | dev->vfio_req_intr_handle.vfio_dev_fd = -1; |
399 | |
400 | return 0; |
401 | } |
402 | #endif |
403 | |
404 | static int |
405 | pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) |
406 | { |
407 | uint32_t ioport_bar; |
408 | int ret; |
409 | |
410 | ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), |
411 | VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)((uint64_t) VFIO_PCI_CONFIG_REGION_INDEX << 40ULL) |
412 | + PCI_BASE_ADDRESS_00x10 + bar_index*4); |
413 | if (ret != sizeof(ioport_bar)) { |
414 | RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",rte_log(4U, 0, "EAL" ": " "Cannot read command (%x) from config space!\n" , 0x10 + bar_index*4) |
415 | PCI_BASE_ADDRESS_0 + bar_index*4)rte_log(4U, 0, "EAL" ": " "Cannot read command (%x) from config space!\n" , 0x10 + bar_index*4); |
416 | return -1; |
417 | } |
418 | |
419 | return (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO0x01) != 0; |
420 | } |
421 | |
422 | static int |
423 | pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) |
424 | { |
425 | if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { |
426 | RTE_LOG(ERR, EAL, "Error setting up interrupts!\n")rte_log(4U, 0, "EAL" ": " "Error setting up interrupts!\n"); |
427 | return -1; |
428 | } |
429 | |
430 | /* set bus mastering for the device */ |
431 | if (pci_vfio_set_bus_master(vfio_dev_fd, true1)) { |
432 | RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n")rte_log(4U, 0, "EAL" ": " "Cannot set up bus mastering!\n"); |
433 | return -1; |
434 | } |
435 | |
436 | /* |
437 | * Reset the device. If the device is not capable of resetting, |
438 | * then it updates errno as EINVAL. |
439 | */ |
440 | if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 11)) << 0) | ((0) << ((0 +8)+8)))) && errno(*__errno_location ()) != EINVAL22) { |
441 | RTE_LOG(ERR, EAL, "Unable to reset device! Error: %d (%s)\n",rte_log(4U, 0, "EAL" ": " "Unable to reset device! Error: %d (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))) |
442 | errno, strerror(errno))rte_log(4U, 0, "EAL" ": " "Unable to reset device! Error: %d (%s)\n" , (*__errno_location ()), strerror((*__errno_location ()))); |
443 | return -1; |
444 | } |
445 | |
446 | return 0; |
447 | } |
448 | |
449 | static int |
450 | pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, |
451 | int bar_index, int additional_flags) |
452 | { |
453 | struct memreg { |
454 | unsigned long offset, size; |
455 | } memreg[2] = {}; |
456 | void *bar_addr; |
457 | struct pci_msix_table *msix_table = &vfio_res->msix_table; |
458 | struct pci_map *bar = &vfio_res->maps[bar_index]; |
459 | |
460 | if (bar->size == 0) { |
461 | RTE_LOG(DEBUG, EAL, "Bar size is 0, skip BAR%d\n", bar_index)rte_log(8U, 0, "EAL" ": " "Bar size is 0, skip BAR%d\n", bar_index ); |
462 | return 0; |
463 | } |
464 | |
465 | if (msix_table->bar_index == bar_index) { |
466 | /* |
467 | * VFIO will not let us map the MSI-X table, |
468 | * but we can map around it. |
469 | */ |
470 | uint32_t table_start = msix_table->offset; |
471 | uint32_t table_end = table_start + msix_table->size; |
472 | table_end = RTE_ALIGN(table_end, PAGE_SIZE)(__typeof__(((table_end) + ((__typeof__(table_end)) ((sysconf (_SC_PAGESIZE))) - 1))))((((table_end) + ((__typeof__(table_end )) ((sysconf(_SC_PAGESIZE))) - 1))) & (~((__typeof__(((table_end ) + ((__typeof__(table_end)) ((sysconf(_SC_PAGESIZE))) - 1))) )(((sysconf(_SC_PAGESIZE))) - 1)))); |
473 | table_start = RTE_ALIGN_FLOOR(table_start, PAGE_SIZE)(__typeof__(table_start))((table_start) & (~((__typeof__( table_start))(((sysconf(_SC_PAGESIZE))) - 1)))); |
474 | |
475 | /* If page-aligned start of MSI-X table is less than the |
476 | * actual MSI-X table start address, reassign to the actual |
477 | * start address. |
478 | */ |
479 | if (table_start < msix_table->offset) |
480 | table_start = msix_table->offset; |
481 | |
482 | if (table_start == 0 && table_end >= bar->size) { |
483 | /* Cannot map this BAR */ |
484 | RTE_LOG(DEBUG, EAL, "Skipping BAR%d\n", bar_index)rte_log(8U, 0, "EAL" ": " "Skipping BAR%d\n", bar_index); |
485 | bar->size = 0; |
486 | bar->addr = 0; |
487 | return 0; |
488 | } |
489 | |
490 | memreg[0].offset = bar->offset; |
491 | memreg[0].size = table_start; |
492 | if (bar->size < table_end) { |
493 | /* |
494 | * If MSI-X table end is beyond BAR end, don't attempt |
495 | * to perform second mapping. |
496 | */ |
497 | memreg[1].offset = 0; |
498 | memreg[1].size = 0; |
499 | } else { |
500 | memreg[1].offset = bar->offset + table_end; |
501 | memreg[1].size = bar->size - table_end; |
502 | } |
503 | |
504 | RTE_LOG(DEBUG, EAL,rte_log(8U, 0, "EAL" ": " "Trying to map BAR%d that contains the MSI-X " "table. Trying offsets: " "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n" , bar_index, memreg[0].offset, memreg[0].size, memreg[1].offset , memreg[1].size) |
505 | "Trying to map BAR%d that contains the MSI-X "rte_log(8U, 0, "EAL" ": " "Trying to map BAR%d that contains the MSI-X " "table. Trying offsets: " "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n" , bar_index, memreg[0].offset, memreg[0].size, memreg[1].offset , memreg[1].size) |
506 | "table. Trying offsets: "rte_log(8U, 0, "EAL" ": " "Trying to map BAR%d that contains the MSI-X " "table. Trying offsets: " "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n" , bar_index, memreg[0].offset, memreg[0].size, memreg[1].offset , memreg[1].size) |
507 | "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", bar_index,rte_log(8U, 0, "EAL" ": " "Trying to map BAR%d that contains the MSI-X " "table. Trying offsets: " "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n" , bar_index, memreg[0].offset, memreg[0].size, memreg[1].offset , memreg[1].size) |
508 | memreg[0].offset, memreg[0].size,rte_log(8U, 0, "EAL" ": " "Trying to map BAR%d that contains the MSI-X " "table. Trying offsets: " "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n" , bar_index, memreg[0].offset, memreg[0].size, memreg[1].offset , memreg[1].size) |
509 | memreg[1].offset, memreg[1].size)rte_log(8U, 0, "EAL" ": " "Trying to map BAR%d that contains the MSI-X " "table. Trying offsets: " "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n" , bar_index, memreg[0].offset, memreg[0].size, memreg[1].offset , memreg[1].size); |
510 | } else { |
511 | memreg[0].offset = bar->offset; |
512 | memreg[0].size = bar->size; |
513 | } |
514 | |
515 | /* reserve the address using an inaccessible mapping */ |
516 | bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE0x02 | |
517 | MAP_ANONYMOUS0x20 | additional_flags, -1, 0); |
518 | if (bar_addr != MAP_FAILED((void *) -1)) { |
519 | void *map_addr = NULL((void*)0); |
520 | if (memreg[0].size) { |
521 | /* actual map of first part */ |
522 | map_addr = pci_map_resource(bar_addr, vfio_dev_fd, |
523 | memreg[0].offset, |
524 | memreg[0].size, |
525 | MAP_FIXED0x10); |
526 | } |
527 | |
528 | /* if there's a second part, try to map it */ |
529 | if (map_addr != MAP_FAILED((void *) -1) |
530 | && memreg[1].offset && memreg[1].size) { |
531 | void *second_addr = RTE_PTR_ADD(bar_addr,((void*)((uintptr_t)(bar_addr) + (memreg[1].offset - (uintptr_t )bar->offset))) |
532 | memreg[1].offset -((void*)((uintptr_t)(bar_addr) + (memreg[1].offset - (uintptr_t )bar->offset))) |
533 | (uintptr_t)bar->offset)((void*)((uintptr_t)(bar_addr) + (memreg[1].offset - (uintptr_t )bar->offset))); |
534 | map_addr = pci_map_resource(second_addr, |
535 | vfio_dev_fd, |
536 | memreg[1].offset, |
537 | memreg[1].size, |
538 | MAP_FIXED0x10); |
539 | } |
540 | |
541 | if (map_addr == MAP_FAILED((void *) -1) || !map_addr) { |
542 | munmap(bar_addr, bar->size); |
543 | bar_addr = MAP_FAILED((void *) -1); |
544 | RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n",rte_log(4U, 0, "EAL" ": " "Failed to map pci BAR%d\n", bar_index ) |
545 | bar_index)rte_log(4U, 0, "EAL" ": " "Failed to map pci BAR%d\n", bar_index ); |
546 | return -1; |
547 | } |
548 | } else { |
549 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "Failed to create inaccessible mapping for BAR%d\n" , bar_index) |
550 | "Failed to create inaccessible mapping for BAR%d\n",rte_log(4U, 0, "EAL" ": " "Failed to create inaccessible mapping for BAR%d\n" , bar_index) |
551 | bar_index)rte_log(4U, 0, "EAL" ": " "Failed to create inaccessible mapping for BAR%d\n" , bar_index); |
552 | return -1; |
553 | } |
554 | |
555 | bar->addr = bar_addr; |
556 | return 0; |
557 | } |
558 | |
559 | /* |
560 | * region info may contain capability headers, so we need to keep reallocating |
561 | * the memory until we match allocated memory size with argsz. |
562 | */ |
563 | static int |
564 | pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info, |
565 | int region) |
566 | { |
567 | struct vfio_region_info *ri; |
568 | size_t argsz = sizeof(*ri); |
569 | int ret; |
570 | |
571 | ri = malloc(sizeof(*ri)); |
572 | if (ri == NULL((void*)0)) { |
573 | RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n")rte_log(4U, 0, "EAL" ": " "Cannot allocate memory for region info\n" ); |
574 | return -1; |
575 | } |
576 | again: |
577 | memset(ri, 0, argsz); |
578 | ri->argsz = argsz; |
579 | ri->index = region; |
580 | |
581 | ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO(((0U) << (((0 +8)+8)+14)) | ((((';'))) << (0 +8) ) | (((100 + 8)) << 0) | ((0) << ((0 +8)+8))), ri); |
582 | if (ret < 0) { |
583 | free(ri); |
584 | return ret; |
585 | } |
586 | if (ri->argsz != argsz) { |
587 | struct vfio_region_info *tmp; |
588 | |
589 | argsz = ri->argsz; |
590 | tmp = realloc(ri, argsz); |
591 | |
592 | if (tmp == NULL((void*)0)) { |
593 | /* realloc failed but the ri is still there */ |
594 | free(ri); |
595 | RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n")rte_log(4U, 0, "EAL" ": " "Cannot reallocate memory for region info\n" ); |
596 | return -1; |
597 | } |
598 | ri = tmp; |
599 | goto again; |
600 | } |
601 | *info = ri; |
602 | |
603 | return 0; |
604 | } |
605 | |
606 | static struct vfio_info_cap_header * |
607 | pci_vfio_info_cap(struct vfio_region_info *info, int cap) |
608 | { |
609 | struct vfio_info_cap_header *h; |
610 | size_t offset; |
611 | |
612 | if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS(1 << 3)) == 0) { |
613 | /* VFIO info does not advertise capabilities */ |
614 | return NULL((void*)0); |
615 | } |
616 | |
617 | offset = VFIO_CAP_OFFSET(info)(info->resv); |
618 | while (offset != 0) { |
619 | h = RTE_PTR_ADD(info, offset)((void*)((uintptr_t)(info) + (offset))); |
620 | if (h->id == cap) |
621 | return h; |
622 | offset = h->next; |
623 | } |
624 | return NULL((void*)0); |
625 | } |
626 | |
627 | static int |
628 | pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) |
629 | { |
630 | struct vfio_region_info *info; |
631 | int ret; |
632 | |
633 | ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region); |
634 | if (ret < 0) |
635 | return -1; |
636 | |
637 | ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE3) != NULL((void*)0); |
638 | |
639 | /* cleanup */ |
640 | free(info); |
641 | |
642 | return ret; |
643 | } |
644 | |
645 | |
646 | static int |
647 | pci_vfio_map_resource_primary(struct rte_pci_device *dev) |
648 | { |
649 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; |
650 | char pci_addr[PATH_MAX4096] = {0}; |
651 | int vfio_dev_fd; |
652 | struct rte_pci_addr *loc = &dev->addr; |
653 | int i, ret; |
654 | struct mapped_pci_resource *vfio_res = NULL((void*)0); |
655 | struct mapped_pci_res_list *vfio_res_list = |
656 | RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list)(struct mapped_pci_res_list *)&(rte_vfio_tailq.head)-> tailq_head; |
657 | |
658 | struct pci_map *maps; |
659 | |
660 | dev->intr_handle.fd = -1; |
661 | #ifdef HAVE_VFIO_DEV_REQ_INTERFACE |
662 | dev->vfio_req_intr_handle.fd = -1; |
663 | #endif |
664 | |
665 | /* store PCI address string */ |
666 | snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT"%.4" "x" ":%.2" "x" ":%.2" "x" ".%" "x", |
667 | loc->domain, loc->bus, loc->devid, loc->function); |
668 | |
669 | ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, |
670 | &vfio_dev_fd, &device_info); |
671 | if (ret) |
672 | return ret; |
673 | |
674 | /* allocate vfio_res and get region info */ |
675 | vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); |
676 | if (vfio_res == NULL((void*)0)) { |
677 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "%s(): cannot store vfio mmap details\n" , __func__) |
678 | "%s(): cannot store vfio mmap details\n", __func__)rte_log(4U, 0, "EAL" ": " "%s(): cannot store vfio mmap details\n" , __func__); |
679 | goto err_vfio_dev_fd; |
680 | } |
681 | memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); |
682 | |
683 | /* get number of registers (up to BAR5) */ |
684 | vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,__extension__ ({ __typeof__ ((int) device_info.num_regions) _a = ((int) device_info.num_regions); __typeof__ (VFIO_PCI_BAR5_REGION_INDEX + 1) _b = (VFIO_PCI_BAR5_REGION_INDEX + 1); _a < _b ? _a : _b; }) |
685 | VFIO_PCI_BAR5_REGION_INDEX + 1)__extension__ ({ __typeof__ ((int) device_info.num_regions) _a = ((int) device_info.num_regions); __typeof__ (VFIO_PCI_BAR5_REGION_INDEX + 1) _b = (VFIO_PCI_BAR5_REGION_INDEX + 1); _a < _b ? _a : _b; }); |
686 | |
687 | /* map BARs */ |
688 | maps = vfio_res->maps; |
689 | |
690 | vfio_res->msix_table.bar_index = -1; |
691 | /* get MSI-X BAR, if any (we have to know where it is because we can't |
692 | * easily mmap it when using VFIO) |
693 | */ |
694 | ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table); |
695 | if (ret < 0) { |
696 | RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",rte_log(4U, 0, "EAL" ": " " %s cannot get MSI-X BAR number!\n" , pci_addr) |
697 | pci_addr)rte_log(4U, 0, "EAL" ": " " %s cannot get MSI-X BAR number!\n" , pci_addr); |
698 | goto err_vfio_res; |
699 | } |
700 | /* if we found our MSI-X BAR region, check if we can mmap it */ |
701 | if (vfio_res->msix_table.bar_index != -1) { |
702 | int ret = pci_vfio_msix_is_mappable(vfio_dev_fd, |
703 | vfio_res->msix_table.bar_index); |
704 | if (ret < 0) { |
705 | RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n")rte_log(4U, 0, "EAL" ": " "Couldn't check if MSI-X BAR is mappable\n" ); |
706 | goto err_vfio_res; |
707 | } else if (ret != 0) { |
708 | /* we can map it, so we don't care where it is */ |
709 | RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n")rte_log(8U, 0, "EAL" ": " "VFIO reports MSI-X BAR as mappable\n" ); |
710 | vfio_res->msix_table.bar_index = -1; |
711 | } |
712 | } |
713 | |
714 | for (i = 0; i < (int) vfio_res->nb_maps; i++) { |
715 | struct vfio_region_info *reg = NULL((void*)0); |
716 | void *bar_addr; |
717 | |
718 | ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i); |
719 | if (ret < 0) { |
720 | RTE_LOG(ERR, EAL, " %s cannot get device region info "rte_log(4U, 0, "EAL" ": " " %s cannot get device region info " "error %i (%s)\n", pci_addr, (*__errno_location ()), strerror ((*__errno_location ()))) |
721 | "error %i (%s)\n", pci_addr, errno,rte_log(4U, 0, "EAL" ": " " %s cannot get device region info " "error %i (%s)\n", pci_addr, (*__errno_location ()), strerror ((*__errno_location ()))) |
722 | strerror(errno))rte_log(4U, 0, "EAL" ": " " %s cannot get device region info " "error %i (%s)\n", pci_addr, (*__errno_location ()), strerror ((*__errno_location ()))); |
723 | goto err_vfio_res; |
724 | } |
725 | |
726 | /* chk for io port region */ |
727 | ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); |
728 | if (ret < 0) { |
729 | free(reg); |
730 | goto err_vfio_res; |
731 | } else if (ret) { |
732 | RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",rte_log(7U, 0, "EAL" ": " "Ignore mapping IO port bar(%d)\n", i) |
733 | i)rte_log(7U, 0, "EAL" ": " "Ignore mapping IO port bar(%d)\n", i); |
734 | free(reg); |
735 | continue; |
736 | } |
737 | |
738 | /* skip non-mmapable BARs */ |
739 | if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP(1 << 2)) == 0) { |
740 | free(reg); |
741 | continue; |
742 | } |
743 | |
744 | /* try mapping somewhere close to the end of hugepages */ |
745 | if (pci_map_addr == NULL((void*)0)) |
746 | pci_map_addr = pci_find_max_end_va(); |
747 | |
748 | bar_addr = pci_map_addr; |
749 | pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size)((void*)((uintptr_t)(bar_addr) + ((size_t) reg->size))); |
750 | |
751 | maps[i].addr = bar_addr; |
752 | maps[i].offset = reg->offset; |
753 | maps[i].size = reg->size; |
754 | maps[i].path = NULL((void*)0); /* vfio doesn't have per-resource paths */ |
755 | |
756 | ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); |
757 | if (ret < 0) { |
758 | RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",rte_log(4U, 0, "EAL" ": " " %s mapping BAR%i failed: %s\n", pci_addr , i, strerror((*__errno_location ()))) |
759 | pci_addr, i, strerror(errno))rte_log(4U, 0, "EAL" ": " " %s mapping BAR%i failed: %s\n", pci_addr , i, strerror((*__errno_location ()))); |
760 | free(reg); |
761 | goto err_vfio_res; |
762 | } |
763 | |
764 | dev->mem_resource[i].addr = maps[i].addr; |
765 | |
766 | free(reg); |
767 | } |
768 | |
769 | if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) { |
770 | RTE_LOG(ERR, EAL, " %s setup device failed\n", pci_addr)rte_log(4U, 0, "EAL" ": " " %s setup device failed\n", pci_addr ); |
771 | goto err_vfio_res; |
772 | } |
773 | |
774 | #ifdef HAVE_VFIO_DEV_REQ_INTERFACE |
775 | if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) { |
776 | RTE_LOG(ERR, EAL, "Error setting up notifier!\n")rte_log(4U, 0, "EAL" ": " "Error setting up notifier!\n"); |
777 | goto err_vfio_res; |
778 | } |
779 | |
780 | #endif |
781 | TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next)do { (vfio_res)->next.tqe_next = ((void*)0); (vfio_res)-> next.tqe_prev = (vfio_res_list)->tqh_last; *(vfio_res_list )->tqh_last = (vfio_res); (vfio_res_list)->tqh_last = & (vfio_res)->next.tqe_next; } while ( 0); |
782 | |
783 | return 0; |
784 | err_vfio_res: |
785 | rte_free(vfio_res); |
786 | err_vfio_dev_fd: |
787 | close(vfio_dev_fd); |
788 | return -1; |
789 | } |
790 | |
791 | static int |
792 | pci_vfio_map_resource_secondary(struct rte_pci_device *dev) |
793 | { |
794 | struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; |
795 | char pci_addr[PATH_MAX4096] = {0}; |
796 | int vfio_dev_fd; |
797 | struct rte_pci_addr *loc = &dev->addr; |
798 | int i, ret; |
799 | struct mapped_pci_resource *vfio_res = NULL((void*)0); |
800 | struct mapped_pci_res_list *vfio_res_list = |
801 | RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list)(struct mapped_pci_res_list *)&(rte_vfio_tailq.head)-> tailq_head; |
802 | |
803 | struct pci_map *maps; |
804 | |
805 | dev->intr_handle.fd = -1; |
806 | #ifdef HAVE_VFIO_DEV_REQ_INTERFACE |
807 | dev->vfio_req_intr_handle.fd = -1; |
808 | #endif |
809 | |
810 | /* store PCI address string */ |
811 | snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT"%.4" "x" ":%.2" "x" ":%.2" "x" ".%" "x", |
812 | loc->domain, loc->bus, loc->devid, loc->function); |
813 | |
814 | /* if we're in a secondary process, just find our tailq entry */ |
815 | TAILQ_FOREACH(vfio_res, vfio_res_list, next)for ((vfio_res) = ((vfio_res_list)->tqh_first); (vfio_res) ; (vfio_res) = ((vfio_res)->next.tqe_next)) { |
816 | if (rte_pci_addr_cmp(&vfio_res->pci_addr, |
817 | &dev->addr)) |
818 | continue; |
819 | break; |
820 | } |
821 | /* if we haven't found our tailq entry, something's wrong */ |
822 | if (vfio_res == NULL((void*)0)) { |
823 | RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",rte_log(4U, 0, "EAL" ": " " %s cannot find TAILQ entry for PCI device!\n" , pci_addr) |
824 | pci_addr)rte_log(4U, 0, "EAL" ": " " %s cannot find TAILQ entry for PCI device!\n" , pci_addr); |
825 | return -1; |
826 | } |
827 | |
828 | ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, |
829 | &vfio_dev_fd, &device_info); |
830 | if (ret) |
831 | return ret; |
832 | |
833 | /* map BARs */ |
834 | maps = vfio_res->maps; |
835 | |
836 | for (i = 0; i < (int) vfio_res->nb_maps; i++) { |
837 | ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED0x10); |
838 | if (ret < 0) { |
839 | RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",rte_log(4U, 0, "EAL" ": " " %s mapping BAR%i failed: %s\n", pci_addr , i, strerror((*__errno_location ()))) |
840 | pci_addr, i, strerror(errno))rte_log(4U, 0, "EAL" ": " " %s mapping BAR%i failed: %s\n", pci_addr , i, strerror((*__errno_location ()))); |
841 | goto err_vfio_dev_fd; |
842 | } |
843 | |
844 | dev->mem_resource[i].addr = maps[i].addr; |
845 | } |
846 | |
847 | /* we need save vfio_dev_fd, so it can be used during release */ |
848 | dev->intr_handle.vfio_dev_fd = vfio_dev_fd; |
849 | #ifdef HAVE_VFIO_DEV_REQ_INTERFACE |
850 | dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; |
851 | #endif |
852 | |
853 | return 0; |
854 | err_vfio_dev_fd: |
855 | close(vfio_dev_fd); |
856 | return -1; |
857 | } |
858 | |
859 | /* |
860 | * map the PCI resources of a PCI device in virtual memory (VFIO version). |
861 | * primary and secondary processes follow almost exactly the same path |
862 | */ |
863 | int |
864 | pci_vfio_map_resource(struct rte_pci_device *dev) |
865 | { |
866 | if (rte_eal_process_type() == RTE_PROC_PRIMARY) |
867 | return pci_vfio_map_resource_primary(dev); |
868 | else |
869 | return pci_vfio_map_resource_secondary(dev); |
870 | } |
871 | |
872 | static struct mapped_pci_resource * |
873 | find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list, |
874 | struct rte_pci_device *dev, |
875 | const char *pci_addr) |
876 | { |
877 | struct mapped_pci_resource *vfio_res = NULL((void*)0); |
878 | struct pci_map *maps; |
879 | int i; |
880 | |
881 | /* Get vfio_res */ |
882 | TAILQ_FOREACH(vfio_res, vfio_res_list, next)for ((vfio_res) = ((vfio_res_list)->tqh_first); (vfio_res) ; (vfio_res) = ((vfio_res)->next.tqe_next)) { |
883 | if (rte_pci_addr_cmp(&vfio_res->pci_addr, &dev->addr)) |
884 | continue; |
885 | break; |
886 | } |
887 | |
888 | if (vfio_res == NULL((void*)0)) |
889 | return vfio_res; |
890 | |
891 | RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",rte_log(7U, 0, "EAL" ": " "Releasing pci mapped resource for %s\n" , pci_addr) |
892 | pci_addr)rte_log(7U, 0, "EAL" ": " "Releasing pci mapped resource for %s\n" , pci_addr); |
893 | |
894 | maps = vfio_res->maps; |
895 | for (i = 0; i < (int) vfio_res->nb_maps; i++) { |
896 | |
897 | /* |
898 | * We do not need to be aware of MSI-X table BAR mappings as |
899 | * when mapping. Just using current maps array is enough |
900 | */ |
901 | if (maps[i].addr) { |
902 | RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",rte_log(7U, 0, "EAL" ": " "Calling pci_unmap_resource for %s at %p\n" , pci_addr, maps[i].addr) |
903 | pci_addr, maps[i].addr)rte_log(7U, 0, "EAL" ": " "Calling pci_unmap_resource for %s at %p\n" , pci_addr, maps[i].addr); |
904 | pci_unmap_resource(maps[i].addr, maps[i].size); |
905 | } |
906 | } |
907 | |
908 | return vfio_res; |
909 | } |
910 | |
911 | static int |
912 | pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) |
913 | { |
914 | char pci_addr[PATH_MAX4096] = {0}; |
915 | struct rte_pci_addr *loc = &dev->addr; |
916 | struct mapped_pci_resource *vfio_res = NULL((void*)0); |
917 | struct mapped_pci_res_list *vfio_res_list; |
918 | int ret; |
919 | |
920 | /* store PCI address string */ |
921 | snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT"%.4" "x" ":%.2" "x" ":%.2" "x" ".%" "x", |
922 | loc->domain, loc->bus, loc->devid, loc->function); |
923 | |
924 | #ifdef HAVE_VFIO_DEV_REQ_INTERFACE |
925 | ret = pci_vfio_disable_notifier(dev); |
926 | if (ret) { |
927 | RTE_LOG(ERR, EAL, "fail to disable req notifier.\n")rte_log(4U, 0, "EAL" ": " "fail to disable req notifier.\n"); |
928 | return -1; |
929 | } |
930 | |
931 | #endif |
932 | if (close(dev->intr_handle.fd) < 0) { |
933 | RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",rte_log(7U, 0, "EAL" ": " "Error when closing eventfd file descriptor for %s\n" , pci_addr) |
934 | pci_addr)rte_log(7U, 0, "EAL" ": " "Error when closing eventfd file descriptor for %s\n" , pci_addr); |
935 | return -1; |
936 | } |
937 | |
938 | if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false0)) { |
939 | RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n",rte_log(4U, 0, "EAL" ": " " %s cannot unset bus mastering for PCI device!\n" , pci_addr) |
940 | pci_addr)rte_log(4U, 0, "EAL" ": " " %s cannot unset bus mastering for PCI device!\n" , pci_addr); |
941 | return -1; |
942 | } |
943 | |
944 | ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, |
945 | dev->intr_handle.vfio_dev_fd); |
946 | if (ret < 0) { |
947 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "%s(): cannot release device\n", __func__ ) |
948 | "%s(): cannot release device\n", __func__)rte_log(4U, 0, "EAL" ": " "%s(): cannot release device\n", __func__ ); |
949 | return ret; |
950 | } |
951 | |
952 | vfio_res_list = |
953 | RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list)(struct mapped_pci_res_list *)&(rte_vfio_tailq.head)-> tailq_head; |
954 | vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr); |
955 | |
956 | /* if we haven't found our tailq entry, something's wrong */ |
957 | if (vfio_res == NULL((void*)0)) { |
958 | RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",rte_log(4U, 0, "EAL" ": " " %s cannot find TAILQ entry for PCI device!\n" , pci_addr) |
959 | pci_addr)rte_log(4U, 0, "EAL" ": " " %s cannot find TAILQ entry for PCI device!\n" , pci_addr); |
960 | return -1; |
961 | } |
962 | |
963 | TAILQ_REMOVE(vfio_res_list, vfio_res, next)do { if (((vfio_res)->next.tqe_next) != ((void*)0)) (vfio_res )->next.tqe_next->next.tqe_prev = (vfio_res)->next.tqe_prev ; else (vfio_res_list)->tqh_last = (vfio_res)->next.tqe_prev ; *(vfio_res)->next.tqe_prev = (vfio_res)->next.tqe_next ; } while ( 0); |
964 | |
965 | return 0; |
966 | } |
967 | |
968 | static int |
969 | pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev) |
970 | { |
971 | char pci_addr[PATH_MAX4096] = {0}; |
972 | struct rte_pci_addr *loc = &dev->addr; |
973 | struct mapped_pci_resource *vfio_res = NULL((void*)0); |
974 | struct mapped_pci_res_list *vfio_res_list; |
975 | int ret; |
976 | |
977 | /* store PCI address string */ |
978 | snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT"%.4" "x" ":%.2" "x" ":%.2" "x" ".%" "x", |
979 | loc->domain, loc->bus, loc->devid, loc->function); |
980 | |
981 | ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, |
982 | dev->intr_handle.vfio_dev_fd); |
983 | if (ret < 0) { |
984 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "%s(): cannot release device\n", __func__ ) |
985 | "%s(): cannot release device\n", __func__)rte_log(4U, 0, "EAL" ": " "%s(): cannot release device\n", __func__ ); |
986 | return ret; |
987 | } |
988 | |
989 | vfio_res_list = |
990 | RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list)(struct mapped_pci_res_list *)&(rte_vfio_tailq.head)-> tailq_head; |
991 | vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr); |
992 | |
993 | /* if we haven't found our tailq entry, something's wrong */ |
994 | if (vfio_res == NULL((void*)0)) { |
995 | RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",rte_log(4U, 0, "EAL" ": " " %s cannot find TAILQ entry for PCI device!\n" , pci_addr) |
996 | pci_addr)rte_log(4U, 0, "EAL" ": " " %s cannot find TAILQ entry for PCI device!\n" , pci_addr); |
997 | return -1; |
998 | } |
999 | |
1000 | return 0; |
1001 | } |
1002 | |
1003 | int |
1004 | pci_vfio_unmap_resource(struct rte_pci_device *dev) |
1005 | { |
1006 | if (rte_eal_process_type() == RTE_PROC_PRIMARY) |
1007 | return pci_vfio_unmap_resource_primary(dev); |
1008 | else |
1009 | return pci_vfio_unmap_resource_secondary(dev); |
1010 | } |
1011 | |
1012 | int |
1013 | pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, |
1014 | struct rte_pci_ioport *p) |
1015 | { |
1016 | if (bar < VFIO_PCI_BAR0_REGION_INDEX || |
1017 | bar > VFIO_PCI_BAR5_REGION_INDEX) { |
1018 | RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar)rte_log(4U, 0, "EAL" ": " "invalid bar (%d)!\n", bar); |
1019 | return -1; |
1020 | } |
1021 | |
1022 | p->dev = dev; |
1023 | p->base = VFIO_GET_REGION_ADDR(bar)((uint64_t) bar << 40ULL); |
1024 | return 0; |
1025 | } |
1026 | |
1027 | void |
1028 | pci_vfio_ioport_read(struct rte_pci_ioport *p, |
1029 | void *data, size_t len, off_t offset) |
1030 | { |
1031 | const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; |
1032 | |
1033 | if (pread64(intr_handle->vfio_dev_fd, data, |
1034 | len, p->base + offset) <= 0) |
1035 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "Can't read from PCI bar (%" "l" "u" ") : offset (%x)\n", (p->base >> 40), (int)offset) |
1036 | "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n",rte_log(4U, 0, "EAL" ": " "Can't read from PCI bar (%" "l" "u" ") : offset (%x)\n", (p->base >> 40), (int)offset) |
1037 | VFIO_GET_REGION_IDX(p->base), (int)offset)rte_log(4U, 0, "EAL" ": " "Can't read from PCI bar (%" "l" "u" ") : offset (%x)\n", (p->base >> 40), (int)offset); |
1038 | } |
1039 | |
1040 | void |
1041 | pci_vfio_ioport_write(struct rte_pci_ioport *p, |
1042 | const void *data, size_t len, off_t offset) |
1043 | { |
1044 | const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; |
1045 | |
1046 | if (pwrite64(intr_handle->vfio_dev_fd, data, |
1047 | len, p->base + offset) <= 0) |
1048 | RTE_LOG(ERR, EAL,rte_log(4U, 0, "EAL" ": " "Can't write to PCI bar (%" "l" "u" ") : offset (%x)\n", (p->base >> 40), (int)offset) |
1049 | "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n",rte_log(4U, 0, "EAL" ": " "Can't write to PCI bar (%" "l" "u" ") : offset (%x)\n", (p->base >> 40), (int)offset) |
1050 | VFIO_GET_REGION_IDX(p->base), (int)offset)rte_log(4U, 0, "EAL" ": " "Can't write to PCI bar (%" "l" "u" ") : offset (%x)\n", (p->base >> 40), (int)offset); |
1051 | } |
1052 | |
/*
 * Ioport unmap is a no-op under VFIO (nothing was mapped by
 * pci_vfio_ioport_map); always reports failure to the generic layer.
 */
int
pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
{
	RTE_SET_USED(p);
	return -1;
}
1059 | |
/* Report whether the vfio-pci kernel driver is available (non-zero = yes). */
int
pci_vfio_is_enabled(void)
{
	return rte_vfio_is_enabled("vfio_pci");
}
1065 | #endif |