From 649e277b6ec0d2cd798f6d43776ea38b00450db9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Mon, 14 Mar 2022 18:34:51 +0100
Subject: [PATCH] vdpa: Add custom IOTLB translations to SVQ
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use translations added in VhostIOVATree in SVQ.

Only introduce usage here, not allocation and deallocation. As with
previous patches, we use the dead code paths of shadow_vqs_enabled to
avoid committing too many changes at once. These paths are impossible
to take at the moment.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: fangyi <eric.fangyi@huawei.com>
---
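Reviewer note, kept below the cut so it stays out of git history: the
translation added in vhost_svq_translate_addr() is plain offset
arithmetic against the DMAMap returned by vhost_iova_tree_find_iova().
A minimal standalone sketch of just that step; DMAMap is reduced here
to the three fields the patch relies on (QEMU's full type lives in
include/qemu/iova-tree.h), and translate() is a hypothetical helper,
not a QEMU API:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t hwaddr;

    /* Reduced stand-in for QEMU's DMAMap: the host range
     * [translated_addr, translated_addr + size] is mapped at
     * [iova, iova + size]; size is inclusive (length - 1). */
    typedef struct {
        hwaddr iova;
        hwaddr translated_addr;
        hwaddr size;
    } DMAMap;

    /* Same check and arithmetic as vhost_svq_translate_addr(): the
     * buffer must fit inside one mapping, and the SVQ IOVA is the
     * map's iova plus the buffer's offset within the map. */
    static int translate(const DMAMap *map, hwaddr vaddr, hwaddr len,
                         hwaddr *iova)
    {
        if (vaddr < map->translated_addr ||
            vaddr + len - 1 > map->translated_addr + map->size) {
            return -1; /* buffer expands over the iova range */
        }
        *iova = map->iova + (vaddr - map->translated_addr);
        return 0;
    }

    int main(void)
    {
        DMAMap map = { .iova = 0x1000,
                       .translated_addr = 0x7f0000001000ULL,
                       .size = 0xfff };
        hwaddr iova;

        if (translate(&map, 0x7f0000001200ULL, 0x100, &iova) == 0) {
            printf("0x%llx\n", (unsigned long long)iova); /* 0x1200 */
        }
        return 0;
    }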
 hw/virtio/vhost-shadow-virtqueue.c |  86 +++++++++++++++++---
 hw/virtio/vhost-shadow-virtqueue.h |   6 +-
 hw/virtio/vhost-vdpa.c             | 122 ++++++++++++++++++++++++-----
 include/hw/virtio/vhost-vdpa.h     |   3 +
 4 files changed, 187 insertions(+), 30 deletions(-)
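One more note for review: the patch keeps DMAMap.size inclusive
(length - 1) everywhere, which is why vhost_vdpa_listener_region_add()
stores .size = int128_get64(llsize) - 1 while vhost_vdpa_svq_map_ring()
passes needle->size + 1 bytes to vhost_vdpa_dma_map(). A self-contained
sketch of that size convention only; iova_alloc() is a hypothetical
bump allocator standing in for vhost_iova_tree_map_alloc(), nothing
here is QEMU code:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t hwaddr;

    typedef struct {
        hwaddr iova;
        hwaddr translated_addr;
        hwaddr size; /* inclusive: region is [iova, iova + size] */
    } DMAMap;

    static hwaddr next_iova = 0x1000;

    /* Hypothetical stand-in for vhost_iova_tree_map_alloc(): hands
     * out the next free IOVA range and fills needle->iova. */
    static int iova_alloc(DMAMap *needle)
    {
        needle->iova = next_iova;
        next_iova += needle->size + 1; /* add back the full length */
        return 0;
    }

    int main(void)
    {
        uint8_t ring[4096];
        DMAMap region = {
            .translated_addr = (hwaddr)(uintptr_t)ring,
            .size = sizeof(ring) - 1, /* inclusive size, as in the patch */
        };

        if (iova_alloc(&region) == 0) {
            /* The real code would now DMA-map region.size + 1 bytes
             * at region.iova, i.e. the whole 4096-byte ring. */
            printf("iova 0x%llx len %llu\n",
                   (unsigned long long)region.iova,
                   (unsigned long long)(region.size + 1));
        }
        return 0;
    }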
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 46e94f0861..c38b6b6ab5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -69,7 +69,59 @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
     return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
 }
 
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+/**
+ * Translate addresses between qemu's virtual address space and the SVQ IOVA
+ *
+ * @svq: Shadow VirtQueue
+ * @addrs: Translated IOVA addresses
+ * @iovec: Source qemu's VA addresses
+ * @num: Length of iovec and minimum length of addrs
+ */
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
+                                     hwaddr *addrs, const struct iovec *iovec,
+                                     size_t num)
+{
+    if (num == 0) {
+        return true;
+    }
+
+    for (size_t i = 0; i < num; ++i) {
+        DMAMap needle = {
+            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+            .size = iovec[i].iov_len,
+        };
+        Int128 needle_last, map_last;
+        size_t off;
+
+        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+        /*
+         * Map cannot be NULL since iova map contains all guest space and
+         * qemu already has a physical address mapped
+         */
+        if (unlikely(!map)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
+                          needle.translated_addr);
+            return false;
+        }
+
+        off = needle.translated_addr - map->translated_addr;
+        addrs[i] = map->iova + off;
+
+        needle_last = int128_add(int128_make64(needle.translated_addr),
+                                 int128_make64(iovec[i].iov_len));
+        map_last = int128_make64(map->translated_addr + map->size);
+        if (unlikely(int128_gt(needle_last, map_last))) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Guest buffer expands over iova range");
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                     const struct iovec *iovec, size_t num,
                                     bool more_descs, bool write)
 {
@@ -88,7 +140,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
         } else {
             descs[i].flags = flags;
         }
-        descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
+        descs[i].addr = cpu_to_le64(sg[n]);
         descs[i].len = cpu_to_le32(iovec[n].iov_len);
 
         last = i;
@@ -103,6 +155,8 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 {
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
+    bool ok;
+    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
 
     *head = svq->free_head;
 
@@ -113,9 +167,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
         return false;
     }
 
-    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
-                            false);
-    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+    ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+    if (unlikely(!ok)) {
+        return false;
+    }
+    vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
+                            elem->in_num > 0, false);
+
+
+    ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
 
     /*
      * Put the entry in the available array (but don't update avail->idx until
@@ -394,9 +459,9 @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
                               struct vhost_vring_addr *addr)
 {
-    addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
-    addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
-    addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
+    addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
+    addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
+    addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
 }
 
 size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
@@ -517,11 +582,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  * Creates vhost shadow virtqueue, and instructs the vhost device to use the
  * shadow methods and file descriptors.
  *
+ * @iova_tree: Tree to perform descriptor translations
+ *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
-VhostShadowVirtqueue *vhost_svq_new(void)
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
 {
     g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
     int r;
@@ -542,6 +609,7 @@ VhostShadowVirtqueue *vhost_svq_new(void)
 
     event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
     event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
+    svq->iova_tree = iova_tree;
     return g_steal_pointer(&svq);
 
 err_init_hdev_call:
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 38b3b91ca7..e5e24c536d 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -13,6 +13,7 @@
 #include "qemu/event_notifier.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
+#include "hw/virtio/vhost-iova-tree.h"
 
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
@@ -43,6 +44,9 @@ typedef struct VhostShadowVirtqueue {
     /* Virtio device */
    VirtIODevice *vdev;
 
+    /* IOVA mapping */
+    VhostIOVATree *iova_tree;
+
     /* Map for use the guest's descriptors */
     VirtQueueElement **ring_id_maps;
 
@@ -75,7 +79,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                      VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
-VhostShadowVirtqueue *vhost_svq_new(void);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
 
 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index db34f26246..8245345bcd 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -211,6 +211,21 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                          vaddr, section->readonly);
 
     llsize = int128_sub(llend, int128_make64(iova));
+    if (v->shadow_vqs_enabled) {
+        DMAMap mem_region = {
+            .translated_addr = (hwaddr)(uintptr_t)vaddr,
+            .size = int128_get64(llsize) - 1,
+            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
+        };
+
+        int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
+        if (unlikely(r != IOVA_OK)) {
+            error_report("Can't allocate a mapping (%d)", r);
+            goto fail;
+        }
+
+        iova = mem_region.iova;
+    }
 
     vhost_vdpa_iotlb_batch_begin_once(v);
     ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
@@ -263,6 +278,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
 
     llsize = int128_sub(llend, int128_make64(iova));
 
+    if (v->shadow_vqs_enabled) {
+        const DMAMap *result;
+        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+        DMAMap mem_region = {
+            .translated_addr = (hwaddr)(uintptr_t)vaddr,
+            .size = int128_get64(llsize) - 1,
+        };
+
+        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
+        iova = result->iova;
+        vhost_iova_tree_remove(v->iova_tree, &mem_region);
+    }
     vhost_vdpa_iotlb_batch_begin_once(v);
     ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
     if (ret) {
@@ -372,7 +401,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
 
     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
     for (unsigned n = 0; n < hdev->nvqs; ++n) {
-        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
+        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
 
         if (unlikely(!svq)) {
             error_setg(errp, "Cannot create svq %u", n);
@@ -809,33 +838,70 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
 /**
  * Unmap a SVQ area in the device
  */
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
-                                      hwaddr size)
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
+                                      const DMAMap *needle)
 {
+    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
+    hwaddr size;
     int r;
 
-    size = ROUND_UP(size, qemu_real_host_page_size);
-    r = vhost_vdpa_dma_unmap(v, iova, size);
+    if (unlikely(!result)) {
+        error_report("Unable to find SVQ address to unmap");
+        return false;
+    }
+
+    size = ROUND_UP(result->size, qemu_real_host_page_size);
+    r = vhost_vdpa_dma_unmap(v, result->iova, size);
     return r == 0;
 }
 
 static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                        const VhostShadowVirtqueue *svq)
 {
+    DMAMap needle = {};
     struct vhost_vdpa *v = dev->opaque;
     struct vhost_vring_addr svq_addr;
-    size_t device_size = vhost_svq_device_area_size(svq);
-    size_t driver_size = vhost_svq_driver_area_size(svq);
     bool ok;
 
     vhost_svq_get_vring_addr(svq, &svq_addr);
 
-    ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
+    needle.translated_addr = svq_addr.desc_user_addr;
+    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
     if (unlikely(!ok)) {
         return false;
     }
 
-    return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
+    needle.translated_addr = svq_addr.used_user_addr;
+    return vhost_vdpa_svq_unmap_ring(v, &needle);
+}
+
+/**
+ * Map the SVQ area in the device
+ *
+ * @v: Vhost-vdpa device
+ * @needle: The area to search an IOVA for
+ * @errp: Error pointer
+ */
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
+                                    Error **errp)
+{
+    int r;
+
+    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
+    if (unlikely(r != IOVA_OK)) {
+        error_setg(errp, "Cannot allocate iova (%d)", r);
+        return false;
+    }
+
+    r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
+                           (void *)(uintptr_t)needle->translated_addr,
+                           needle->perm == IOMMU_RO);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Cannot map region to device");
+        vhost_iova_tree_remove(v->iova_tree, needle);
+    }
+
+    return r == 0;
 }
 
 /**
@@ -851,28 +917,44 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                      struct vhost_vring_addr *addr,
                                      Error **errp)
 {
+    DMAMap device_region, driver_region;
+    struct vhost_vring_addr svq_addr;
     struct vhost_vdpa *v = dev->opaque;
     size_t device_size = vhost_svq_device_area_size(svq);
     size_t driver_size = vhost_svq_driver_area_size(svq);
-    int r;
+    size_t avail_offset;
+    bool ok;
 
     ERRP_GUARD();
-    vhost_svq_get_vring_addr(svq, addr);
+    vhost_svq_get_vring_addr(svq, &svq_addr);
 
-    r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
-                           (void *)(uintptr_t)addr->desc_user_addr, true);
-    if (unlikely(r != 0)) {
-        error_setg_errno(errp, -r, "Cannot create vq driver region: ");
+    driver_region = (DMAMap) {
+        .translated_addr = svq_addr.desc_user_addr,
+        .size = driver_size - 1,
+        .perm = IOMMU_RO,
+    };
+    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
+    if (unlikely(!ok)) {
+        error_prepend(errp, "Cannot create vq driver region: ");
         return false;
     }
+    addr->desc_user_addr = driver_region.iova;
+    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
+    addr->avail_user_addr = driver_region.iova + avail_offset;
 
-    r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
-                           (void *)(intptr_t)addr->used_user_addr, false);
-    if (unlikely(r != 0)) {
-        error_setg_errno(errp, -r, "Cannot create vq device region: ");
+    device_region = (DMAMap) {
+        .translated_addr = svq_addr.used_user_addr,
+        .size = device_size - 1,
+        .perm = IOMMU_RW,
+    };
+    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
+    if (unlikely(!ok)) {
+        error_prepend(errp, "Cannot create vq device region: ");
+        vhost_vdpa_svq_unmap_ring(v, &driver_region);
     }
+    addr->used_user_addr = device_region.iova;
 
-    return r == 0;
+    return ok;
 }
 
 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 009a9f3b6b..ee8e939ad0 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -14,6 +14,7 @@
 
 #include <gmodule.h>
 
+#include "hw/virtio/vhost-iova-tree.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -30,6 +31,8 @@ typedef struct vhost_vdpa {
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
     bool shadow_vqs_enabled;
+    /* IOVA mapping used by the Shadow Virtqueue */
+    VhostIOVATree *iova_tree;
     GPtrArray *shadow_vqs;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
-- 
2.27.0