# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2025 Intel Corporation
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2c83b6d270..b9aebe1ea6 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2983,6 +2983,8 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
     addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
     rb = qemu_ram_block_from_host(addr, false, &offset);
 
+    memory_region_convert_mem_attr(&section, !to_private);
+
     if (to_private) {
         if (rb->page_size != qemu_host_page_size) {
             /*
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index 745ead0034..6cef1b5ff2 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -56,6 +56,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
     ram_flags = backend->share ? RAM_SHARED : 0;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
+    ram_flags |= m->hugetlb ? RAM_GUEST_MEMFD_HUGETLB : 0;
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
                                           backend->size, ram_flags, fd, 0, errp);
 }
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 059bfdc07a..d3f7cc93e7 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -251,6 +251,7 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
     return (!memory_region_is_ram(section->mr) &&
             !memory_region_is_iommu(section->mr)) ||
            memory_region_is_protected(section->mr) ||
+           memory_region_has_guest_memfd(section->mr) ||
            /*
             * Sizing an enabled 64-bit BAR can cause spurious mappings to
             * addresses in the upper part of the 64-bit address space.  These
@@ -347,12 +348,9 @@ out:
     rcu_read_unlock();
 }
 
-static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
-                                            MemoryRegionSection *section)
+static void vfio_notify_discard_generic(VFIOContainerBase *bcontainer,
+                                        MemoryRegionSection *section)
 {
-    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
-                                                listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
     const hwaddr size = int128_get64(section->size);
     const hwaddr iova = section->offset_within_address_space;
     int ret;
@@ -365,12 +363,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
     }
 }
 
-static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
-                                            MemoryRegionSection *section)
+static int vfio_notify_populate_generic(VFIOContainerBase *bcontainer,
+                                        MemoryRegionSection *section,
+                                        uint64_t granularity)
 {
-    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
-                                                listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
     const hwaddr end = section->offset_within_region +
                        int128_get64(section->size);
     hwaddr start, next, iova;
@@ -382,7 +378,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
      * unmap in minimum granularity later.
      */
     for (start = section->offset_within_region; start < end; start = next) {
-        next = ROUND_UP(start + 1, vrdl->granularity);
+        next = ROUND_UP(start + 1, granularity);
         next = MIN(next, end);
 
         iova = start - section->offset_within_region +
@@ -393,13 +389,34 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                                      vaddr, section->readonly);
         if (ret) {
             /* Rollback */
-            vfio_ram_discard_notify_discard(rdl, section);
+            vfio_notify_discard_generic(bcontainer, section);
             return ret;
         }
     }
     return 0;
 }
 
+/* RamDiscardListener discard hook: unwrap @rdl and call the generic helper. */
+static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
+                                            MemoryRegionSection *section)
+{
+    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+                                                listener);
+
+    vfio_notify_discard_generic(vrdl->bcontainer, section);
+}
+
+/* RamDiscardListener populate hook; forwards to the generic helper. */
+static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
+                                            MemoryRegionSection *section)
+{
+    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+                                                listener);
+
+    return vfio_notify_populate_generic(vrdl->bcontainer, section,
+                                        vrdl->granularity);
+}
+
 static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                MemoryRegionSection *section)
 {
@@ -1353,6 +1370,31 @@ static void vfio_listener_log_sync(MemoryListener *listener,
     }
 }
 
+/*
+ * MemoryListener hook for private <-> shared conversions of @section:
+ * populate the range in the container when it becomes shared, discard it
+ * when it becomes private.
+ */
+static void vfio_listener_convert_mem_attr(MemoryListener *listener,
+                                           MemoryRegionSection *section,
+                                           bool shared)
+{
+    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
+                                                 listener);
+
+    if (shared) {
+        /*
+         * Granularity is the highest bit set in pgsizes.
+         * NOTE(review): the populate result is dropped because this hook
+         * returns void -- confirm failures need no reporting here.
+         */
+        vfio_notify_populate_generic(bcontainer, section,
+                                     1ULL << (63 - clz64(bcontainer->pgsizes)));
+    } else {
+        vfio_notify_discard_generic(bcontainer, section);
+    }
+}
+
 const MemoryListener vfio_memory_listener = {
     .name = "vfio",
     .region_add = vfio_listener_region_add,
@@ -1360,6 +1402,7 @@ const MemoryListener vfio_memory_listener = {
     .log_global_start = vfio_listener_log_global_start,
     .log_global_stop = vfio_listener_log_global_stop,
     .log_sync = vfio_listener_log_sync,
+    .convert_mem_attr = vfio_listener_convert_mem_attr,
 };
 
 void vfio_reset_handler(void *opaque)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1e351f6fc8..d17acdb2ea 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -246,6 +246,9 @@ typedef struct IOMMUTLBEvent {
 /* RAM can be private that has kvm guest memfd backend */
 #define RAM_GUEST_MEMFD   (1 << 12)
 
+/* RAM can be private and is backed by a hugetlb kvm guest memfd */
+#define RAM_GUEST_MEMFD_HUGETLB (1 << 13)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
@@ -1086,6 +1089,19 @@ struct MemoryListener {
      */
     void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section,
                                hwaddr addr, hwaddr len);
+
+    /**
+     * @convert_mem_attr:
+     *
+     * Called during the memory attribute conversion.
+     *
+     * @listener: The #MemoryListener
+     * @section: The MemoryRegionSection
+     * @shared: true when converting from private to shared, false otherwise
+     */
+    void (*convert_mem_attr)(MemoryListener *listener,
+                             MemoryRegionSection *section, bool shared);
+
     /**
      * @priority:
      *
@@ -2541,6 +2557,14 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
  */
 void memory_global_dirty_log_sync(bool last_stage);
 
+/**
+ * memory_region_convert_mem_attr: convert the memory attribute
+ * @section: the #MemoryRegionSection to be converted
+ * @shared: if true, convert attribute from private to shared;
+ *          if false, convert from shared to private
+ */
+void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared);
+
 /**
  * memory_global_dirty_log_sync: synchronize the dirty log for all memory
  *
diff --git a/system/memory.c b/system/memory.c
index 85a22408e9..e9a94e1654 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -3009,6 +3009,26 @@ void memory_global_dirty_log_stop(unsigned int flags)
     memory_global_dirty_log_do_stop(flags);
 }
 
+void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared)
+{
+    MemoryListener *listener;
+
+    if (!section->mr || !memory_region_has_guest_memfd(section->mr)) {
+        return;
+    }
+
+    /*
+     * NOTE(review): every registered listener that implements the hook is
+     * called, whatever address space it is attached to -- confirm intended.
+     */
+    QTAILQ_FOREACH(listener, &memory_listeners, link) {
+        if (!listener->convert_mem_attr) {
+            continue;
+        }
+        listener->convert_mem_attr(listener, section, shared);
+    }
+}
+
 static void listener_add_address_space(MemoryListener *listener,
                                        AddressSpace *as)
 {
diff --git a/system/physmem.c b/system/physmem.c
index 8c9368bc99..688f76e425 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1803,6 +1803,11 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
     }
 }
 
+/* Define the flag locally unless an included header already provides it. */
+#ifndef KVM_GUEST_MEMFD_HUGETLB
+#define KVM_GUEST_MEMFD_HUGETLB (1 << 1)
+#endif
+
 static void ram_block_add(RAMBlock *new_block, Error **errp)
 {
     const bool noreserve = qemu_ram_is_noreserve(new_block);
@@ -1844,8 +1849,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     if (kvm_enabled() && (new_block->flags & RAM_GUEST_MEMFD)) {
         assert(new_block->guest_memfd < 0);
 
-        new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
-                                                        0, errp);
+        new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
+            (new_block->flags & RAM_GUEST_MEMFD_HUGETLB) ?
+            KVM_GUEST_MEMFD_HUGETLB : 0, errp);
         if (new_block->guest_memfd < 0) {
             qemu_mutex_unlock_ramlist();
             return;
@@ -1914,7 +1920,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     /* Just support these ram flags by now. */
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
-                          RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
+                          RAM_READONLY_FD | RAM_GUEST_MEMFD |
+                          RAM_GUEST_MEMFD_HUGETLB)) == 0);
 
     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");