asterinas/tools/docker/tdx/tdx_qemu.patch

# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2025 Intel Corporation

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2c83b6d270..b9aebe1ea6 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2983,6 +2983,8 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
         addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
         rb = qemu_ram_block_from_host(addr, false, &offset);

+        memory_region_convert_mem_attr(&section, !to_private);
+
         if (to_private) {
             if (rb->page_size != qemu_host_page_size) {
                 /*
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index 745ead0034..6cef1b5ff2 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -56,6 +56,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
     ram_flags = backend->share ? RAM_SHARED : 0;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
+    ram_flags |= m->hugetlb ? RAM_GUEST_MEMFD_HUGETLB : 0;
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
                                           backend->size, ram_flags, fd, 0, errp);
 }
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 059bfdc07a..d3f7cc93e7 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -251,6 +251,7 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
     return (!memory_region_is_ram(section->mr) &&
             !memory_region_is_iommu(section->mr)) ||
            memory_region_is_protected(section->mr) ||
+           memory_region_has_guest_memfd(section->mr) ||
            /*
             * Sizing an enabled 64-bit BAR can cause spurious mappings to
             * addresses in the upper part of the 64-bit address space.  These
@@ -347,12 +348,9 @@ out:
     rcu_read_unlock();
 }

-static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
-                                            MemoryRegionSection *section)
+static void vfio_notify_discard_generic(VFIOContainerBase *bcontainer,
+                                        MemoryRegionSection *section)
 {
-    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
-                                                listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
     const hwaddr size = int128_get64(section->size);
     const hwaddr iova = section->offset_within_address_space;
     int ret;
@@ -365,12 +363,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
     }
 }

-static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
-                                            MemoryRegionSection *section)
+static int vfio_notify_populate_generic(VFIOContainerBase *bcontainer,
+                                        MemoryRegionSection *section,
+                                        uint64_t granularity)
 {
-    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
-                                                listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
     const hwaddr end = section->offset_within_region +
                        int128_get64(section->size);
     hwaddr start, next, iova;
@@ -382,7 +378,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
      * unmap in minimum granularity later.
      */
     for (start = section->offset_within_region; start < end; start = next) {
-        next = ROUND_UP(start + 1, vrdl->granularity);
+        next = ROUND_UP(start + 1, granularity);
         next = MIN(next, end);

         iova = start - section->offset_within_region +
@@ -393,13 +389,31 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                                      vaddr, section->readonly);
         if (ret) {
             /* Rollback */
-            vfio_ram_discard_notify_discard(rdl, section);
+            vfio_notify_discard_generic(bcontainer, section);
             return ret;
         }
     }
     return 0;
 }

+static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
+                                            MemoryRegionSection *section)
+{
+    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+                                                listener);
+
+    vfio_notify_discard_generic(vrdl->bcontainer, section);
+}
+
+static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
+                                            MemoryRegionSection *section)
+{
+    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+                                                listener);
+
+    return vfio_notify_populate_generic(vrdl->bcontainer, section, vrdl->granularity);
+}
+
 static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                MemoryRegionSection *section)
 {
@@ -1353,6 +1367,19 @@ static void vfio_listener_log_sync(MemoryListener *listener,
     }
 }

+static void vfio_listener_convert_mem_attr(MemoryListener *listener,
+                                           MemoryRegionSection *section,
+                                           bool shared)
+{
+    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, listener);
+
+    if (shared)
+        vfio_notify_populate_generic(bcontainer, section,
+            1ULL << (63 - clz64(bcontainer->pgsizes)));
+    else
+        vfio_notify_discard_generic(bcontainer, section);
+}
+
 const MemoryListener vfio_memory_listener = {
     .name = "vfio",
     .region_add = vfio_listener_region_add,
@@ -1360,6 +1387,7 @@ const MemoryListener vfio_memory_listener = {
     .log_global_start = vfio_listener_log_global_start,
     .log_global_stop = vfio_listener_log_global_stop,
     .log_sync = vfio_listener_log_sync,
+    .convert_mem_attr = vfio_listener_convert_mem_attr,
 };

 void vfio_reset_handler(void *opaque)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1e351f6fc8..d17acdb2ea 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -246,6 +246,9 @@ typedef struct IOMMUTLBEvent {
 /* RAM can be private that has kvm guest memfd backend */
 #define RAM_GUEST_MEMFD   (1 << 12)

+/* Hugetlb can be private that has kvm guest memfd backend */
+#define RAM_GUEST_MEMFD_HUGETLB (1 << 13)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
@@ -1086,6 +1089,19 @@ struct MemoryListener {
      */
     void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section,
                                hwaddr addr, hwaddr len);
+
+    /**
+     * @convert_mem_attr:
+     *
+     * Called during the memory attribute conversion.
+     *
+     * @listener: The #MemoryListener
+     * @section: The MemoryRegionSection
+     * @shared: convert memory attribute from private to shared
+     */
+    void (*convert_mem_attr)(MemoryListener *listener, MemoryRegionSection *section,
+                            bool shared);
+
     /**
      * @priority:
      *
@@ -2541,6 +2557,14 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
  */
 void memory_global_dirty_log_sync(bool last_stage);

+/**
+ * memory_region_convert_mem_attr: convert the memory attribute
+ * @section: the #MemoryRegionSection to be converted
+ * @shared: if true, convert attribute from private to shared;
+ *          if false, convert from shared to private
+ */
+void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared);
+
 /**
  * memory_global_dirty_log_sync: synchronize the dirty log for all memory
  *
diff --git a/system/memory.c b/system/memory.c
index 85a22408e9..e9a94e1654 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -3009,6 +3009,21 @@ void memory_global_dirty_log_stop(unsigned int flags)
     memory_global_dirty_log_do_stop(flags);
 }

+void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared)
+{
+    MemoryListener *listener;
+    if (!section->mr || !memory_region_has_guest_memfd(section->mr)) {
+        return;
+    }
+
+    QTAILQ_FOREACH(listener, &memory_listeners, link) {
+        if (!listener->convert_mem_attr) {
+            continue;
+        }
+        listener->convert_mem_attr(listener, section, shared);
+    }
+}
+
 static void listener_add_address_space(MemoryListener *listener,
                                        AddressSpace *as)
 {
diff --git a/system/physmem.c b/system/physmem.c
index 8c9368bc99..688f76e425 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1803,6 +1803,10 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
     }
 }

+#ifdef CONFIG_KVM
+#define KVM_GUEST_MEMFD_HUGETLB               		 (1 << 1)
+#endif
+
 static void ram_block_add(RAMBlock *new_block, Error **errp)
 {
     const bool noreserve = qemu_ram_is_noreserve(new_block);
@@ -1844,8 +1848,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     if (kvm_enabled() && (new_block->flags & RAM_GUEST_MEMFD)) {
         assert(new_block->guest_memfd < 0);

-        new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
-                                                        0, errp);
+		new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
+                                                         (new_block->flags & RAM_GUEST_MEMFD_HUGETLB) ? KVM_GUEST_MEMFD_HUGETLB : 0, errp);
         if (new_block->guest_memfd < 0) {
             qemu_mutex_unlock_ramlist();
             return;
@@ -1914,7 +1918,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     /* Just support these ram flags by now. */
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
-                          RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
+                          RAM_READONLY_FD | RAM_GUEST_MEMFD |RAM_GUEST_MEMFD_HUGETLB)) == 0);

     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");