mirror of
https://github.com/asterinas/asterinas.git
synced 2025-06-08 21:06:48 +00:00
Upgrade TDX stack and refactor the Asterinas Docker system
This commit is contained in:
parent
764e3afa7c
commit
6912cca51f
@ -54,18 +54,16 @@ grub.protocol = "linux"
|
||||
qemu.args = """\
|
||||
-accel kvm \
|
||||
-name process=tdxvm,debug-threads=on \
|
||||
-m 6G \
|
||||
-m 8G \
|
||||
-vga none \
|
||||
-monitor pty \
|
||||
-no-hpet \
|
||||
-nodefaults \
|
||||
-drive file=target/osdk/asterinas/asterinas.qcow2,if=virtio,format=qcow2 \
|
||||
-monitor telnet:127.0.0.1:9001,server,nowait \
|
||||
-bios /usr/share/qemu/OVMF.fd \
|
||||
-object tdx-guest,sept-ve-disable=on,id=tdx,quote-generation-service=vsock:2:4050 \
|
||||
-object memory-backend-memfd-private,id=ram1,size=2G \
|
||||
-cpu host,-kvm-steal-time,pmu=off,tsc-freq=1000000000 \
|
||||
-machine q35,kernel_irqchip=split,confidential-guest-support=tdx,memory-backend=ram1 \
|
||||
-bios /root/ovmf/release/OVMF.fd \
|
||||
-object tdx-guest,sept-ve-disable=on,id=tdx0 \
|
||||
-cpu host,-kvm-steal-time,pmu=off \
|
||||
-machine q35,kernel-irqchip=split,confidential-guest-support=tdx0 \
|
||||
-smp 1 \
|
||||
-nographic \
|
||||
"""
|
||||
|
@ -57,12 +57,11 @@ you will be assisted with a convenient utility script, `tools/bump_version.sh`,
|
||||
|
||||
### Commit 1: "Bump the Docker image version"
|
||||
|
||||
After updating the Docker image content
|
||||
(specified by the `tools/docker/Dockerfile.jinja` file),
|
||||
After updating the Docker image content,
|
||||
increment the Docker image version using the following command:
|
||||
|
||||
```
|
||||
bump_version.sh --docker_version_file [major | minor | patch | date]
|
||||
./bump_version.sh --docker_version_file [major | minor | patch | date]
|
||||
```
|
||||
|
||||
The second argument specifies which part of the Docker image version to increment.
|
||||
@ -82,7 +81,7 @@ write a follow-up commit to
|
||||
update all Docker image version references across the codebase.
|
||||
|
||||
```
|
||||
bump_version.sh --docker_version_refs
|
||||
./bump_version.sh --docker_version_refs
|
||||
```
|
||||
|
||||
If your purpose is to publish non-breaking changes to the Docker images,
|
||||
@ -96,7 +95,7 @@ synchronize the version number in `VERSION` with
|
||||
that in `DOCKER_IMAGE_VERSION` by running:
|
||||
|
||||
```
|
||||
bump_version.sh --version_file
|
||||
./bump_version.sh --version_file
|
||||
```
|
||||
|
||||
This command also updates all version numbers
|
||||
|
@ -9,13 +9,11 @@ qemu.args = """
|
||||
-vga none \
|
||||
-nographic \
|
||||
-monitor pty \
|
||||
-no-hpet \
|
||||
-nodefaults \
|
||||
-bios /usr/share/qemu/OVMF.fd \
|
||||
-object tdx-guest,sept-ve-disable=on,id=tdx,quote-generation-service=vsock:2:4050 \
|
||||
-bios /root/ovmf/release/OVMF.fd \
|
||||
-object tdx-guest,sept-ve-disable=on,id=tdx0 \
|
||||
-cpu host,-kvm-steal-time,pmu=off \
|
||||
-machine q35,kernel_irqchip=split,confidential-guest-support=tdx,memory-backend=ram1 \
|
||||
-object memory-backend-memfd-private,id=ram1,size=8G \
|
||||
-machine q35,kernel-irqchip=split,confidential-guest-support=tdx0 \
|
||||
-device virtio-keyboard-pci,disable-legacy=on,disable-modern=off \
|
||||
-chardev stdio,id=mux,mux=on,logfile=qemu.log \
|
||||
-device virtio-serial,romfile= \
|
||||
|
1
tools/docker/.gitignore
vendored
1
tools/docker/.gitignore
vendored
@ -1 +0,0 @@
|
||||
**/Dockerfile
|
@ -2,7 +2,7 @@
|
||||
|
||||
#= Install packages for Docker building ====================================
|
||||
|
||||
FROM {{ base_image }} as build-base
|
||||
FROM ubuntu:22.04 AS build-base
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
@ -39,7 +39,7 @@ RUN wget https://raw.githubusercontent.com/asterinas/linux_binary_cache/14598b6/
|
||||
|
||||
#= Build benchmark =========================================================
|
||||
|
||||
FROM build-base as build-benchmarks
|
||||
FROM build-base AS build-benchmarks
|
||||
|
||||
# Download the source files of benchmarks
|
||||
WORKDIR /root
|
||||
@ -140,7 +140,7 @@ RUN rm -rf sysbench-1.0.20 \
|
||||
|
||||
#= Install applications =======================================================
|
||||
|
||||
FROM build-base as build-applications
|
||||
FROM build-base AS build-applications
|
||||
|
||||
# Install SQLite
|
||||
WORKDIR /root
|
||||
@ -278,7 +278,7 @@ RUN rm -rf wrk
|
||||
|
||||
#= Build syscall test =========================================================
|
||||
|
||||
FROM build-base as build-bazel
|
||||
FROM build-base AS build-bazel
|
||||
|
||||
# Install bazel, which is required by the system call test suite from Gvisor project
|
||||
RUN mkdir -p /root/bazel
|
||||
@ -289,7 +289,7 @@ RUN apt clean && rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /root
|
||||
RUN rm -rf bazel
|
||||
|
||||
FROM build-bazel as syscall_test
|
||||
FROM build-bazel AS syscall_test
|
||||
|
||||
# Build the syscall test binaries
|
||||
COPY test/syscall_test /root/syscall_test
|
||||
@ -297,10 +297,9 @@ WORKDIR /root/syscall_test
|
||||
RUN export BUILD_DIR=build && \
|
||||
make ${BUILD_DIR}/syscall_test_bins
|
||||
|
||||
{% if not intel_tdx %}
|
||||
#= Build QEMU =================================================================
|
||||
|
||||
FROM build-base as build-qemu
|
||||
FROM build-base AS build-qemu
|
||||
|
||||
RUN apt update && apt-get install -y --no-install-recommends \
|
||||
libgcrypt-dev `# optional build dependency` \
|
||||
@ -311,7 +310,7 @@ RUN apt update && apt-get install -y --no-install-recommends \
|
||||
ninja-build
|
||||
RUN apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
FROM build-qemu as qemu
|
||||
FROM build-qemu AS qemu
|
||||
|
||||
# Fetch and install QEMU from the official source
|
||||
#
|
||||
@ -331,7 +330,7 @@ RUN rm -rf /root/qemu
|
||||
|
||||
#= Build OVMF =================================================================
|
||||
|
||||
FROM build-base as build-ovmf
|
||||
FROM build-base AS build-ovmf
|
||||
|
||||
RUN apt update && apt-get install -y --no-install-recommends \
|
||||
bison \
|
||||
@ -341,7 +340,7 @@ RUN apt update && apt-get install -y --no-install-recommends \
|
||||
uuid-dev
|
||||
RUN apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
FROM build-ovmf as ovmf
|
||||
FROM build-ovmf AS ovmf
|
||||
|
||||
# Fetch and build OVMF from the EDK2 official source
|
||||
WORKDIR /root
|
||||
@ -354,7 +353,7 @@ RUN /bin/bash -c "source ./edksetup.sh \
|
||||
|
||||
#= Build GRUB =================================================================
|
||||
|
||||
FROM build-base as build-grub
|
||||
FROM build-base AS build-grub
|
||||
|
||||
RUN apt update && apt-get install -y --no-install-recommends \
|
||||
autoconf \
|
||||
@ -368,7 +367,7 @@ RUN apt update && apt-get install -y --no-install-recommends \
|
||||
pkg-config
|
||||
RUN apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
FROM build-grub as grub
|
||||
FROM build-grub AS grub
|
||||
|
||||
# Fetch and install GRUB from the GNU official source
|
||||
#
|
||||
@ -398,15 +397,14 @@ RUN echo depends bli part_gpt > grub-core/extra_deps.lst \
|
||||
&& make install
|
||||
WORKDIR /root
|
||||
RUN rm -rf /root/grub
|
||||
{% endif %}
|
||||
|
||||
#= Build busybox ==============================================================
|
||||
|
||||
FROM build-base as build-busybox
|
||||
FROM build-base AS build-busybox
|
||||
|
||||
RUN apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
FROM build-busybox as busybox
|
||||
FROM build-busybox AS busybox
|
||||
|
||||
WORKDIR /root
|
||||
RUN wget -O busybox.tar.bz2 https://busybox.net/downloads/busybox-1.35.0.tar.bz2 \
|
||||
@ -421,7 +419,7 @@ RUN make defconfig \
|
||||
|
||||
#= The final stages to produce the Asterinas development image ====================
|
||||
|
||||
FROM build-base as rust
|
||||
FROM build-base AS rust
|
||||
|
||||
# Install Rust with both nightly and stable
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
@ -451,11 +449,8 @@ RUN apt update && apt-get install -y --no-install-recommends \
|
||||
file \
|
||||
gdb \
|
||||
grub-efi-amd64 \
|
||||
{% if not intel_tdx %}
|
||||
grub-efi-amd64-bin \
|
||||
grub-efi-amd64-dbg \
|
||||
ovmf `# provide an alternative stable firmware` \
|
||||
{% endif %}
|
||||
iptables \
|
||||
iproute2 \
|
||||
libnl-3-dev `# dependency for netlink socket` \
|
||||
@ -479,7 +474,6 @@ RUN apt clean && rm -rf /var/lib/apt/lists/*
|
||||
COPY --from=syscall_test /root/syscall_test/build/syscall_test_bins /root/syscall_test_bins
|
||||
ENV ASTER_PREBUILT_SYSCALL_TEST=/root/syscall_test_bins
|
||||
|
||||
{% if not intel_tdx %}
|
||||
# Install QEMU built from the previous stages
|
||||
COPY --from=qemu /usr/local/qemu /usr/local/qemu
|
||||
ENV PATH="/usr/local/qemu/bin:${PATH}"
|
||||
@ -494,7 +488,6 @@ COPY --from=grub /usr/local/grub /usr/local/grub
|
||||
ENV PATH="/usr/local/grub/bin:${PATH}"
|
||||
# Make a symbolic link for `unicode.pf2` from Ubuntu 22.04 package
|
||||
RUN ln -sf /usr/share/grub/unicode.pf2 /usr/local/grub/share/grub/unicode.pf2
|
||||
{% endif %}
|
||||
|
||||
# Install Busybox built from the previous stages
|
||||
COPY --from=busybox /root/busybox/busybox /bin/busybox
|
@ -7,49 +7,32 @@ Asterinas development Docker images are provided to facilitate developing and te
|
||||
To build a Docker image for Asterinas and test it on your local machine, navigate to the root directory of the Asterinas source code tree and execute the following command:
|
||||
|
||||
```bash
|
||||
cd <asterinas dir>/tools/docker
|
||||
# Generate Dockerfile
|
||||
python3 gen_dockerfile.py
|
||||
cd <asterinas dir>
|
||||
# Build Docker image
|
||||
docker buildx build \
|
||||
-f tools/docker/Dockerfile \
|
||||
--build-arg ASTER_RUST_VERSION=${RUST_VERSION} \
|
||||
-t asterinas/asterinas:${ASTER_VERSION} \
|
||||
--build-arg ASTER_RUST_VERSION=$(grep "channel" rust-toolchain.toml | awk -F '"' '{print $2}') \
|
||||
-t asterinas/asterinas:$(cat VERSION)-$(date +%Y%m%d) \
|
||||
.
|
||||
```
|
||||
|
||||
The meanings of the two environment variables in the command are as follows:
|
||||
|
||||
- `${ASTER_VERSION}`: Represents the version number of Asterinas. You can find this in the `VERSION` file.
|
||||
- `${RUST_VERSION}`: Denotes the required Rust toolchain version, as specified in the `rust-toolchain` file.
|
||||
|
||||
For Intel TDX Docker Image, you can execute the following command:
|
||||
The Intel TDX Docker image is built on top of the general Docker image. To build it, execute the following command:
|
||||
|
||||
```bash
|
||||
cd <asterinas dir>/tools/docker
|
||||
# Generate Dockerfile for Intel TDX
|
||||
python3 gen_dockerfile.py --intel-tdx
|
||||
cd <asterinas dir>
|
||||
# Build Docker image
|
||||
# Build Intel TDX Docker image
|
||||
docker buildx build \
|
||||
-f tools/docker/Dockerfile \
|
||||
--build-arg ASTER_RUST_VERSION=${RUST_VERSION} \
|
||||
-t asterinas/asterinas:${ASTER_VERSION}-tdx \
|
||||
-f tools/docker/tdx/Dockerfile \
|
||||
--build-arg ASTER_RUST_VERSION=$(grep "channel" rust-toolchain.toml | awk -F '"' '{print $2}') \
|
||||
--build-arg BASE_VERSION=${BASE_VERSION} \
|
||||
-t asterinas/asterinas:$(cat VERSION)-$(date +%Y%m%d)-tdx \
|
||||
.
|
||||
```
|
||||
|
||||
## Tagging Docker Images
|
||||
Here, `BASE_VERSION` is the version tag of the general Docker image that the TDX image should be based on.
|
||||
|
||||
It's essential for each Asterinas Docker image to have a distinct tag. By convention, the tag is assigned with the version number of the Asterinas project itself. This methodology ensures clear correspondence between a commit of the source code and its respective Docker image.
|
||||
## Tagging and Uploading Docker Images
|
||||
|
||||
If a commit needs to create a new Docker image, it should
|
||||
For how to tag Docker images, please refer to the [version bump guide](https://asterinas.github.io/book/to-contribute/version-bump.html).
|
||||
|
||||
1. Update the Dockerfile as well as other materials relevant to the Docker image, and
|
||||
2. Run [`tools/bump_version.sh`](../bump_version.sh) tool to update the Asterinas project's version number.
|
||||
|
||||
For bug fixes or small changes, increment the last number of a [SemVer](https://semver.org/) by one. For major features or releases, increment the second number. All changes made in the two steps should be included in the commit.
|
||||
|
||||
## Uploading Docker Images
|
||||
|
||||
New versions of Asterinas's Docker images are automatically uploaded to DockerHub through Github Actions. Simply submit your PR that updates Asterinas's Docker image for review. After getting the project maintainers' approval, the [Docker image building workflow](../../.github/workflows/docker_build.yml) will be started, building the new Docker image and pushing it to DockerHub.
|
||||
New versions of Asterinas's Docker images are automatically uploaded to DockerHub through GitHub Actions. Simply submit a PR that updates Asterinas's Docker image for review. After the project maintainers approve it, the [Docker image building workflow](../../.github/workflows/publish_docker_images.yml) will start, building the new Docker image and pushing it to DockerHub.
|
||||
|
@ -1,58 +0,0 @@
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser(description='The Dockerfile generator for OSDK.')
|
||||
parser.add_argument('--intel-tdx', action='store_true', help='Include Intel TDX support')
|
||||
parser.add_argument(
|
||||
'--out-dir',
|
||||
type=str,
|
||||
default='.',
|
||||
help='Output the Dockerfile under this directory. \
|
||||
By default, the output directory is the current working directory.'
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
def setup_output_directory(out_dir):
|
||||
if os.path.isabs(out_dir):
|
||||
logging.error("The --out-dir argument must be a relative path.")
|
||||
sys.exit(1)
|
||||
template_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
if out_dir == '.':
|
||||
return template_dir
|
||||
output_directory_path = os.path.join(template_dir, out_dir)
|
||||
if not os.path.exists(output_directory_path):
|
||||
os.makedirs(output_directory_path)
|
||||
return output_directory_path
|
||||
|
||||
def load_template():
|
||||
template_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True, lstrip_blocks=True)
|
||||
template = env.get_template('Dockerfile.jinja')
|
||||
return template
|
||||
|
||||
def write_dockerfile(output_directory, content):
|
||||
output_path = os.path.join(output_directory, 'Dockerfile')
|
||||
with open(output_path, 'w') as file:
|
||||
file.write(content)
|
||||
logging.info(f'Dockerfile has been generated at {output_path}.')
|
||||
|
||||
def main():
|
||||
args = parse_arguments()
|
||||
output_dir = setup_output_directory(args.out_dir)
|
||||
base_image = "intelcczoo/tdvm:ubuntu22.04-mvp_2023ww15" if args.intel_tdx else "ubuntu:22.04"
|
||||
|
||||
template = load_template()
|
||||
rendered_content = template.render(base_image=base_image, intel_tdx=args.intel_tdx)
|
||||
|
||||
write_dockerfile(output_dir, rendered_content)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
38
tools/docker/tdx/Dockerfile
Normal file
38
tools/docker/tdx/Dockerfile
Normal file
@ -0,0 +1,38 @@
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
ARG BASE_VERSION
|
||||
FROM asterinas/asterinas:${BASE_VERSION} AS build-base
|
||||
|
||||
# Fetch and install QEMU from the intel-staging/qemu-tdx source
|
||||
FROM build-base AS build-qemu-tdx
|
||||
|
||||
RUN apt update && apt-get install -y --no-install-recommends \
|
||||
libgcrypt-dev `# optional build dependency` \
|
||||
libglib2.0-dev `# build dependency` \
|
||||
libpixman-1-dev `# build dependency` \
|
||||
libusb-dev `# optional build dependency` \
|
||||
meson \
|
||||
ninja-build
|
||||
RUN apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
FROM build-qemu-tdx AS qemu-tdx
|
||||
|
||||
WORKDIR /root
|
||||
RUN git clone -b tdx-qemu-upstream-2024.02.29-v8.2.0 https://github.com/intel-staging/qemu-tdx.git
|
||||
WORKDIR /root/qemu-tdx
|
||||
COPY tools/docker/tdx/tdx_qemu.patch /root/qemu-tdx
|
||||
RUN git apply tdx_qemu.patch \
|
||||
&& mkdir build \
|
||||
&& cd build \
|
||||
&& ../configure --enable-kvm --target-list=x86_64-softmmu --prefix=/usr/local/qemu --enable-slirp \
|
||||
&& make -j \
|
||||
&& make install
|
||||
WORKDIR /root
|
||||
RUN rm -rf /root/qemu-tdx
|
||||
|
||||
FROM build-base
|
||||
|
||||
# Install QEMU built from the previous stages
|
||||
COPY --from=qemu-tdx /usr/local/qemu /usr/local/qemu
|
||||
|
||||
WORKDIR /root/asterinas
|
251
tools/docker/tdx/tdx_qemu.patch
Normal file
251
tools/docker/tdx/tdx_qemu.patch
Normal file
@ -0,0 +1,251 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 2c83b6d270..b9aebe1ea6 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2983,6 +2983,8 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
|
||||
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
|
||||
rb = qemu_ram_block_from_host(addr, false, &offset);
|
||||
|
||||
+ memory_region_convert_mem_attr(&section, !to_private);
|
||||
+
|
||||
if (to_private) {
|
||||
if (rb->page_size != qemu_host_page_size) {
|
||||
/*
|
||||
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
|
||||
index 745ead0034..6cef1b5ff2 100644
|
||||
--- a/backends/hostmem-memfd.c
|
||||
+++ b/backends/hostmem-memfd.c
|
||||
@@ -56,6 +56,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
+ ram_flags |= m->hugetlb ? RAM_GUEST_MEMFD_HUGETLB : 0;
|
||||
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
|
||||
backend->size, ram_flags, fd, 0, errp);
|
||||
}
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 059bfdc07a..d3f7cc93e7 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -251,6 +251,7 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
|
||||
return (!memory_region_is_ram(section->mr) &&
|
||||
!memory_region_is_iommu(section->mr)) ||
|
||||
memory_region_is_protected(section->mr) ||
|
||||
+ memory_region_has_guest_memfd(section->mr) ||
|
||||
/*
|
||||
* Sizing an enabled 64-bit BAR can cause spurious mappings to
|
||||
* addresses in the upper part of the 64-bit address space. These
|
||||
@@ -347,12 +348,9 @@ out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
-static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
|
||||
- MemoryRegionSection *section)
|
||||
+static void vfio_notify_discard_generic(VFIOContainerBase *bcontainer,
|
||||
+ MemoryRegionSection *section)
|
||||
{
|
||||
- VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
- listener);
|
||||
- VFIOContainerBase *bcontainer = vrdl->bcontainer;
|
||||
const hwaddr size = int128_get64(section->size);
|
||||
const hwaddr iova = section->offset_within_address_space;
|
||||
int ret;
|
||||
@@ -365,12 +363,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
|
||||
}
|
||||
}
|
||||
|
||||
-static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
- MemoryRegionSection *section)
|
||||
+static int vfio_notify_populate_generic(VFIOContainerBase *bcontainer,
|
||||
+ MemoryRegionSection *section,
|
||||
+ uint64_t granularity)
|
||||
{
|
||||
- VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
- listener);
|
||||
- VFIOContainerBase *bcontainer = vrdl->bcontainer;
|
||||
const hwaddr end = section->offset_within_region +
|
||||
int128_get64(section->size);
|
||||
hwaddr start, next, iova;
|
||||
@@ -382,7 +378,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
* unmap in minimum granularity later.
|
||||
*/
|
||||
for (start = section->offset_within_region; start < end; start = next) {
|
||||
- next = ROUND_UP(start + 1, vrdl->granularity);
|
||||
+ next = ROUND_UP(start + 1, granularity);
|
||||
next = MIN(next, end);
|
||||
|
||||
iova = start - section->offset_within_region +
|
||||
@@ -393,13 +389,31 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
vaddr, section->readonly);
|
||||
if (ret) {
|
||||
/* Rollback */
|
||||
- vfio_ram_discard_notify_discard(rdl, section);
|
||||
+ vfio_notify_discard_generic(bcontainer, section);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
|
||||
+ MemoryRegionSection *section)
|
||||
+{
|
||||
+ VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
+ listener);
|
||||
+
|
||||
+ vfio_notify_discard_generic(vrdl->bcontainer, section);
|
||||
+}
|
||||
+
|
||||
+static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
+ MemoryRegionSection *section)
|
||||
+{
|
||||
+ VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
+ listener);
|
||||
+
|
||||
+ return vfio_notify_populate_generic(vrdl->bcontainer, section, vrdl->granularity);
|
||||
+}
|
||||
+
|
||||
static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
@@ -1353,6 +1367,19 @@ static void vfio_listener_log_sync(MemoryListener *listener,
|
||||
}
|
||||
}
|
||||
|
||||
+static void vfio_listener_convert_mem_attr(MemoryListener *listener,
|
||||
+ MemoryRegionSection *section,
|
||||
+ bool shared)
|
||||
+{
|
||||
+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, listener);
|
||||
+
|
||||
+ if (shared)
|
||||
+ vfio_notify_populate_generic(bcontainer, section,
|
||||
+ 1ULL << (63 - clz64(bcontainer->pgsizes)));
|
||||
+ else
|
||||
+ vfio_notify_discard_generic(bcontainer, section);
|
||||
+}
|
||||
+
|
||||
const MemoryListener vfio_memory_listener = {
|
||||
.name = "vfio",
|
||||
.region_add = vfio_listener_region_add,
|
||||
@@ -1360,6 +1387,7 @@ const MemoryListener vfio_memory_listener = {
|
||||
.log_global_start = vfio_listener_log_global_start,
|
||||
.log_global_stop = vfio_listener_log_global_stop,
|
||||
.log_sync = vfio_listener_log_sync,
|
||||
+ .convert_mem_attr = vfio_listener_convert_mem_attr,
|
||||
};
|
||||
|
||||
void vfio_reset_handler(void *opaque)
|
||||
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
||||
index 1e351f6fc8..d17acdb2ea 100644
|
||||
--- a/include/exec/memory.h
|
||||
+++ b/include/exec/memory.h
|
||||
@@ -246,6 +246,9 @@ typedef struct IOMMUTLBEvent {
|
||||
/* RAM can be private that has kvm guest memfd backend */
|
||||
#define RAM_GUEST_MEMFD (1 << 12)
|
||||
|
||||
+/* Hugetlb can be private that has kvm guest memfd backend */
|
||||
+#define RAM_GUEST_MEMFD_HUGETLB (1 << 13)
|
||||
+
|
||||
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
||||
IOMMUNotifierFlag flags,
|
||||
hwaddr start, hwaddr end,
|
||||
@@ -1086,6 +1089,19 @@ struct MemoryListener {
|
||||
*/
|
||||
void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section,
|
||||
hwaddr addr, hwaddr len);
|
||||
+
|
||||
+ /**
|
||||
+ * @convert_mem_attr:
|
||||
+ *
|
||||
+ * Called during the memory attribute conversion.
|
||||
+ *
|
||||
+ * @listener: The #MemoryListener
|
||||
+ * @section: The MemoryRegionSection
|
||||
+ * @shared: convert memory attribute from private to shared
|
||||
+ */
|
||||
+ void (*convert_mem_attr)(MemoryListener *listener, MemoryRegionSection *section,
|
||||
+ bool shared);
|
||||
+
|
||||
/**
|
||||
* @priority:
|
||||
*
|
||||
@@ -2541,6 +2557,14 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
|
||||
*/
|
||||
void memory_global_dirty_log_sync(bool last_stage);
|
||||
|
||||
+/**
|
||||
+ * memory_region_convert_mem_attr: convert the memory attribute
|
||||
+ * @section: the #MemoryRegionSection to be converted
|
||||
+ * @shared: if true, convert attribute from private to shared;
|
||||
+ * if false, convert from shared to private
|
||||
+ */
|
||||
+void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared);
|
||||
+
|
||||
/**
|
||||
* memory_global_dirty_log_sync: synchronize the dirty log for all memory
|
||||
*
|
||||
diff --git a/system/memory.c b/system/memory.c
|
||||
index 85a22408e9..e9a94e1654 100644
|
||||
--- a/system/memory.c
|
||||
+++ b/system/memory.c
|
||||
@@ -3009,6 +3009,21 @@ void memory_global_dirty_log_stop(unsigned int flags)
|
||||
memory_global_dirty_log_do_stop(flags);
|
||||
}
|
||||
|
||||
+void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared)
|
||||
+{
|
||||
+ MemoryListener *listener;
|
||||
+ if (!section->mr || !memory_region_has_guest_memfd(section->mr)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ QTAILQ_FOREACH(listener, &memory_listeners, link) {
|
||||
+ if (!listener->convert_mem_attr) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ listener->convert_mem_attr(listener, section, shared);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void listener_add_address_space(MemoryListener *listener,
|
||||
AddressSpace *as)
|
||||
{
|
||||
diff --git a/system/physmem.c b/system/physmem.c
|
||||
index 8c9368bc99..688f76e425 100644
|
||||
--- a/system/physmem.c
|
||||
+++ b/system/physmem.c
|
||||
@@ -1803,6 +1803,10 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
|
||||
}
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_KVM
|
||||
+#define KVM_GUEST_MEMFD_HUGETLB (1 << 1)
|
||||
+#endif
|
||||
+
|
||||
static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
{
|
||||
const bool noreserve = qemu_ram_is_noreserve(new_block);
|
||||
@@ -1844,8 +1848,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
if (kvm_enabled() && (new_block->flags & RAM_GUEST_MEMFD)) {
|
||||
assert(new_block->guest_memfd < 0);
|
||||
|
||||
- new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
||||
- 0, errp);
|
||||
+ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
||||
+ (new_block->flags & RAM_GUEST_MEMFD_HUGETLB) ? KVM_GUEST_MEMFD_HUGETLB : 0, errp);
|
||||
if (new_block->guest_memfd < 0) {
|
||||
qemu_mutex_unlock_ramlist();
|
||||
return;
|
||||
@@ -1914,7 +1918,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
/* Just support these ram flags by now. */
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
|
||||
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
|
||||
- RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
|
||||
+ RAM_READONLY_FD | RAM_GUEST_MEMFD |RAM_GUEST_MEMFD_HUGETLB)) == 0);
|
||||
|
||||
if (xen_enabled()) {
|
||||
error_setg(errp, "-mem-path not supported with Xen");
|
@ -52,13 +52,11 @@ if [ "$1" = "tdx" ]; then
|
||||
-vga none \
|
||||
-nographic \
|
||||
-monitor pty \
|
||||
-no-hpet \
|
||||
-nodefaults \
|
||||
-bios /usr/share/qemu/OVMF.fd \
|
||||
-object tdx-guest,sept-ve-disable=on,id=tdx,quote-generation-service=vsock:2:4050 \
|
||||
-bios /root/ovmf/release/OVMF.fd \
|
||||
-object tdx-guest,sept-ve-disable=on,id=tdx0 \
|
||||
-cpu host,-kvm-steal-time,pmu=off \
|
||||
-machine q35,kernel_irqchip=split,confidential-guest-support=tdx,memory-backend=ram1 \
|
||||
-object memory-backend-memfd-private,id=ram1,size=${MEM:-8G} \
|
||||
-machine q35,kernel-irqchip=split,confidential-guest-support=tdx0 \
|
||||
-device virtio-net-pci,netdev=net01,disable-legacy=on,disable-modern=off$VIRTIO_NET_FEATURES \
|
||||
-device virtio-keyboard-pci,disable-legacy=on,disable-modern=off \
|
||||
$NETDEV_ARGS \
|
||||
|
Loading…
x
Reference in New Issue
Block a user